diff options
Diffstat (limited to 'tools/testing/selftests/net')
376 files changed, 41547 insertions, 6517 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 2f9d378edec3..532bb732bc6d 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -2,9 +2,10 @@ bind_bhash bind_timewait bind_wildcard -csum +busy_poller cmsg_sender diag_uid +epoll_busy_poll fin_ack_lat gro hwtstamp_config @@ -16,8 +17,11 @@ ipsec ipv6_flowlabel ipv6_flowlabel_mgr log.txt +msg_oob msg_zerocopy +netlink-dumps nettest +proc_net_pktgen psock_fanout psock_snd psock_tpacket @@ -31,18 +35,21 @@ reuseport_dualstack rxtimestamp sctp_hello scm_pidfd +scm_rights sk_bind_sendto_listen sk_connect_zero_addr +sk_so_peek_off +skf_net_off socket so_incoming_cpu so_netns_cookie so_txtime +so_rcv_listener stress_reuseport_listen tap tcp_fastopen_backup_key tcp_inq tcp_mmap -test_unix_oob timestamping tls toeplitz diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 7b6918d5f4af..ea84b88bcb30 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -1,13 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for net selftests -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ - rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh + rtnetlink.sh xfrm_policy.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh @@ -20,7 +20,6 @@ TEST_PROGS += reuseaddr_ports_exhausted.sh TEST_PROGS += txtimestamp.sh TEST_PROGS += vrf-xfrm-tests.sh TEST_PROGS += rxtimestamp.sh -TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh TEST_PROGS += bareudp.sh @@ -32,9 +31,15 @@ TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh +TEST_PROGS += gre_ipv6_lladdr.sh TEST_PROGS += cmsg_so_mark.sh -TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh +TEST_PROGS += cmsg_so_priority.sh +TEST_PROGS += test_so_rcv.sh +TEST_PROGS += cmsg_time.sh cmsg_ip.sh TEST_PROGS += netns-name.sh +TEST_PROGS += link_netns.py +TEST_PROGS += nl_netdev.py +TEST_PROGS += rtnetlink.py TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh @@ -43,6 +48,8 @@ TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_flavors_test.sh +TEST_PROGS += srv6_end_dx4_netfilter_test.sh +TEST_PROGS += srv6_end_dx6_netfilter_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh @@ -53,7 +60,8 @@ TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh -TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh +TEST_PROGS += netns-sysctl.sh +TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite @@ -67,89 +75,63 @@ TEST_GEN_FILES += ipsec TEST_GEN_FILES += ioam6_parser TEST_GEN_FILES += gro TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa -TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap +TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll TEST_GEN_FILES += toeplitz TEST_GEN_FILES += cmsg_sender TEST_GEN_FILES += stress_reuseport_listen +TEST_GEN_FILES += so_rcv_listener TEST_PROGS += test_vxlan_vnifiltering.sh TEST_GEN_FILES += io_uring_zerocopy_tx TEST_PROGS += io_uring_zerocopy_tx.sh TEST_GEN_FILES += bind_bhash TEST_GEN_PROGS += sk_bind_sendto_listen TEST_GEN_PROGS += sk_connect_zero_addr +TEST_GEN_PROGS += sk_so_peek_off TEST_PROGS += test_ingress_egress_chaining.sh TEST_GEN_PROGS += so_incoming_cpu TEST_PROGS += sctp_vrf.sh TEST_GEN_FILES += sctp_hello -TEST_GEN_FILES += csum -TEST_GEN_FILES += nat6to4.o -TEST_GEN_FILES += xdp_dummy.o TEST_GEN_FILES += ip_local_port_range -TEST_GEN_FILES += bind_wildcard +TEST_GEN_PROGS += bind_wildcard +TEST_GEN_PROGS += bind_timewait TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh -TEST_PROGS += fdb_flush.sh +TEST_PROGS += fdb_flush.sh fdb_notify.sh TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh +TEST_PROGS += vlan_bridge_binding.sh +TEST_PROGS += bpf_offload.py +TEST_PROGS += ipv6_route_update_soft_lockup.sh +TEST_PROGS += busy_poll_test.sh +TEST_GEN_PROGS += proc_net_pktgen +TEST_PROGS += lwt_dst_cache_ref_loop.sh +TEST_PROGS += skf_net_off.sh +TEST_GEN_FILES += skf_net_off + +# YNL files, must be before "include ..lib.mk" +YNL_GEN_FILES := busy_poller netlink-dumps +TEST_GEN_FILES += $(YNL_GEN_FILES) TEST_FILES := settings -TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh +TEST_FILES += in_netns.sh lib.sh setup_loopback.sh setup_veth.sh + +TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) TEST_INCLUDES := forwarding/lib.sh include ../lib.mk +# YNL build +YNL_GENS := netdev +include ynl.mk + +$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread $(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/ -# Rules to generate bpf objs -CLANG ?= clang -SCRATCH_DIR := $(OUTPUT)/tools -BUILD_DIR := $(SCRATCH_DIR)/build -BPFDIR := $(abspath ../../../lib/bpf) -APIDIR := $(abspath ../../../include/uapi) - -CCINCLUDE += -I../bpf -CCINCLUDE += -I../../../../usr/include/ -CCINCLUDE += -I$(SCRATCH_DIR)/include - -BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a - -MAKE_DIRS := $(BUILD_DIR)/libbpf -$(MAKE_DIRS): - mkdir -p $@ - -# Get Clang's default includes on this system, as opposed to those seen by -# '--target=bpf'. This fixes "missing" files on some architectures/distros, -# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. -# -# Use '-idirafter': Don't interfere with include mechanics except where the -# build would have failed anyways. -define get_sys_includes -$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ - | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') -endef - -ifneq ($(CROSS_COMPILE),) -CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) -endif - -CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) - -$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS) - $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ - -$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ - $(APIDIR)/linux/bpf.h \ - | $(BUILD_DIR)/libbpf - $(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ - EXTRA_CFLAGS='-g -O0' \ - DESTDIR=$(SCRATCH_DIR) prefix= all install_headers - -EXTRA_CLEAN := $(SCRATCH_DIR) +include bpf.mk diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 221c387a7d7f..50584479540b 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd +TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config new file mode 100644 index 000000000000..37368567768c --- /dev/null +++ b/tools/testing/selftests/net/af_unix/config @@ -0,0 +1,3 @@ +CONFIG_UNIX=y +CONFIG_AF_UNIX_OOB=y +CONFIG_UNIX_DIAG=m diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c new file mode 100644 index 000000000000..3ed3882a93b8 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -0,0 +1,757 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +#include <netinet/in.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/signalfd.h> +#include <sys/socket.h> + +#include "../../kselftest_harness.h" + +#define BUF_SZ 32 + +FIXTURE(msg_oob) +{ + int fd[4]; /* 0: AF_UNIX sender + * 1: AF_UNIX receiver + * 2: TCP sender + * 3: TCP receiver + */ + int signal_fd; + int epoll_fd[2]; /* 0: AF_UNIX receiver + * 1: TCP receiver + */ + bool tcp_compliant; +}; + +FIXTURE_VARIANT(msg_oob) +{ + bool peek; +}; + +FIXTURE_VARIANT_ADD(msg_oob, no_peek) +{ + .peek = false, +}; + +FIXTURE_VARIANT_ADD(msg_oob, peek) +{ + .peek = true +}; + +static void create_unix_socketpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + int ret; + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, self->fd); + ASSERT_EQ(ret, 0); +} + +static void create_tcp_socketpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct sockaddr_in addr; + socklen_t addrlen; + int listen_fd; + int ret; + + listen_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(listen_fd, 0); + + ret = listen(listen_fd, -1); + ASSERT_EQ(ret, 0); + + addrlen = sizeof(addr); + ret = getsockname(listen_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_EQ(ret, 0); + + self->fd[2] = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(self->fd[2], 0); + + ret = connect(self->fd[2], (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(ret, 0); + + self->fd[3] = accept(listen_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_GE(self->fd[3], 0); + + ret = fcntl(self->fd[3], F_SETFL, O_NONBLOCK); + ASSERT_EQ(ret, 0); +} + +static void setup_sigurg(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct signalfd_siginfo siginfo; + int pid = getpid(); + sigset_t mask; + int i, ret; + + for (i = 0; i < 2; i++) { + ret = ioctl(self->fd[i * 2 + 1], FIOSETOWN, &pid); + ASSERT_EQ(ret, 0); + } + + ret = sigemptyset(&mask); + ASSERT_EQ(ret, 0); + + ret = sigaddset(&mask, SIGURG); + ASSERT_EQ(ret, 0); + + ret = sigprocmask(SIG_BLOCK, &mask, NULL); + ASSERT_EQ(ret, 0); + + self->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK); + ASSERT_GE(self->signal_fd, 0); + + ret = read(self->signal_fd, &siginfo, sizeof(siginfo)); + ASSERT_EQ(ret, -1); +} + +static void setup_epollpri(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct epoll_event event = { + .events = EPOLLPRI, + }; + int i; + + for (i = 0; i < 2; i++) { + int ret; + + self->epoll_fd[i] = epoll_create1(0); + ASSERT_GE(self->epoll_fd[i], 0); + + ret = epoll_ctl(self->epoll_fd[i], EPOLL_CTL_ADD, self->fd[i * 2 + 1], &event); + ASSERT_EQ(ret, 0); + } +} + +static void close_sockets(FIXTURE_DATA(msg_oob) *self) +{ + int i; + + for (i = 0; i < 4; i++) + close(self->fd[i]); +} + +FIXTURE_SETUP(msg_oob) +{ + create_unix_socketpair(_metadata, self); + create_tcp_socketpair(_metadata, self); + + setup_sigurg(_metadata, self); + setup_epollpri(_metadata, self); + + self->tcp_compliant = true; +} + +FIXTURE_TEARDOWN(msg_oob) +{ + close_sockets(self); +} + +static void __epollpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + bool oob_remaining) +{ + struct epoll_event event[2] = {}; + int i, ret[2]; + + for (i = 0; i < 2; i++) + ret[i] = epoll_wait(self->epoll_fd[i], &event[i], 1, 0); + + ASSERT_EQ(ret[0], oob_remaining); + + if (self->tcp_compliant) + ASSERT_EQ(ret[0], ret[1]); + + if (oob_remaining) { + ASSERT_EQ(event[0].events, EPOLLPRI); + + if (self->tcp_compliant) + ASSERT_EQ(event[0].events, event[1].events); + } +} + +static void __sendpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const void *buf, size_t len, int flags) +{ + int i, ret[2]; + + for (i = 0; i < 2; i++) { + struct signalfd_siginfo siginfo = {}; + int bytes; + + ret[i] = send(self->fd[i * 2], buf, len, flags); + + bytes = read(self->signal_fd, &siginfo, sizeof(siginfo)); + + if (flags & MSG_OOB) { + ASSERT_EQ(bytes, sizeof(siginfo)); + ASSERT_EQ(siginfo.ssi_signo, SIGURG); + + bytes = read(self->signal_fd, &siginfo, sizeof(siginfo)); + } + + ASSERT_EQ(bytes, -1); + } + + ASSERT_EQ(ret[0], len); + ASSERT_EQ(ret[0], ret[1]); +} + +static void __recvpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const char *expected_buf, int expected_len, + int buf_len, int flags) +{ + int i, ret[2], recv_errno[2], expected_errno = 0; + char recv_buf[2][BUF_SZ] = {}; + bool printed = false; + + ASSERT_GE(BUF_SZ, buf_len); + + errno = 0; + + for (i = 0; i < 2; i++) { + ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags); + recv_errno[i] = errno; + } + + if (expected_len < 0) { + expected_errno = -expected_len; + expected_len = -1; + } + + if (ret[0] != expected_len || recv_errno[0] != expected_errno) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf); + + ASSERT_EQ(ret[0], expected_len); + ASSERT_EQ(recv_errno[0], expected_errno); + } + + if (ret[0] != ret[1] || recv_errno[0] != recv_errno[1]) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); + + printed = true; + + if (self->tcp_compliant) { + ASSERT_EQ(ret[0], ret[1]); + ASSERT_EQ(recv_errno[0], recv_errno[1]); + } + } + + if (expected_len >= 0) { + int cmp; + + cmp = strncmp(expected_buf, recv_buf[0], expected_len); + if (cmp) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf); + + ASSERT_EQ(cmp, 0); + } + + cmp = strncmp(recv_buf[0], recv_buf[1], expected_len); + if (cmp) { + if (!printed) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); + } + + if (self->tcp_compliant) + ASSERT_EQ(cmp, 0); + } + } +} + +static void __setinlinepair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + int i, oob_inline = 1; + + for (i = 0; i < 2; i++) { + int ret; + + ret = setsockopt(self->fd[i * 2 + 1], SOL_SOCKET, SO_OOBINLINE, + &oob_inline, sizeof(oob_inline)); + ASSERT_EQ(ret, 0); + } +} + +static void __siocatmarkpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + bool oob_head) +{ + int answ[2] = {}; + int i; + + for (i = 0; i < 2; i++) { + int ret; + + ret = ioctl(self->fd[i * 2 + 1], SIOCATMARK, &answ[i]); + ASSERT_EQ(ret, 0); + } + + ASSERT_EQ(answ[0], oob_head); + + if (self->tcp_compliant) + ASSERT_EQ(answ[0], answ[1]); +} + +#define sendpair(buf, len, flags) \ + __sendpair(_metadata, self, buf, len, flags) + +#define recvpair(expected_buf, expected_len, buf_len, flags) \ + do { \ + if (variant->peek) \ + __recvpair(_metadata, self, \ + expected_buf, expected_len, \ + buf_len, (flags) | MSG_PEEK); \ + __recvpair(_metadata, self, \ + expected_buf, expected_len, buf_len, flags); \ + } while (0) + +#define epollpair(oob_remaining) \ + __epollpair(_metadata, self, oob_remaining) + +#define siocatmarkpair(oob_head) \ + __siocatmarkpair(_metadata, self, oob_head) + +#define setinlinepair() \ + __setinlinepair(_metadata, self) + +#define tcp_incompliant \ + for (self->tcp_compliant = false; \ + self->tcp_compliant == false; \ + self->tcp_compliant = true) + +TEST_F(msg_oob, non_oob) +{ + sendpair("x", 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); /* Drop OOB. */ + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_ahead) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 5, 0); /* Break at OOB even with enough buffer. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + recvpair("hell", 4, 9, 0); /* Break at OOB even after it's recv()ed. */ + epollpair(false); + siocatmarkpair(true); + + recvpair("world", 5, 5, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_break_drop) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 10, 0); /* Break at OOB even with enough buffer. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("world", 5, 10, 0); /* Drop OOB and recv() the next skb. */ + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, ex_oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("ld", 2, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("r", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + recvpair("ld", 2, 2, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, ex_oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP drops "y" by passing through it. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); /* TCP returns -EINVAL. */ + epollpair(false); + siocatmarkpair(true); + } +} + +TEST_F(msg_oob, ex_oob_drop_2) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + tcp_incompliant { + siocatmarkpair(false); + } + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP returns -EAGAIN. */ + epollpair(false); + siocatmarkpair(true); + } +} + +TEST_F(msg_oob, ex_oob_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, ex_oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("r", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + sendpair("ld", 2, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + tcp_incompliant { + recvpair("hellowol", 8, 10, 0); /* TCP recv()s "helloworl", why "r" ?? */ + } + + epollpair(true); + siocatmarkpair(true); + + recvpair("d", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, ex_oob_siocatmark) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + sendpair("world", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ + epollpair(true); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob) +{ + setinlinepair(); + + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob_break) +{ + setinlinepair(); + + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 5, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("o", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + setinlinepair(); + + recvpair("hell", 4, 9, 0); /* Break at OOB even with enough buffer. */ + epollpair(false); + siocatmarkpair(true); + + tcp_incompliant { + recvpair("world", 5, 6, 0); /* TCP recv()s "oworld", ... "o" ??? */ + } + + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("ld", 2, 0); + epollpair(true); + siocatmarkpair(false); + + setinlinepair(); + + recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("rld", 3, 3, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_no_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + setinlinepair(); + + sendpair("y", 1, MSG_OOB); /* TCP does NOT drops "x" at this moment. */ + epollpair(true); + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + setinlinepair(); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP recv()s "y". */ + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, 0); /* TCP returns -EAGAIN. */ + epollpair(false); + siocatmarkpair(false); + } +} + +TEST_F(msg_oob, inline_ex_oob_siocatmark) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + setinlinepair(); + + sendpair("world", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ + epollpair(true); + siocatmarkpair(false); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c new file mode 100644 index 000000000000..8b015f16c03d --- /dev/null +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ +#define _GNU_SOURCE +#include <sched.h> + +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> + +#include "../../kselftest_harness.h" + +FIXTURE(scm_rights) +{ + int fd[32]; +}; + +FIXTURE_VARIANT(scm_rights) +{ + char name[32]; + int type; + int flags; + bool test_listener; + bool disabled; +}; + +FIXTURE_VARIANT_ADD(scm_rights, dgram) +{ + .name = "UNIX ", + .type = SOCK_DGRAM, + .flags = 0, + .test_listener = false, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, dgram_disabled) +{ + .name = "UNIX ", + .type = SOCK_DGRAM, + .flags = 0, + .test_listener = false, + .disabled = true, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = false, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = false, + .disabled = true, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_oob) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = false, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_oob_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = false, + .disabled = true, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_listener) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = true, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_listener_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = true, + .disabled = true, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = true, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = true, + .disabled = true, +}; + +static int count_sockets(struct __test_metadata *_metadata, + const FIXTURE_VARIANT(scm_rights) *variant) +{ + int sockets = -1, len, ret; + char *line = NULL; + size_t unused; + FILE *f; + + f = fopen("/proc/net/protocols", "r"); + ASSERT_NE(NULL, f); + + len = strlen(variant->name); + + while (getline(&line, &unused, f) != -1) { + int unused2; + + if (strncmp(line, variant->name, len)) + continue; + + ret = sscanf(line + len, "%d %d", &unused2, &sockets); + ASSERT_EQ(2, ret); + + break; + } + + free(line); + + ret = fclose(f); + ASSERT_EQ(0, ret); + + return sockets; +} + +FIXTURE_SETUP(scm_rights) +{ + int ret; + + ret = unshare(CLONE_NEWNET); + ASSERT_EQ(0, ret); + + if (variant->disabled) + return; + + ret = count_sockets(_metadata, variant); + ASSERT_EQ(0, ret); +} + +FIXTURE_TEARDOWN(scm_rights) +{ + int ret; + + if (variant->disabled) + return; + + sleep(1); + + ret = count_sockets(_metadata, variant); + ASSERT_EQ(0, ret); +} + +static void create_listeners(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_rights) *self, + const FIXTURE_VARIANT(scm_rights) *variant, + int n) +{ + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + socklen_t addrlen; + int i, ret; + + for (i = 0; i < n * 2; i += 2) { + self->fd[i] = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_LE(0, self->fd[i]); + + addrlen = sizeof(addr.sun_family); + ret = bind(self->fd[i], (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(0, ret); + + ret = listen(self->fd[i], -1); + ASSERT_EQ(0, ret); + + if (variant->disabled) { + ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS, + &(int){0}, sizeof(int)); + ASSERT_EQ(0, ret); + } + + addrlen = sizeof(addr); + ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen); + ASSERT_EQ(0, ret); + + self->fd[i + 1] = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_LE(0, self->fd[i + 1]); + + ret = connect(self->fd[i + 1], (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(0, ret); + } +} + +static void create_socketpairs(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_rights) *self, + const FIXTURE_VARIANT(scm_rights) *variant, + int n) +{ + int i, ret; + + ASSERT_GE(sizeof(self->fd) / sizeof(int), n); + + for (i = 0; i < n * 2; i += 2) { + ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i); + ASSERT_EQ(0, ret); + + if (variant->disabled) { + ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS, + &(int){0}, sizeof(int)); + ASSERT_EQ(0, ret); + } + } +} + +static void __create_sockets(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_rights) *self, + const FIXTURE_VARIANT(scm_rights) *variant, + int n) +{ + ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0])); + + if (variant->test_listener) + create_listeners(_metadata, self, variant, n); + else + create_socketpairs(_metadata, self, variant, n); +} + +static void __close_sockets(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_rights) *self, + int n) +{ + int i, ret; + + ASSERT_GE(sizeof(self->fd) / sizeof(int), n); + + for (i = 0; i < n * 2; i++) { + ret = close(self->fd[i]); + ASSERT_EQ(0, ret); + } +} + +void __send_fd(struct __test_metadata *_metadata, + const FIXTURE_DATA(scm_rights) *self, + const FIXTURE_VARIANT(scm_rights) *variant, + int inflight, int receiver) +{ +#define MSG "x" +#define MSGLEN 1 + struct { + struct cmsghdr cmsghdr; + int fd[2]; + } cmsg = { + .cmsghdr = { + .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)), + .cmsg_level = SOL_SOCKET, + .cmsg_type = SCM_RIGHTS, + }, + .fd = { + self->fd[inflight * 2], + self->fd[inflight * 2], + }, + }; + struct iovec iov = { + .iov_base = MSG, + .iov_len = MSGLEN, + }; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = &cmsg, + .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)), + }; + int ret; + + ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags); + + if (variant->disabled) { + ASSERT_EQ(-1, ret); + ASSERT_EQ(-EPERM, -errno); + } else { + ASSERT_EQ(MSGLEN, ret); + } +} + +#define create_sockets(n) \ + __create_sockets(_metadata, self, variant, n) +#define close_sockets(n) \ + __close_sockets(_metadata, self, n) +#define send_fd(inflight, receiver) \ + __send_fd(_metadata, self, variant, inflight, receiver) + +TEST_F(scm_rights, self_ref) +{ + create_sockets(2); + + send_fd(0, 0); + + send_fd(1, 1); + + close_sockets(2); +} + +TEST_F(scm_rights, triangle) +{ + create_sockets(6); + + send_fd(0, 1); + send_fd(1, 2); + send_fd(2, 0); + + send_fd(3, 4); + send_fd(4, 5); + send_fd(5, 3); + + close_sockets(6); +} + +TEST_F(scm_rights, cross_edge) +{ + create_sockets(8); + + send_fd(0, 1); + send_fd(1, 2); + send_fd(2, 0); + send_fd(1, 3); + send_fd(3, 2); + + send_fd(4, 5); + send_fd(5, 6); + send_fd(6, 4); + send_fd(5, 7); + send_fd(7, 6); + + close_sockets(8); +} + +TEST_F(scm_rights, backtrack_from_scc) +{ + create_sockets(10); + + send_fd(0, 1); + send_fd(0, 4); + send_fd(1, 2); + send_fd(2, 3); + send_fd(3, 1); + + send_fd(5, 6); + send_fd(5, 9); + send_fd(6, 7); + send_fd(7, 8); + send_fd(8, 6); + + close_sockets(10); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c deleted file mode 100644 index a7c51889acd5..000000000000 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ /dev/null @@ -1,436 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -#include <stdio.h> -#include <stdlib.h> -#include <sys/socket.h> -#include <arpa/inet.h> -#include <unistd.h> -#include <string.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <errno.h> -#include <netinet/tcp.h> -#include <sys/un.h> -#include <sys/signal.h> -#include <sys/poll.h> - -static int pipefd[2]; -static int signal_recvd; -static pid_t producer_id; -static char sock_name[32]; - -static void sig_hand(int sn, siginfo_t *si, void *p) -{ - signal_recvd = sn; -} - -static int set_sig_handler(int signal) -{ - struct sigaction sa; - - sa.sa_sigaction = sig_hand; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_SIGINFO | SA_RESTART; - - return sigaction(signal, &sa, NULL); -} - -static void set_filemode(int fd, int set) -{ - int flags = fcntl(fd, F_GETFL, 0); - - if (set) - flags &= ~O_NONBLOCK; - else - flags |= O_NONBLOCK; - fcntl(fd, F_SETFL, flags); -} - -static void signal_producer(int fd) -{ - char cmd; - - cmd = 'S'; - write(fd, &cmd, sizeof(cmd)); -} - -static void wait_for_signal(int fd) -{ - char buf[5]; - - read(fd, buf, 5); -} - -static void die(int status) -{ - fflush(NULL); - unlink(sock_name); - kill(producer_id, SIGTERM); - exit(status); -} - -int is_sioctatmark(int fd) -{ - int ans = -1; - - if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) { -#ifdef DEBUG - perror("SIOCATMARK Failed"); -#endif - } - return ans; -} - -void read_oob(int fd, char *c) -{ - - *c = ' '; - if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) { -#ifdef DEBUG - perror("Reading MSG_OOB Failed"); -#endif - } -} - -int read_data(int pfd, char *buf, int size) -{ - int len = 0; - - memset(buf, size, '0'); - len = read(pfd, buf, size); -#ifdef DEBUG - if (len < 0) - perror("read failed"); -#endif - return len; -} - -static void wait_for_data(int pfd, int event) -{ - struct pollfd pfds[1]; - - pfds[0].fd = pfd; - pfds[0].events = event; - poll(pfds, 1, -1); -} - -void producer(struct sockaddr_un *consumer_addr) -{ - int cfd; - char buf[64]; - int i; - - memset(buf, 'x', sizeof(buf)); - cfd = socket(AF_UNIX, SOCK_STREAM, 0); - - wait_for_signal(pipefd[0]); - if (connect(cfd, (struct sockaddr *)consumer_addr, - sizeof(*consumer_addr)) != 0) { - perror("Connect failed"); - kill(0, SIGTERM); - exit(1); - } - - for (i = 0; i < 2; i++) { - /* Test 1: Test for SIGURG and OOB */ - wait_for_signal(pipefd[0]); - memset(buf, 'x', sizeof(buf)); - buf[63] = '@'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - wait_for_signal(pipefd[0]); - - /* Test 2: Test for OOB being overwitten */ - memset(buf, 'x', sizeof(buf)); - buf[63] = '%'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - buf[63] = '#'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - wait_for_signal(pipefd[0]); - - /* Test 3: Test for SIOCATMARK */ - memset(buf, 'x', sizeof(buf)); - buf[63] = '@'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - buf[63] = '%'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - send(cfd, buf, sizeof(buf), 0); - - wait_for_signal(pipefd[0]); - - /* Test 4: Test for 1byte OOB msg */ - memset(buf, 'x', sizeof(buf)); - buf[0] = '@'; - send(cfd, buf, 1, MSG_OOB); - } -} - -int -main(int argc, char **argv) -{ - int lfd, pfd; - struct sockaddr_un consumer_addr, paddr; - socklen_t len = sizeof(consumer_addr); - char buf[1024]; - int on = 0; - char oob; - int atmark; - - lfd = socket(AF_UNIX, SOCK_STREAM, 0); - memset(&consumer_addr, 0, sizeof(consumer_addr)); - consumer_addr.sun_family = AF_UNIX; - sprintf(sock_name, "unix_oob_%d", getpid()); - unlink(sock_name); - strcpy(consumer_addr.sun_path, sock_name); - - if ((bind(lfd, (struct sockaddr *)&consumer_addr, - sizeof(consumer_addr))) != 0) { - perror("socket bind failed"); - exit(1); - } - - pipe(pipefd); - - listen(lfd, 1); - - producer_id = fork(); - if (producer_id == 0) { - producer(&consumer_addr); - exit(0); - } - - set_sig_handler(SIGURG); - signal_producer(pipefd[1]); - - pfd = accept(lfd, (struct sockaddr *) &paddr, &len); - fcntl(pfd, F_SETOWN, getpid()); - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 1: - * veriyf that SIGURG is - * delivered, 63 bytes are - * read, oob is '@', and POLLPRI works. - */ - wait_for_data(pfd, POLLPRI); - read_oob(pfd, &oob); - len = read_data(pfd, buf, 1024); - if (!signal_recvd || len != 63 || oob != '@') { - fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 2: - * Verify that the first OOB is over written by - * the 2nd one and the first OOB is returned as - * part of the read, and sigurg is received. - */ - wait_for_data(pfd, POLLIN | POLLPRI); - len = 0; - while (len < 70) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - read_oob(pfd, &oob); - if (!signal_recvd || len != 127 || oob != '#') { - fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 3: - * verify that 2nd oob over writes - * the first one and read breaks at - * oob boundary returning 127 bytes - * and sigurg is received and atmark - * is set. - * oob is '%' and second read returns - * 64 bytes. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 150) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - read_oob(pfd, &oob); - - if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) { - fprintf(stderr, - "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n", - signal_recvd, len, oob, atmark); - die(1); - } - - signal_recvd = 0; - - len = read_data(pfd, buf, 1024); - if (len != 64) { - fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 4: - * verify that a single byte - * oob message is delivered. - * set non blocking mode and - * check proper error is - * returned and sigurg is - * received and correct - * oob is read. - */ - - set_filemode(pfd, 0); - - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - if ((len == -1) && (errno == 11)) - len = 0; - - read_oob(pfd, &oob); - - if (!signal_recvd || len != 0 || oob != '@') { - fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - set_filemode(pfd, 1); - - /* Inline Testing */ - - on = 1; - if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) { - perror("SO_OOBINLINE"); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 1 -- Inline: - * Check that SIGURG is - * delivered and 63 bytes are - * read and oob is '@' - */ - - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - - if (!signal_recvd || len != 63) { - fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n", - signal_recvd, len); - die(1); - } - - len = read_data(pfd, buf, 1024); - - if (len != 1) { - fprintf(stderr, - "Test 1.1 Inline failed, sigurg %d len %d oob %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 2 -- Inline: - * Verify that the first OOB is over written by - * the 2nd one and read breaks correctly on - * 2nd OOB boundary with the first OOB returned as - * part of the read, and sigurg is delivered and - * siocatmark returns true. - * next read returns one byte, the oob byte - * and siocatmark returns false. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 70) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 127 || atmark != 1 || !signal_recvd) { - fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n", - len, atmark); - die(1); - } - - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 1 || buf[0] != '#' || atmark == 1) { - fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n", - len, buf[0], atmark); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 3 -- Inline: - * verify that 2nd oob over writes - * the first one and read breaks at - * oob boundary returning 127 bytes - * and sigurg is received and siocatmark - * is true after the read. - * subsequent read returns 65 bytes - * because of oob which should be '%'. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 126) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (!signal_recvd || len != 127 || !atmark) { - fprintf(stderr, - "Test 3 Inline failed, sigurg %d len %d data %c\n", - signal_recvd, len, buf[0]); - die(1); - } - - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 65 || buf[0] != '%' || atmark != 0) { - fprintf(stderr, - "Test 3.1 Inline failed, len %d oob %c atmark %d\n", - len, buf[0], atmark); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 4 -- Inline: - * verify that a single - * byte oob message is delivered - * and read returns one byte, the oob - * byte and sigurg is received - */ - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - if (!signal_recvd || len != 1 || buf[0] != '@') { - fprintf(stderr, - "Test 4 Inline failed, signal %d len %d data %c\n", - signal_recvd, len, buf[0]); - die(1); - } - die(0); -} diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh index 75528788cb95..3ef209cacb8e 100755 --- a/tools/testing/selftests/net/amt.sh +++ b/tools/testing/selftests/net/amt.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Author: Taehee Yoo <ap420073@gmail.com> @@ -77,6 +77,7 @@ readonly LISTENER=$(mktemp -u listener-XXXXXXXX) readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX) readonly RELAY=$(mktemp -u relay-XXXXXXXX) readonly SOURCE=$(mktemp -u source-XXXXXXXX) +readonly SMCROUTEDIR="$(mktemp -d)" ERR=4 err=0 @@ -85,6 +86,11 @@ exit_cleanup() for ns in "$@"; do ip netns delete "${ns}" 2>/dev/null || true done + if [ -f "$SMCROUTEDIR/amt.pid" ]; then + smcpid=$(< $SMCROUTEDIR/amt.pid) + kill $smcpid + fi + rm -rf $SMCROUTEDIR exit $ERR } @@ -167,7 +173,7 @@ setup_iptables() setup_mcast_routing() { - ip netns exec "${RELAY}" smcrouted + ip netns exec "${RELAY}" smcrouted -P $SMCROUTEDIR/amt.pid ip netns exec "${RELAY}" smcroutectl a relay_src \ 172.17.0.2 239.0.0.1 amtr ip netns exec "${RELAY}" smcroutectl a relay_src \ @@ -188,15 +194,21 @@ test_remote_ip() send_mcast_torture4() { - ip netns exec "${SOURCE}" bash -c \ - 'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001' + for i in `seq 10`; do + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 100M | nc -w 1 -u 239.0.0.1 4001' + echo -n "." + done } send_mcast_torture6() { - ip netns exec "${SOURCE}" bash -c \ - 'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001' + for i in `seq 10`; do + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 100M | nc -w 1 -u ff0e::5:6 6001' + echo -n "." + done } check_features() @@ -210,8 +222,8 @@ check_features() test_ipv4_forward() { - RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000) - if [ "$RESULT4" == "172.17.0.2" ]; then + RESULT4=$(ip netns exec "${LISTENER}" timeout 15 socat - UDP4-LISTEN:4000,readbytes=128 || true) + if echo "$RESULT4" | grep -q "172.17.0.2"; then printf "TEST: %-60s [ OK ]\n" "IPv4 amt multicast forwarding" exit 0 else @@ -222,8 +234,8 @@ test_ipv4_forward() test_ipv6_forward() { - RESULT6=$(ip netns exec "${LISTENER}" nc -w 1 -l -u ff0e::5:6 6000) - if [ "$RESULT6" == "2001:db8:3::2" ]; then + RESULT6=$(ip netns exec "${LISTENER}" timeout 15 socat - UDP6-LISTEN:6000,readbytes=128 || true) + if echo "$RESULT6" | grep -q "2001:db8:3::2"; then printf "TEST: %-60s [ OK ]\n" "IPv6 amt multicast forwarding" exit 0 else @@ -236,14 +248,14 @@ send_mcast4() { sleep 2 ip netns exec "${SOURCE}" bash -c \ - 'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' & + 'printf "%s %128s" 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' & } send_mcast6() { sleep 2 ip netns exec "${SOURCE}" bash -c \ - 'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' & + 'printf "%s %128s" 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' & } check_features @@ -272,10 +284,12 @@ wait $pid || err=$? if [ $err -eq 1 ]; then ERR=1 fi +printf "TEST: %-50s" "IPv4 amt traffic forwarding torture" send_mcast_torture4 -printf "TEST: %-60s [ OK ]\n" "IPv4 amt traffic forwarding torture" +printf " [ OK ]\n" +printf "TEST: %-50s" "IPv6 amt traffic forwarding torture" send_mcast_torture6 -printf "TEST: %-60s [ OK ]\n" "IPv6 amt traffic forwarding torture" +printf " [ OK ]\n" sleep 5 if [ "${ERR}" -eq 1 ]; then echo "Some tests failed." >&2 diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh index a40c0e9bd023..eef5cbf6eecc 100755 --- a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh +++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh @@ -73,25 +73,19 @@ setup_v6() { # namespaces. veth0 is veth-router, veth1 is veth-host. # first, set up the inteface's link to the namespace # then, set the interface "up" - ip -6 -netns ${ROUTER_NS_V6} link add name ${ROUTER_INTF} \ - type veth peer name ${HOST_INTF} - - ip -6 -netns ${ROUTER_NS_V6} link set dev ${ROUTER_INTF} up - ip -6 -netns ${ROUTER_NS_V6} link set dev ${HOST_INTF} netns \ - ${HOST_NS_V6} + ip -n ${ROUTER_NS_V6} link add name ${ROUTER_INTF} \ + type veth peer name ${HOST_INTF} netns ${HOST_NS_V6} - ip -6 -netns ${HOST_NS_V6} link set dev ${HOST_INTF} up - ip -6 -netns ${ROUTER_NS_V6} addr add \ - ${ROUTER_ADDR_V6}/${PREFIX_WIDTH_V6} dev ${ROUTER_INTF} nodad + # Add tc rule to filter out host na message + tc -n ${ROUTER_NS_V6} qdisc add dev ${ROUTER_INTF} clsact + tc -n ${ROUTER_NS_V6} filter add dev ${ROUTER_INTF} \ + ingress protocol ipv6 pref 1 handle 101 \ + flower src_ip ${HOST_ADDR_V6} ip_proto icmpv6 type 136 skip_hw action pass HOST_CONF=net.ipv6.conf.${HOST_INTF} ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.ndisc_notify=1 ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.disable_ipv6=0 - ip -6 -netns ${HOST_NS_V6} addr add ${HOST_ADDR_V6}/${PREFIX_WIDTH_V6} \ - dev ${HOST_INTF} - ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF} - ip netns exec ${ROUTER_NS_V6} sysctl -w \ ${ROUTER_CONF}.forwarding=1 >/dev/null 2>&1 ip netns exec ${ROUTER_NS_V6} sysctl -w \ @@ -99,6 +93,13 @@ setup_v6() { ip netns exec ${ROUTER_NS_V6} sysctl -w \ ${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na} \ >/dev/null 2>&1 + + ip -n ${ROUTER_NS_V6} link set dev ${ROUTER_INTF} up + ip -n ${HOST_NS_V6} link set dev ${HOST_INTF} up + ip -n ${ROUTER_NS_V6} addr add ${ROUTER_ADDR_V6}/${PREFIX_WIDTH_V6} \ + dev ${ROUTER_INTF} nodad + ip -n ${HOST_NS_V6} addr add ${HOST_ADDR_V6}/${PREFIX_WIDTH_V6} \ + dev ${HOST_INTF} set +e } @@ -162,26 +163,6 @@ arp_test_gratuitous_combinations() { arp_test_gratuitous 2 1 } -cleanup_tcpdump() { - set -e - [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout} - [[ ! -z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr} - tcpdump_stdout= - tcpdump_stderr= - set +e -} - -start_tcpdump() { - set -e - tcpdump_stdout=`mktemp` - tcpdump_stderr=`mktemp` - ip netns exec ${ROUTER_NS_V6} timeout 15s \ - tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \ - "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR_V6}" \ - > ${tcpdump_stdout} 2> /dev/null - set +e -} - verify_ndisc() { local accept_untracked_na=$1 local same_subnet=$2 @@ -222,8 +203,9 @@ ndisc_test_untracked_advertisements() { HOST_ADDR_V6=2001:db8:abcd:0012::3 fi fi - setup_v6 $1 $2 - start_tcpdump + setup_v6 $1 + slowwait_for_counter 15 1 \ + tc_rule_handle_stats_get "dev ${ROUTER_INTF} ingress" 101 ".packets" "-n ${ROUTER_NS_V6}" if verify_ndisc $1 $2; then printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}" @@ -231,7 +213,6 @@ ndisc_test_untracked_advertisements() { printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}" fi - cleanup_tcpdump cleanup_v6 set +e } diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh index f366cadbc5e8..4046131e7888 100755 --- a/tools/testing/selftests/net/bareudp.sh +++ b/tools/testing/selftests/net/bareudp.sh @@ -106,26 +106,16 @@ # | | # +-----------------------------------------------------------------------+ +. ./lib.sh + ERR=4 # Return 4 by default, which is the SKIP code for kselftest PING6="ping" PAUSE_ON_FAIL="no" -readonly NS0=$(mktemp -u ns0-XXXXXXXX) -readonly NS1=$(mktemp -u ns1-XXXXXXXX) -readonly NS2=$(mktemp -u ns2-XXXXXXXX) -readonly NS3=$(mktemp -u ns3-XXXXXXXX) - # Exit the script after having removed the network namespaces it created -# -# Parameters: -# -# * The list of network namespaces to delete before exiting. -# exit_cleanup() { - for ns in "$@"; do - ip netns delete "${ns}" 2>/dev/null || true - done + cleanup_all_ns if [ "${ERR}" -eq 4 ]; then echo "Error: Setting up the testing environment failed." >&2 @@ -140,17 +130,7 @@ exit_cleanup() # namespaces created by this script are deleted. create_namespaces() { - ip netns add "${NS0}" || exit_cleanup - ip netns add "${NS1}" || exit_cleanup "${NS0}" - ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}" - ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}" -} - -# The trap function handler -# -exit_cleanup_all() -{ - exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}" + setup_ns NS0 NS1 NS2 NS3 || exit_cleanup } # Configure a network interface using a host route @@ -188,10 +168,6 @@ iface_config() # setup_underlay() { - for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do - ip -netns "${ns}" link set dev lo up - done; - ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}" ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}" ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}" @@ -234,14 +210,6 @@ setup_overlay_ipv4() ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1 ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10 ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33 - - # The intermediate namespaces don't have routes for the reverse path, - # as it will be handled by tc. So we need to ensure that rp_filter is - # not going to block the traffic. - ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0 - ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0 } setup_overlay_ipv6() @@ -521,13 +489,10 @@ done check_features -# Create namespaces before setting up the exit trap. -# Otherwise, exit_cleanup_all() could delete namespaces that were not created -# by this script. -create_namespaces - set -e -trap exit_cleanup_all EXIT +trap exit_cleanup EXIT + +create_namespaces setup_underlay setup_overlay_ipv4 diff --git a/tools/testing/selftests/net/bpf.mk b/tools/testing/selftests/net/bpf.mk new file mode 100644 index 000000000000..a4f6755dd894 --- /dev/null +++ b/tools/testing/selftests/net/bpf.mk @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: GPL-2.0 +# Rules to generate bpf objs +CLANG ?= clang +SCRATCH_DIR := $(OUTPUT)/tools +BUILD_DIR := $(SCRATCH_DIR)/build +BPFDIR := $(top_srcdir)/tools/lib/bpf +APIDIR := $(top_srcdir)/tools/include/uapi + +CCINCLUDE += -I$(selfdir)/bpf +CCINCLUDE += -I$(top_srcdir)/usr/include/ +CCINCLUDE += -I$(SCRATCH_DIR)/include + +BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a + +MAKE_DIRS := $(BUILD_DIR)/libbpf +$(MAKE_DIRS): + $(call msg,MKDIR,,$@) + $(Q)mkdir -p $@ + +# Get Clang's default includes on this system, as opposed to those seen by +# '--target=bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +define get_sys_includes +$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +endef + +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) + +BPF_PROG_OBJS := $(patsubst %.c,$(OUTPUT)/%.o,$(wildcard *.bpf.c)) + +$(BPF_PROG_OBJS): $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS) + $(call msg,BPF_PROG,,$@) + $(Q)$(CLANG) -O2 -g --target=bpf $(CCINCLUDE) $(CLANG_SYS_INCLUDES) \ + -c $< -o $@ + +$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + $(APIDIR)/linux/bpf.h \ + | $(BUILD_DIR)/libbpf + $(call msg,MAKE,,$@) + $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ + EXTRA_CFLAGS='-g -O0' \ + DESTDIR=$(SCRATCH_DIR) prefix= all install_headers + +EXTRA_CLEAN += $(SCRATCH_DIR) diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py new file mode 100755 index 000000000000..b2c271b79240 --- /dev/null +++ b/tools/testing/selftests/net/bpf_offload.py @@ -0,0 +1,1357 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2017 Netronome Systems, Inc. +# Copyright (c) 2019 Mellanox Technologies. All rights reserved +# +# This software is licensed under the GNU General License Version 2, +# June 1991 as shown in the file COPYING in the top-level directory of this +# source tree. +# +# THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE +# OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME +# THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +from datetime import datetime +import argparse +import errno +import json +import os +import pprint +import random +import re +import stat +import string +import struct +import subprocess +import time +import traceback + +from lib.py import NetdevSim, NetdevSimDev + + +logfile = None +log_level = 1 +skip_extack = False +bpf_test_dir = os.path.dirname(os.path.realpath(__file__)) +pp = pprint.PrettyPrinter() +devs = [] # devices we created for clean up +files = [] # files to be removed +netns = [] # net namespaces to be removed + +def log_get_sec(level=0): + return "*" * (log_level + level) + +def log_level_inc(add=1): + global log_level + log_level += add + +def log_level_dec(sub=1): + global log_level + log_level -= sub + +def log_level_set(level): + global log_level + log_level = level + +def log(header, data, level=None): + """ + Output to an optional log. + """ + if logfile is None: + return + if level is not None: + log_level_set(level) + + if not isinstance(data, str): + data = pp.pformat(data) + + if len(header): + logfile.write("\n" + log_get_sec() + " ") + logfile.write(header) + if len(header) and len(data.strip()): + logfile.write("\n") + logfile.write(data) + +def skip(cond, msg): + if not cond: + return + print("SKIP: " + msg) + log("SKIP: " + msg, "", level=1) + os.sys.exit(0) + +def fail(cond, msg): + if not cond: + return + print("FAIL: " + msg) + tb = "".join(traceback.extract_stack().format()) + print(tb) + log("FAIL: " + msg, tb, level=1) + os.sys.exit(1) + +def start_test(msg): + log(msg, "", level=1) + log_level_inc() + print(msg) + +def cmd(cmd, shell=True, include_stderr=False, background=False, fail=True): + """ + Run a command in subprocess and return tuple of (retval, stdout); + optionally return stderr as well as third value. + """ + proc = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if background: + msg = "%s START: %s" % (log_get_sec(1), + datetime.now().strftime("%H:%M:%S.%f")) + log("BKG " + proc.args, msg) + return proc + + return cmd_result(proc, include_stderr=include_stderr, fail=fail) + +def cmd_result(proc, include_stderr=False, fail=False): + stdout, stderr = proc.communicate() + stdout = stdout.decode("utf-8") + stderr = stderr.decode("utf-8") + proc.stdout.close() + proc.stderr.close() + + stderr = "\n" + stderr + if stderr[-1] == "\n": + stderr = stderr[:-1] + + sec = log_get_sec(1) + log("CMD " + proc.args, + "RETCODE: %d\n%s STDOUT:\n%s%s STDERR:%s\n%s END: %s" % + (proc.returncode, sec, stdout, sec, stderr, + sec, datetime.now().strftime("%H:%M:%S.%f"))) + + if proc.returncode != 0 and fail: + if len(stderr) > 0 and stderr[-1] == "\n": + stderr = stderr[:-1] + raise Exception("Command failed: %s\n%s" % (proc.args, stderr)) + + if include_stderr: + return proc.returncode, stdout, stderr + else: + return proc.returncode, stdout + +def rm(f): + cmd("rm -f %s" % (f)) + if f in files: + files.remove(f) + +def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False): + params = "" + if JSON: + params += "%s " % (flags["json"]) + + if ns: + ns = "ip netns exec %s " % (ns) + elif ns is None: + ns = "" + + if include_stderr: + ret, stdout, stderr = cmd(ns + name + " " + params + args, + fail=fail, include_stderr=True) + else: + ret, stdout = cmd(ns + name + " " + params + args, + fail=fail, include_stderr=False) + + if JSON and len(stdout.strip()) != 0: + out = json.loads(stdout) + else: + out = stdout + + if include_stderr: + return ret, out, stderr + else: + return ret, out + +def bpftool(args, JSON=True, ns="", fail=True, include_stderr=False): + return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, + fail=fail, include_stderr=include_stderr) + +def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True): + _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True) + # Remove the base progs + for p in base_progs: + if p in progs: + progs.remove(p) + if exclude_orphaned: + progs = [ p for p in progs if not p['orphaned'] ] + if expected is not None: + if len(progs) != expected: + fail(True, "%d BPF programs loaded, expected %d" % + (len(progs), expected)) + return progs + +def bpftool_map_list(expected=None, ns=""): + _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) + # Remove the base maps + maps = [m for m in maps if m not in base_maps and m.get('name') and m.get('name') not in base_map_names] + if expected is not None: + if len(maps) != expected: + fail(True, "%d BPF maps loaded, expected %d" % + (len(maps), expected)) + return maps + +def bpftool_prog_list_wait(expected=0, n_retry=20): + for i in range(n_retry): + nprogs = len(bpftool_prog_list()) + if nprogs == expected: + return + time.sleep(0.05) + raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) + +def bpftool_map_list_wait(expected=0, n_retry=20, ns=""): + nmaps = None + for i in range(n_retry): + maps = bpftool_map_list(ns=ns) + nmaps = len(maps) + if nmaps == expected: + return maps + time.sleep(0.05) + raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) + +def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None, + fail=True, include_stderr=False, dev_bind=None): + args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name) + if prog_type is not None: + args += " type " + prog_type + if dev is not None: + args += " dev " + dev + elif dev_bind is not None: + args += " xdpmeta_dev " + dev_bind + if len(maps): + args += " map " + " map ".join(maps) + + res = bpftool(args, fail=fail, include_stderr=include_stderr) + if res[0] == 0: + files.append(file_name) + return res + +def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False): + if force: + args = "-force " + args + return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, + fail=fail, include_stderr=include_stderr) + +def tc(args, JSON=True, ns="", fail=True, include_stderr=False): + return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, + fail=fail, include_stderr=include_stderr) + +def ethtool(dev, opt, args, fail=True): + return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail) + +def bpf_obj(name, sec="xdp", path=bpf_test_dir,): + return "obj %s sec %s" % (os.path.join(path, name), sec) + +def bpf_pinned(name): + return "pinned %s" % (name) + +def bpf_bytecode(bytecode): + return "bytecode \"%s\"" % (bytecode) + +def mknetns(n_retry=10): + for i in range(n_retry): + name = ''.join([random.choice(string.ascii_letters) for i in range(8)]) + ret, _ = ip("netns add %s" % (name), fail=False) + if ret == 0: + netns.append(name) + return name + return None + +def int2str(fmt, val): + ret = [] + for b in struct.pack(fmt, val): + ret.append(int(b)) + return " ".join(map(lambda x: str(x), ret)) + +def str2int(strtab): + inttab = [] + for i in strtab: + inttab.append(int(i, 16)) + ba = bytearray(inttab) + if len(strtab) == 4: + fmt = "I" + elif len(strtab) == 8: + fmt = "Q" + else: + raise Exception("String array of len %d can't be unpacked to an int" % + (len(strtab))) + return struct.unpack(fmt, ba)[0] + +class DebugfsDir: + """ + Class for accessing DebugFS directories as a dictionary. + """ + + def __init__(self, path): + self.path = path + self._dict = self._debugfs_dir_read(path) + + def __len__(self): + return len(self._dict.keys()) + + def __getitem__(self, key): + if type(key) is int: + key = list(self._dict.keys())[key] + return self._dict[key] + + def __setitem__(self, key, value): + log("DebugFS set %s = %s" % (key, value), "") + log_level_inc() + + cmd("echo '%s' > %s/%s" % (value, self.path, key)) + log_level_dec() + + _, out = cmd('cat %s/%s' % (self.path, key)) + self._dict[key] = out.strip() + + def _debugfs_dir_read(self, path): + dfs = {} + + log("DebugFS state for %s" % (path), "") + log_level_inc(add=2) + + _, out = cmd('ls ' + path) + for f in out.split(): + if f == "ports": + continue + + p = os.path.join(path, f) + if not os.stat(p).st_mode & stat.S_IRUSR: + continue + + if os.path.isfile(p): + # We need to init trap_flow_action_cookie before read it + if f == "trap_flow_action_cookie": + cmd('echo deadbeef > %s/%s' % (path, f)) + _, out = cmd('cat %s/%s' % (path, f)) + dfs[f] = out.strip() + elif os.path.isdir(p): + dfs[f] = DebugfsDir(p) + else: + raise Exception("%s is neither file nor directory" % (p)) + + log_level_dec() + log("DebugFS state", dfs) + log_level_dec() + + return dfs + +class BpfNetdevSimDev(NetdevSimDev): + """ + Class for netdevsim bus device and its attributes. + """ + def __init__(self, port_count=1, ns=None): + super().__init__(port_count, ns=ns) + devs.append(self) + + def _make_port(self, port_index, ifname): + return BpfNetdevSim(self, port_index, ifname, self.ns) + + def dfs_num_bound_progs(self): + path = os.path.join(self.dfs_dir, "bpf_bound_progs") + _, progs = cmd('ls %s' % (path)) + return len(progs.split()) + + def dfs_get_bound_progs(self, expected): + progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs")) + if expected is not None: + if len(progs) != expected: + fail(True, "%d BPF programs bound, expected %d" % + (len(progs), expected)) + return progs + + def remove(self): + super().remove() + devs.remove(self) + + +class BpfNetdevSim(NetdevSim): + """ + Class for netdevsim netdevice and its attributes. + """ + + def __init__(self, nsimdev, port_index, ifname, ns=None): + super().__init__(nsimdev, port_index, ifname, ns=ns) + + self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index) + self.dfs_refresh() + + def __getitem__(self, key): + return self.dev[key] + + def remove(self): + self.nsimdev.remove_nsim(self) + + def dfs_refresh(self): + self.dfs = DebugfsDir(self.dfs_dir) + return self.dfs + + def dfs_read(self, f): + path = os.path.join(self.dfs_dir, f) + _, data = cmd('cat %s' % (path)) + return data.strip() + + def wait_for_flush(self, bound=0, total=0, n_retry=20): + for i in range(n_retry): + nbound = self.nsimdev.dfs_num_bound_progs() + nprogs = len(bpftool_prog_list()) + if nbound == bound and nprogs == total: + return + time.sleep(0.05) + raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs)) + + def set_ns(self, ns): + name = ns if ns else "1" + ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns) + self.ns = ns + + def set_mtu(self, mtu, fail=True): + return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), + fail=fail) + + def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False, + fail=True, include_stderr=False): + if verbose: + bpf += " verbose" + return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf), + force=force, JSON=JSON, + fail=fail, include_stderr=include_stderr) + + def unset_xdp(self, mode, force=False, JSON=True, + fail=True, include_stderr=False): + return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode), + force=force, JSON=JSON, + fail=fail, include_stderr=include_stderr) + + def ip_link_show(self, xdp): + _, link = ip("link show dev %s" % (self['ifname'])) + if len(link) > 1: + raise Exception("Multiple objects on ip link show") + if len(link) < 1: + return {} + fail(xdp != "xdp" in link, + "XDP program not reporting in iplink (reported %s, expected %s)" % + ("xdp" in link, xdp)) + return link[0] + + def tc_add_ingress(self): + tc("qdisc add dev %s ingress" % (self['ifname'])) + + def tc_del_ingress(self): + tc("qdisc del dev %s ingress" % (self['ifname'])) + + def tc_flush_filters(self, bound=0, total=0): + self.tc_del_ingress() + self.tc_add_ingress() + self.wait_for_flush(bound=bound, total=total) + + def tc_show_ingress(self, expected=None): + # No JSON support, oh well... + flags = ["skip_sw", "skip_hw", "in_hw"] + named = ["protocol", "pref", "chain", "handle", "id", "tag"] + + args = "-s filter show dev %s ingress" % (self['ifname']) + _, out = tc(args, JSON=False) + + filters = [] + lines = out.split('\n') + for line in lines: + words = line.split() + if "handle" not in words: + continue + fltr = {} + for flag in flags: + fltr[flag] = flag in words + for name in named: + try: + idx = words.index(name) + fltr[name] = words[idx + 1] + except ValueError: + pass + filters.append(fltr) + + if expected is not None: + fail(len(filters) != expected, + "%d ingress filters loaded, expected %d" % + (len(filters), expected)) + return filters + + def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None, + chain=None, cls="", params="", + fail=True, include_stderr=False): + spec = "" + if prio is not None: + spec += " prio %d" % (prio) + if handle: + spec += " handle %s" % (handle) + if chain is not None: + spec += " chain %d" % (chain) + + return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\ + .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec, + cls=cls, params=params), + fail=fail, include_stderr=include_stderr) + + def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None, + chain=None, da=False, verbose=False, + skip_sw=False, skip_hw=False, + fail=True, include_stderr=False): + cls = "bpf " + bpf + + params = "" + if da: + params += " da" + if verbose: + params += " verbose" + if skip_sw: + params += " skip_sw" + if skip_hw: + params += " skip_hw" + + return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls, + chain=chain, params=params, + fail=fail, include_stderr=include_stderr) + + def set_ethtool_tc_offloads(self, enable, fail=True): + args = "hw-tc-offload %s" % ("on" if enable else "off") + return ethtool(self, "-K", args, fail=fail) + +################################################################################ +def clean_up(): + global files, netns, devs + + for dev in devs: + dev.remove() + for f in files: + cmd("rm -f %s" % (f)) + for ns in netns: + cmd("ip netns delete %s" % (ns)) + files = [] + netns = [] + +def pin_prog(file_name, idx=0): + progs = bpftool_prog_list(expected=(idx + 1)) + prog = progs[idx] + bpftool("prog pin id %d %s" % (prog["id"], file_name)) + files.append(file_name) + + return file_name, bpf_pinned(file_name) + +def pin_map(file_name, idx=0, expected=1): + maps = bpftool_map_list_wait(expected=expected) + m = maps[idx] + bpftool("map pin id %d %s" % (m["id"], file_name)) + files.append(file_name) + + return file_name, bpf_pinned(file_name) + +def check_dev_info_removed(prog_file=None, map_file=None): + bpftool_prog_list(expected=0) + bpftool_prog_list(expected=1, exclude_orphaned=False) + ret, err = bpftool("prog show pin %s" % (prog_file), fail=False) + fail(ret != 0, "failed to show prog with removed device") + + bpftool_map_list_wait(expected=0) + ret, err = bpftool("map show pin %s" % (map_file), fail=False) + fail(ret == 0, "Showing map with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing map with removed device expected ENODEV, error is %s" % + (err["error"])) + +def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False): + progs = bpftool_prog_list(expected=1, ns=ns) + prog = progs[0] + + fail("dev" not in prog.keys(), "Device parameters not reported") + dev = prog["dev"] + fail("ifindex" not in dev.keys(), "Device parameters not reported") + fail("ns_dev" not in dev.keys(), "Device parameters not reported") + fail("ns_inode" not in dev.keys(), "Device parameters not reported") + + if not other_ns: + fail("ifname" not in dev.keys(), "Ifname not reported") + fail(dev["ifname"] != sim["ifname"], + "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"])) + else: + fail("ifname" in dev.keys(), "Ifname is reported for other ns") + + maps = bpftool_map_list_wait(expected=2, ns=ns) + for m in maps: + fail("dev" not in m.keys(), "Device parameters not reported") + fail(dev != m["dev"], "Map's device different than program's") + +def check_extack(output, reference, args): + if skip_extack: + return + lines = output.split("\n") + comp = len(lines) >= 2 and lines[1] == 'Error: ' + reference + fail(not comp, "Missing or incorrect netlink extack message") + +def check_extack_nsim(output, reference, args): + check_extack(output, "netdevsim: " + reference, args) + +def check_no_extack(res, needle): + haystack = (res[1] + res[2]).strip() + fail(haystack.count(needle) or haystack.count("Warning:"), + "Unexpected command output, leaky extack? ('%s', '%s')" % (needle, haystack)) + +def check_verifier_log(output, reference): + lines = output.split("\n") + for l in reversed(lines): + if l == reference: + return + fail(True, "Missing or incorrect message from netdevsim in verifier log") + +def check_multi_basic(two_xdps): + fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs") + fail("prog" in two_xdps, "Base program reported in multi program mode") + fail(len(two_xdps["attached"]) != 2, + "Wrong attached program count with two programs") + fail(two_xdps["attached"][0]["prog"]["id"] == + two_xdps["attached"][1]["prog"]["id"], + "Offloaded and other programs have the same id") + +def test_spurios_extack(sim, obj, skip_hw, needle): + res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw, + include_stderr=True) + check_no_extack(res, needle) + res = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, + skip_hw=skip_hw, include_stderr=True) + check_no_extack(res, needle) + res = sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf", + include_stderr=True) + check_no_extack(res, needle) + +def test_multi_prog(simdev, sim, obj, modename, modeid): + start_test("Test multi-attachment XDP - %s + offload..." % + (modename or "default", )) + sim.set_xdp(obj, "offload") + xdp = sim.ip_link_show(xdp=True)["xdp"] + offloaded = sim.dfs_read("bpf_offloaded_id") + fail("prog" not in xdp, "Base program not reported in single program mode") + fail(len(xdp["attached"]) != 1, + "Wrong attached program count with one program") + + sim.set_xdp(obj, modename) + two_xdps = sim.ip_link_show(xdp=True)["xdp"] + + fail(xdp["attached"][0] not in two_xdps["attached"], + "Offload program not reported after other activated") + check_multi_basic(two_xdps) + + offloaded2 = sim.dfs_read("bpf_offloaded_id") + fail(offloaded != offloaded2, + "Offload ID changed after loading other program") + + start_test("Test multi-attachment XDP - replace...") + ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True) + fail(ret == 0, "Replaced one of programs without -force") + check_extack(err, "XDP program already attached.", args) + + start_test("Test multi-attachment XDP - remove without mode...") + ret, _, err = sim.unset_xdp("", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Removed program without a mode flag") + check_extack(err, "More than one program loaded, unset mode is ambiguous.", args) + + sim.unset_xdp("offload") + xdp = sim.ip_link_show(xdp=True)["xdp"] + offloaded = sim.dfs_read("bpf_offloaded_id") + + fail(xdp["mode"] != modeid, "Bad mode reported after multiple programs") + fail("prog" not in xdp, + "Base program not reported after multi program mode") + fail(xdp["attached"][0] not in two_xdps["attached"], + "Offload program not reported after other activated") + fail(len(xdp["attached"]) != 1, + "Wrong attached program count with remaining programs") + fail(offloaded != "0", "Offload ID reported with only other program left") + + start_test("Test multi-attachment XDP - reattach...") + sim.set_xdp(obj, "offload") + two_xdps = sim.ip_link_show(xdp=True)["xdp"] + + fail(xdp["attached"][0] not in two_xdps["attached"], + "Other program not reported after offload activated") + check_multi_basic(two_xdps) + + start_test("Test multi-attachment XDP - device remove...") + simdev.remove() + + simdev = BpfNetdevSimDev() + sim, = simdev.nsims + sim.set_ethtool_tc_offloads(True) + return [simdev, sim] + +# Parse command line +parser = argparse.ArgumentParser() +parser.add_argument("--log", help="output verbose log to given file") +args = parser.parse_args() +if args.log: + logfile = open(args.log, 'w+') + logfile.write("# -*-Org-*-") + +log("Prepare...", "", level=1) +log_level_inc() + +# Check permissions +skip(os.getuid() != 0, "test must be run as root") + +# Check tools +ret, progs = bpftool("prog", fail=False) +skip(ret != 0, "bpftool not installed") +base_progs = progs +_, base_maps = bpftool("map") +base_map_names = [ + 'pid_iter.rodata', # created on each bpftool invocation + 'libbpf_det_bind', # created on each bpftool invocation + 'libbpf_global', +] + +# Check netdevsim +if not os.path.isdir("/sys/bus/netdevsim/"): + ret, out = cmd("modprobe netdevsim", fail=False) + skip(ret != 0, "netdevsim module could not be loaded") + +# Check debugfs +_, out = cmd("mount") +if out.find("/sys/kernel/debug type debugfs") == -1: + cmd("mount -t debugfs none /sys/kernel/debug") + +# Check samples are compiled +samples = ["sample_ret0.bpf.o", "sample_map_ret0.bpf.o"] +for s in samples: + ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False) + skip(ret != 0, "sample %s/%s not found, please compile it" % + (bpf_test_dir, s)) + +# Check if iproute2 is built with libmnl (needed by extack support) +_, _, err = cmd("tc qdisc delete dev lo handle 0", + fail=False, include_stderr=True) +if err.find("Error: Failed to find qdisc with specified handle.") == -1: + print("Warning: no extack message in iproute2 output, libmnl missing?") + log("Warning: no extack message in iproute2 output, libmnl missing?", "") + skip_extack = True + +# Check if net namespaces seem to work +ns = mknetns() +skip(ns is None, "Could not create a net namespace") +cmd("ip netns delete %s" % (ns)) +netns = [] + +try: + obj = bpf_obj("sample_ret0.bpf.o") + bytecode = bpf_bytecode("1,6 0 0 4294967295,") + + start_test("Test destruction of generic XDP...") + simdev = BpfNetdevSimDev() + sim, = simdev.nsims + sim.set_xdp(obj, "generic") + simdev.remove() + bpftool_prog_list_wait(expected=0) + + simdev = BpfNetdevSimDev() + sim, = simdev.nsims + sim.tc_add_ingress() + + start_test("Test TC non-offloaded...") + ret, _ = sim.cls_bpf_add_filter(obj, skip_hw=True, fail=False) + fail(ret != 0, "Software TC filter did not load") + + start_test("Test TC non-offloaded isn't getting bound...") + ret, _ = sim.cls_bpf_add_filter(obj, fail=False) + fail(ret != 0, "Software TC filter did not load") + simdev.dfs_get_bound_progs(expected=0) + + sim.tc_flush_filters() + + start_test("Test TC offloads are off by default...") + ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "TC filter loaded without enabling TC offloads") + check_extack(err, "TC offload is disabled on net device.", args) + sim.wait_for_flush() + + sim.set_ethtool_tc_offloads(True) + sim.dfs["bpf_tc_non_bound_accept"] = "Y" + + start_test("Test TC offload by default...") + ret, _ = sim.cls_bpf_add_filter(obj, fail=False) + fail(ret != 0, "Software TC filter did not load") + simdev.dfs_get_bound_progs(expected=0) + ingress = sim.tc_show_ingress(expected=1) + fltr = ingress[0] + fail(not fltr["in_hw"], "Filter not offloaded by default") + + sim.tc_flush_filters() + + start_test("Test TC cBPF bytcode tries offload by default...") + ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False) + fail(ret != 0, "Software TC filter did not load") + simdev.dfs_get_bound_progs(expected=0) + ingress = sim.tc_show_ingress(expected=1) + fltr = ingress[0] + fail(not fltr["in_hw"], "Bytecode not offloaded by default") + + sim.tc_flush_filters() + sim.dfs["bpf_tc_non_bound_accept"] = "N" + + start_test("Test TC cBPF unbound bytecode doesn't offload...") + ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "TC bytecode loaded for offload") + check_extack_nsim(err, "netdevsim configured to reject unbound programs.", + args) + sim.wait_for_flush() + + start_test("Test non-0 chain offload...") + ret, _, err = sim.cls_bpf_add_filter(obj, chain=1, prio=1, handle=1, + skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "Offloaded a filter to chain other than 0") + check_extack(err, "Driver supports only offload of chain 0.", args) + sim.tc_flush_filters() + + start_test("Test TC replace...") + sim.cls_bpf_add_filter(obj, prio=1, handle=1) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + start_test("Test TC replace bad flags...") + for i in range(3): + for j in range(3): + ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, + skip_sw=(j == 1), skip_hw=(j == 2), + fail=False) + fail(bool(ret) != bool(j), + "Software TC incorrect load in replace test, iteration %d" % + (j)) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + start_test("Test spurious extack from the driver...") + test_spurios_extack(sim, obj, False, "netdevsim") + test_spurios_extack(sim, obj, True, "netdevsim") + + sim.set_ethtool_tc_offloads(False) + + test_spurios_extack(sim, obj, False, "TC offload is disabled") + test_spurios_extack(sim, obj, True, "TC offload is disabled") + + sim.set_ethtool_tc_offloads(True) + + sim.tc_flush_filters() + + start_test("Test TC offloads failure...") + sim.dfs["dev/bpf_bind_verifier_accept"] = 0 + ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "TC filter did not reject with TC offloads enabled") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + sim.dfs["dev/bpf_bind_verifier_accept"] = 1 + + start_test("Test TC offloads work...") + ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, + fail=False, include_stderr=True) + fail(ret != 0, "TC filter did not load with TC offloads enabled") + + start_test("Test TC offload basics...") + dfs = simdev.dfs_get_bound_progs(expected=1) + progs = bpftool_prog_list(expected=1) + ingress = sim.tc_show_ingress(expected=1) + + dprog = dfs[0] + prog = progs[0] + fltr = ingress[0] + fail(fltr["skip_hw"], "TC does reports 'skip_hw' on offloaded filter") + fail(not fltr["in_hw"], "TC does not report 'in_hw' for offloaded filter") + fail(not fltr["skip_sw"], "TC does not report 'skip_sw' back") + + start_test("Test TC offload is device-bound...") + fail(str(prog["id"]) != fltr["id"], "Program IDs don't match") + fail(prog["tag"] != fltr["tag"], "Program tags don't match") + fail(fltr["id"] != dprog["id"], "Program IDs don't match") + fail(dprog["state"] != "xlated", "Offloaded program state not translated") + fail(dprog["loaded"] != "Y", "Offloaded program is not loaded") + + start_test("Test disabling TC offloads is rejected while filters installed...") + ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) + fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...") + sim.set_ethtool_tc_offloads(True) + + start_test("Test qdisc removal frees things...") + sim.tc_flush_filters() + sim.tc_show_ingress(expected=0) + + start_test("Test disabling TC offloads is OK without filters...") + ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) + fail(ret != 0, + "Driver refused to disable TC offloads without filters installed...") + + sim.set_ethtool_tc_offloads(True) + + start_test("Test destroying device gets rid of TC filters...") + sim.cls_bpf_add_filter(obj, skip_sw=True) + simdev.remove() + bpftool_prog_list_wait(expected=0) + + simdev = BpfNetdevSimDev() + sim, = simdev.nsims + sim.set_ethtool_tc_offloads(True) + + start_test("Test destroying device gets rid of XDP...") + sim.set_xdp(obj, "offload") + simdev.remove() + bpftool_prog_list_wait(expected=0) + + simdev = BpfNetdevSimDev() + sim, = simdev.nsims + sim.set_ethtool_tc_offloads(True) + + start_test("Test XDP prog reporting...") + sim.set_xdp(obj, "drv") + ipl = sim.ip_link_show(xdp=True) + progs = bpftool_prog_list(expected=1) + fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], + "Loaded program has wrong ID") + + start_test("Test XDP prog replace without force...") + ret, _ = sim.set_xdp(obj, "drv", fail=False) + fail(ret == 0, "Replaced XDP program without -force") + sim.wait_for_flush(total=1) + + start_test("Test XDP prog replace with force...") + ret, _ = sim.set_xdp(obj, "drv", force=True, fail=False) + fail(ret != 0, "Could not replace XDP program with -force") + bpftool_prog_list_wait(expected=1) + ipl = sim.ip_link_show(xdp=True) + progs = bpftool_prog_list(expected=1) + fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], + "Loaded program has wrong ID") + fail("dev" in progs[0].keys(), + "Device parameters reported for non-offloaded program") + + start_test("Test XDP prog replace with bad flags...") + ret, _, err = sim.set_xdp(obj, "generic", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Replaced XDP program with a program in different mode") + check_extack(err, + "Native and generic XDP can't be active at the same time.", + args) + + start_test("Test MTU restrictions...") + ret, _ = sim.set_mtu(9000, fail=False) + fail(ret == 0, + "Driver should refuse to increase MTU to 9000 with XDP loaded...") + sim.unset_xdp("drv") + bpftool_prog_list_wait(expected=0) + sim.set_mtu(9000) + ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True) + fail(ret == 0, "Driver should refuse to load program with MTU of 9000...") + check_extack_nsim(err, "MTU too large w/ XDP enabled.", args) + sim.set_mtu(1500) + + sim.wait_for_flush() + start_test("Test non-offload XDP attaching to HW...") + bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/nooffload") + nooffload = bpf_pinned("/sys/fs/bpf/nooffload") + ret, _, err = sim.set_xdp(nooffload, "offload", + fail=False, include_stderr=True) + fail(ret == 0, "attached non-offloaded XDP program to HW") + check_extack_nsim(err, "xdpoffload of non-bound program.", args) + rm("/sys/fs/bpf/nooffload") + + start_test("Test offload XDP attaching to drv...") + bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload", + dev=sim['ifname']) + offload = bpf_pinned("/sys/fs/bpf/offload") + ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True) + fail(ret == 0, "attached offloaded XDP program to drv") + check_extack(err, "Using offloaded program without HW_MODE flag is not supported.", args) + rm("/sys/fs/bpf/offload") + sim.wait_for_flush() + + bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/devbound", + dev_bind=sim['ifname']) + devbound = bpf_pinned("/sys/fs/bpf/devbound") + start_test("Test dev-bound program in generic mode...") + ret, _, err = sim.set_xdp(devbound, "generic", fail=False, include_stderr=True) + fail(ret == 0, "devbound program in generic mode allowed") + check_extack(err, "Can't attach device-bound programs in generic mode.", args) + rm("/sys/fs/bpf/devbound") + sim.wait_for_flush() + + start_test("Test XDP load failure...") + sim.dfs["dev/bpf_bind_verifier_accept"] = 0 + ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload", + dev=sim['ifname'], fail=False, include_stderr=True) + fail(ret == 0, "verifier should fail on load") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + sim.dfs["dev/bpf_bind_verifier_accept"] = 1 + sim.wait_for_flush() + + start_test("Test XDP offload...") + _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True) + ipl = sim.ip_link_show(xdp=True) + link_xdp = ipl["xdp"]["prog"] + progs = bpftool_prog_list(expected=1) + prog = progs[0] + fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID") + + start_test("Test XDP offload is device bound...") + dfs = simdev.dfs_get_bound_progs(expected=1) + dprog = dfs[0] + + fail(prog["id"] != link_xdp["id"], "Program IDs don't match") + fail(prog["tag"] != link_xdp["tag"], "Program tags don't match") + fail(str(link_xdp["id"]) != dprog["id"], "Program IDs don't match") + fail(dprog["state"] != "xlated", "Offloaded program state not translated") + fail(dprog["loaded"] != "Y", "Offloaded program is not loaded") + + start_test("Test removing XDP program many times...") + sim.unset_xdp("offload") + sim.unset_xdp("offload") + sim.unset_xdp("drv") + sim.unset_xdp("drv") + sim.unset_xdp("") + sim.unset_xdp("") + bpftool_prog_list_wait(expected=0) + + start_test("Test attempt to use a program for a wrong device...") + simdev2 = BpfNetdevSimDev() + sim2, = simdev2.nsims + sim2.set_xdp(obj, "offload") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") + + ret, _, err = sim.set_xdp(pinned, "offload", + fail=False, include_stderr=True) + fail(ret == 0, "Pinned program loaded for a different device accepted") + check_extack(err, "Program bound to different device.", args) + simdev2.remove() + ret, _, err = sim.set_xdp(pinned, "offload", + fail=False, include_stderr=True) + fail(ret == 0, "Pinned program loaded for a removed device accepted") + check_extack(err, "Program bound to different device.", args) + rm(pin_file) + bpftool_prog_list_wait(expected=0) + + simdev, sim = test_multi_prog(simdev, sim, obj, "", 1) + simdev, sim = test_multi_prog(simdev, sim, obj, "drv", 1) + simdev, sim = test_multi_prog(simdev, sim, obj, "generic", 2) + + start_test("Test mixing of TC and XDP...") + sim.tc_add_ingress() + sim.set_xdp(obj, "offload") + ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "Loading TC when XDP active should fail") + check_extack_nsim(err, "driver and netdev offload states mismatch.", args) + sim.unset_xdp("offload") + sim.wait_for_flush() + + sim.cls_bpf_add_filter(obj, skip_sw=True) + ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True) + fail(ret == 0, "Loading XDP when TC active should fail") + check_extack_nsim(err, "TC program is already loaded.", args) + + start_test("Test binding TC from pinned...") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") + sim.tc_flush_filters(bound=1, total=1) + sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True) + sim.tc_flush_filters(bound=1, total=1) + + start_test("Test binding XDP from pinned...") + sim.set_xdp(obj, "offload") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp2", idx=1) + + sim.set_xdp(pinned, "offload", force=True) + sim.unset_xdp("offload") + sim.set_xdp(pinned, "offload", force=True) + sim.unset_xdp("offload") + + start_test("Test offload of wrong type fails...") + ret, _ = sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True, fail=False) + fail(ret == 0, "Managed to attach XDP program to TC") + + start_test("Test asking for TC offload of two filters...") + sim.cls_bpf_add_filter(obj, da=True, skip_sw=True) + ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "Managed to offload two TC filters at the same time") + check_extack_nsim(err, "driver and netdev offload states mismatch.", args) + + sim.tc_flush_filters(bound=2, total=2) + + start_test("Test if netdev removal waits for translation...") + delay_msec = 500 + sim.dfs["dev/bpf_bind_verifier_delay"] = delay_msec + start = time.time() + cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \ + (sim['ifname'], obj) + tc_proc = cmd(cmd_line, background=True, fail=False) + # Wait for the verifier to start + while simdev.dfs_num_bound_progs() <= 2: + pass + simdev.remove() + end = time.time() + ret, _ = cmd_result(tc_proc, fail=False) + time_diff = end - start + log("Time", "start:\t%s\nend:\t%s\ndiff:\t%s" % (start, end, time_diff)) + + fail(ret == 0, "Managed to load TC filter on a unregistering device") + delay_sec = delay_msec * 0.001 + fail(time_diff < delay_sec, "Removal process took %s, expected %s" % + (time_diff, delay_sec)) + + # Remove all pinned files and reinstantiate the netdev + clean_up() + bpftool_prog_list_wait(expected=0) + + simdev = BpfNetdevSimDev() + sim, = simdev.nsims + map_obj = bpf_obj("sample_map_ret0.bpf.o") + start_test("Test loading program with maps...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + + start_test("Test bpftool bound info reporting (own ns)...") + check_dev_info(False, "") + + start_test("Test bpftool bound info reporting (other ns)...") + ns = mknetns() + sim.set_ns(ns) + check_dev_info(True, "") + + start_test("Test bpftool bound info reporting (remote ns)...") + check_dev_info(False, ns) + + start_test("Test bpftool bound info reporting (back to own ns)...") + sim.set_ns("") + check_dev_info(False, "") + + prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog") + map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2) + simdev.remove() + + start_test("Test bpftool bound info reporting (removed dev)...") + check_dev_info_removed(prog_file=prog_file, map_file=map_file) + + # Remove all pinned files and reinstantiate the netdev + clean_up() + bpftool_prog_list_wait(expected=0) + + simdev = BpfNetdevSimDev() + sim, = simdev.nsims + + start_test("Test map update (no flags)...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + maps = bpftool_map_list_wait(expected=2) + array = maps[0] if maps[0]["type"] == "array" else maps[1] + htab = maps[0] if maps[0]["type"] == "hash" else maps[1] + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, _ = bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret == 0, "added too many entries") + + start_test("Test map update (exists)...") + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, err = bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret == 0, "updated non-existing key") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map update (noexist)...") + for m in maps: + for i in range(2): + ret, err = bpftool("map update id %d key %s value %s noexist" % + (m["id"], int2str("I", i), int2str("Q", i * 3)), + fail=False) + fail(ret == 0, "updated existing key") + fail(err["error"].find("File exists") == -1, + "expected EEXIST, error is '%s'" % (err["error"])) + + start_test("Test map dump...") + for m in maps: + _, entries = bpftool("map dump id %d" % (m["id"])) + for i in range(2): + key = str2int(entries[i]["key"]) + fail(key != i, "expected key %d, got %d" % (key, i)) + val = str2int(entries[i]["value"]) + fail(val != i * 3, "expected value %d, got %d" % (val, i * 3)) + + start_test("Test map getnext...") + for m in maps: + _, entry = bpftool("map getnext id %d" % (m["id"])) + key = str2int(entry["next_key"]) + fail(key != 0, "next key %d, expected %d" % (key, 0)) + _, entry = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 0))) + key = str2int(entry["next_key"]) + fail(key != 1, "next key %d, expected %d" % (key, 1)) + ret, err = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 1)), fail=False) + fail(ret == 0, "got next key past the end of map") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map delete (htab)...") + for i in range(2): + bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i))) + + start_test("Test map delete (array)...") + for i in range(2): + ret, err = bpftool("map delete id %d key %s" % + (htab["id"], int2str("I", i)), fail=False) + fail(ret == 0, "removed entry from an array") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map remove...") + sim.unset_xdp("offload") + bpftool_map_list_wait(expected=0) + simdev.remove() + + simdev = BpfNetdevSimDev() + sim, = simdev.nsims + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + simdev.remove() + bpftool_map_list_wait(expected=0) + + start_test("Test map creation fail path...") + simdev = BpfNetdevSimDev() + sim, = simdev.nsims + sim.dfs["bpf_map_accept"] = "N" + ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False) + fail(ret == 0, + "netdevsim didn't refuse to create a map with offload disabled") + + simdev.remove() + + start_test("Test multi-dev ASIC program reuse...") + simdevA = BpfNetdevSimDev() + simA, = simdevA.nsims + simdevB = BpfNetdevSimDev(3) + simB1, simB2, simB3 = simdevB.nsims + sims = (simA, simB1, simB2, simB3) + simB = (simB1, simB2, simB3) + + bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimA", + dev=simA['ifname']) + progA = bpf_pinned("/sys/fs/bpf/nsimA") + bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB", + dev=simB1['ifname']) + progB = bpf_pinned("/sys/fs/bpf/nsimB") + + simA.set_xdp(progA, "offload", JSON=False) + for d in simdevB.nsims: + d.set_xdp(progB, "offload", JSON=False) + + start_test("Test multi-dev ASIC cross-dev replace...") + ret, _ = simA.set_xdp(progB, "offload", force=True, JSON=False, fail=False) + fail(ret == 0, "cross-ASIC program allowed") + for d in simdevB.nsims: + ret, _ = d.set_xdp(progA, "offload", force=True, JSON=False, fail=False) + fail(ret == 0, "cross-ASIC program allowed") + + start_test("Test multi-dev ASIC cross-dev install...") + for d in sims: + d.unset_xdp("offload") + + ret, _, err = simA.set_xdp(progB, "offload", force=True, JSON=False, + fail=False, include_stderr=True) + fail(ret == 0, "cross-ASIC program allowed") + check_extack(err, "Program bound to different device.", args) + for d in simdevB.nsims: + ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False, + fail=False, include_stderr=True) + fail(ret == 0, "cross-ASIC program allowed") + check_extack(err, "Program bound to different device.", args) + + start_test("Test multi-dev ASIC cross-dev map reuse...") + + mapA = bpftool("prog show %s" % (progA))[1]["map_ids"][0] + mapB = bpftool("prog show %s" % (progB))[1]["map_ids"][0] + + ret, _ = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB_", + dev=simB3['ifname'], + maps=["idx 0 id %d" % (mapB)], + fail=False) + fail(ret != 0, "couldn't reuse a map on the same ASIC") + rm("/sys/fs/bpf/nsimB_") + + ret, _, err = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimA_", + dev=simA['ifname'], + maps=["idx 0 id %d" % (mapB)], + fail=False, include_stderr=True) + fail(ret == 0, "could reuse a map on a different ASIC") + fail(err.count("offload device mismatch between prog and map") == 0, + "error message missing for cross-ASIC map") + + ret, _, err = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB_", + dev=simB1['ifname'], + maps=["idx 0 id %d" % (mapA)], + fail=False, include_stderr=True) + fail(ret == 0, "could reuse a map on a different ASIC") + fail(err.count("offload device mismatch between prog and map") == 0, + "error message missing for cross-ASIC map") + + start_test("Test multi-dev ASIC cross-dev destruction...") + bpftool_prog_list_wait(expected=2) + + simdevA.remove() + bpftool_prog_list_wait(expected=1) + + ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"] + fail(ifnameB != simB1['ifname'], "program not bound to original device") + simB1.remove() + bpftool_prog_list_wait(expected=1) + + start_test("Test multi-dev ASIC cross-dev destruction - move...") + ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"] + fail(ifnameB not in (simB2['ifname'], simB3['ifname']), + "program not bound to remaining devices") + + simB2.remove() + ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"] + fail(ifnameB != simB3['ifname'], "program not bound to remaining device") + + simB3.remove() + simdevB.remove() + bpftool_prog_list_wait(expected=0) + + start_test("Test multi-dev ASIC cross-dev destruction - orphaned...") + ret, out = bpftool("prog show %s" % (progB), fail=False) + fail(ret != 0, "couldn't get information about orphaned program") + + print("%s: OK" % (os.path.basename(__file__))) + +finally: + log("Clean up...", "", level=1) + log_level_inc() + clean_up() diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh new file mode 100755 index 000000000000..7d2d40812074 --- /dev/null +++ b/tools/testing/selftests/net/busy_poll_test.sh @@ -0,0 +1,165 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source lib.sh + +NSIM_SV_ID=$((256 + RANDOM % 256)) +NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID +NSIM_CL_ID=$((512 + RANDOM % 256)) +NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +SERVER_IP=192.168.1.1 +CLIENT_IP=192.168.1.2 +SERVER_PORT=48675 + +# busy poll config +MAX_EVENTS=8 +BUSY_POLL_USECS=0 +BUSY_POLL_BUDGET=16 +PREFER_BUSY_POLL=1 + +# IRQ deferral config +NAPI_DEFER_HARD_IRQS=100 +GRO_FLUSH_TIMEOUT=50000 +SUSPEND_TIMEOUT=20000000 + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_SV_SYS/net -exec basename {} \;) + NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_CL_SYS/net -exec basename {} \;) + + # ensure the server has 1 queue + ethtool -L $NSIM_SV_NAME combined 1 2>/dev/null + + ip link set $NSIM_SV_NAME netns nssv + ip link set $NSIM_CL_NAME netns nscl + + ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME + ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME + + ip netns exec nssv ip link set dev $NSIM_SV_NAME up + ip netns exec nscl ip link set dev $NSIM_CL_NAME up + + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +test_busypoll() +{ + suspend_value=${1:-0} + tmp_file=$(mktemp) + out_file=$(mktemp) + + # fill a test file with random data + dd if=/dev/urandom of=${tmp_file} bs=1M count=1 2> /dev/null + + timeout -k 1s 30s ip netns exec nssv ./busy_poller \ + -p${SERVER_PORT} \ + -b${SERVER_IP} \ + -m${MAX_EVENTS} \ + -u${BUSY_POLL_USECS} \ + -P${PREFER_BUSY_POLL} \ + -g${BUSY_POLL_BUDGET} \ + -i${NSIM_SV_IFIDX} \ + -s${suspend_value} \ + -o${out_file}& + + wait_local_port_listen nssv ${SERVER_PORT} tcp + + ip netns exec nscl socat -u $tmp_file TCP:${SERVER_IP}:${SERVER_PORT} + + wait + + tmp_file_md5sum=$(md5sum $tmp_file | cut -f1 -d' ') + out_file_md5sum=$(md5sum $out_file | cut -f1 -d' ') + + if [ "$tmp_file_md5sum" = "$out_file_md5sum" ]; then + res=0 + else + echo "md5sum mismatch" + echo "input file md5sum: ${tmp_file_md5sum}"; + echo "output file md5sum: ${out_file_md5sum}"; + res=1 + fi + + rm $out_file $tmp_file + + return $res +} + +test_busypoll_with_suspend() +{ + test_busypoll ${SUSPEND_TIMEOUT} + + return $? +} + +### +### Code start +### + +modprobe netdevsim + +# linking + +echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_SV_FD=$((256 + RANDOM % 256)) +exec {NSIM_SV_FD}</var/run/netns/nssv +NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex) + +NSIM_CL_FD=$((256 + RANDOM % 256)) +exec {NSIM_CL_FD}</var/run/netns/nscl +NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex) + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \ + $NSIM_DEV_SYS_LINK + +if [ $? -ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +test_busypoll +if [ $? -ne 0 ]; then + echo "test_busypoll failed" + cleanup_ns + exit 1 +fi + +test_busypoll_with_suspend +if [ $? -ne 0 ]; then + echo "test_busypoll_with_suspend failed" + cleanup_ns + exit 1 +fi + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit 0 diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c new file mode 100644 index 000000000000..04c7ff577bb8 --- /dev/null +++ b/tools/testing/selftests/net/busy_poller.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <inttypes.h> +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <ynl.h> + +#include <arpa/inet.h> +#include <netinet/in.h> + +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include <linux/genetlink.h> +#include <linux/netlink.h> + +#include "netdev-user.h" + +/* The below ifdef blob is required because: + * + * - sys/epoll.h does not (yet) have the ioctl definitions included. So, + * systems with older glibcs will not have them available. However, + * sys/epoll.h does include the type definition for epoll_data, which is + * needed by the user program (e.g. epoll_event.data.fd) + * + * - linux/eventpoll.h does not define the epoll_data type, it is simply an + * opaque __u64. It does, however, include the ioctl definition. + * + * Including both headers is impossible (types would be redefined), so I've + * opted instead to take sys/epoll.h, and include the blob below. + * + * Someday, when glibc is globally up to date, the blob below can be removed. + */ +#if !defined(EPOLL_IOC_TYPE) +struct epoll_params { + uint32_t busy_poll_usecs; + uint16_t busy_poll_budget; + uint8_t prefer_busy_poll; + + /* pad the struct to a multiple of 64bits */ + uint8_t __pad; +}; + +#define EPOLL_IOC_TYPE 0x8A +#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) +#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) +#endif + +static uint16_t cfg_port = 8000; +static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; +static char *cfg_outfile; +static int cfg_max_events = 8; +static uint32_t cfg_ifindex; + +/* busy poll params */ +static uint32_t cfg_busy_poll_usecs; +static uint16_t cfg_busy_poll_budget; +static uint8_t cfg_prefer_busy_poll; + +/* IRQ params */ +static uint32_t cfg_defer_hard_irqs; +static uint64_t cfg_gro_flush_timeout; +static uint64_t cfg_irq_suspend_timeout; + +static void usage(const char *filepath) +{ + error(1, 0, + "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>", + filepath); +} + +static void parse_opts(int argc, char **argv) +{ + unsigned long long tmp; + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + + while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { + /* most options take integer values, except o and b, so reduce + * code duplication a bit for the common case by calling + * strtoull here and leave bounds checking and casting per + * option below. + */ + if (c != 'o' && c != 'b') + tmp = strtoull(optarg, NULL, 0); + + switch (c) { + case 'u': + if (tmp == ULLONG_MAX || tmp > UINT32_MAX) + error(1, ERANGE, "busy_poll_usecs too large"); + + cfg_busy_poll_usecs = (uint32_t)tmp; + break; + case 'P': + if (tmp == ULLONG_MAX || tmp > 1) + error(1, ERANGE, + "prefer busy poll should be 0 or 1"); + + cfg_prefer_busy_poll = (uint8_t)tmp; + break; + case 'g': + if (tmp == ULLONG_MAX || tmp > UINT16_MAX) + error(1, ERANGE, + "busy poll budget must be [0, UINT16_MAX]"); + + cfg_busy_poll_budget = (uint16_t)tmp; + break; + case 'p': + if (tmp == ULLONG_MAX || tmp > UINT16_MAX) + error(1, ERANGE, "port must be <= 65535"); + + cfg_port = (uint16_t)tmp; + break; + case 'b': + ret = inet_aton(optarg, &cfg_bind_addr); + if (ret == 0) + error(1, errno, + "bind address %s invalid", optarg); + break; + case 'o': + cfg_outfile = strdup(optarg); + if (!cfg_outfile) + error(1, 0, "outfile invalid"); + break; + case 'm': + if (tmp == ULLONG_MAX || tmp > INT_MAX) + error(1, ERANGE, + "max events must be > 0 and <= INT_MAX"); + + cfg_max_events = (int)tmp; + break; + case 'd': + if (tmp == ULLONG_MAX || tmp > INT32_MAX) + error(1, ERANGE, + "defer_hard_irqs must be <= INT32_MAX"); + + cfg_defer_hard_irqs = (uint32_t)tmp; + break; + case 'r': + if (tmp == ULLONG_MAX || tmp > UINT64_MAX) + error(1, ERANGE, + "gro_flush_timeout must be < UINT64_MAX"); + + cfg_gro_flush_timeout = (uint64_t)tmp; + break; + case 's': + if (tmp == ULLONG_MAX || tmp > UINT64_MAX) + error(1, ERANGE, + "irq_suspend_timeout must be < ULLONG_MAX"); + + cfg_irq_suspend_timeout = (uint64_t)tmp; + break; + case 'i': + if (tmp == ULLONG_MAX || tmp > INT_MAX) + error(1, ERANGE, + "ifindex must be <= INT_MAX"); + + cfg_ifindex = (int)tmp; + break; + } + } + + if (!cfg_ifindex) + usage(argv[0]); + + if (optind != argc) + usage(argv[0]); +} + +static void epoll_ctl_add(int epfd, int fd, uint32_t events) +{ + struct epoll_event ev; + + ev.events = events; + ev.data.fd = fd; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1) + error(1, errno, "epoll_ctl add fd: %d", fd); +} + +static void setnonblock(int sockfd) +{ + int flags; + + flags = fcntl(sockfd, F_GETFL, 0); + + if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1) + error(1, errno, "unable to set socket to nonblocking mode"); +} + +static void write_chunk(int fd, char *buf, ssize_t buflen) +{ + ssize_t remaining = buflen; + char *buf_offset = buf; + ssize_t writelen = 0; + ssize_t write_result; + + while (writelen < buflen) { + write_result = write(fd, buf_offset, remaining); + if (write_result == -1) + error(1, errno, "unable to write data to outfile"); + + writelen += write_result; + remaining -= write_result; + buf_offset += write_result; + } +} + +static void setup_queue(void) +{ + struct netdev_napi_get_list *napi_list = NULL; + struct netdev_napi_get_req_dump *req = NULL; + struct netdev_napi_set_req *set_req = NULL; + struct ynl_sock *ys; + struct ynl_error yerr; + uint32_t napi_id = 0; + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) + error(1, 0, "YNL: %s", yerr.msg); + + req = netdev_napi_get_req_dump_alloc(); + netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex); + napi_list = netdev_napi_get_dump(ys, req); + + /* assume there is 1 NAPI configured and take the first */ + if (napi_list->obj._present.id) + napi_id = napi_list->obj.id; + else + error(1, 0, "napi ID not present?"); + + set_req = netdev_napi_set_req_alloc(); + netdev_napi_set_req_set_id(set_req, napi_id); + netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs); + netdev_napi_set_req_set_gro_flush_timeout(set_req, + cfg_gro_flush_timeout); + netdev_napi_set_req_set_irq_suspend_timeout(set_req, + cfg_irq_suspend_timeout); + + if (netdev_napi_set(ys, set_req)) + error(1, 0, "can't set NAPI params: %s\n", yerr.msg); + + netdev_napi_get_list_free(napi_list); + netdev_napi_get_req_dump_free(req); + netdev_napi_set_req_free(set_req); + ynl_sock_destroy(ys); +} + +static void run_poller(void) +{ + struct epoll_event events[cfg_max_events]; + struct epoll_params epoll_params = {0}; + struct sockaddr_in server_addr; + int i, epfd, nfds; + ssize_t readlen; + int outfile_fd; + char buf[1024]; + int sockfd; + int conn; + int val; + + outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644); + if (outfile_fd == -1) + error(1, errno, "unable to open outfile: %s", cfg_outfile); + + sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sockfd == -1) + error(1, errno, "unable to create listen socket"); + + server_addr.sin_family = AF_INET; + server_addr.sin_port = htons(cfg_port); + server_addr.sin_addr = cfg_bind_addr; + + /* these values are range checked during parse_opts, so casting is safe + * here + */ + epoll_params.busy_poll_usecs = cfg_busy_poll_usecs; + epoll_params.busy_poll_budget = cfg_busy_poll_budget; + epoll_params.prefer_busy_poll = cfg_prefer_busy_poll; + epoll_params.__pad = 0; + + val = 1; + if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val))) + error(1, errno, "poller setsockopt reuseaddr"); + + setnonblock(sockfd); + + if (bind(sockfd, (struct sockaddr *)&server_addr, + sizeof(struct sockaddr_in))) + error(0, errno, "poller bind to port: %d\n", cfg_port); + + if (listen(sockfd, 1)) + error(1, errno, "poller listen"); + + epfd = epoll_create1(0); + if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1) + error(1, errno, "unable to set busy poll params"); + + epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET); + + for (;;) { + nfds = epoll_wait(epfd, events, cfg_max_events, -1); + for (i = 0; i < nfds; i++) { + if (events[i].data.fd == sockfd) { + conn = accept(sockfd, NULL, NULL); + if (conn == -1) + error(1, errno, + "accepting incoming connection failed"); + + setnonblock(conn); + epoll_ctl_add(epfd, conn, + EPOLLIN | EPOLLET | EPOLLRDHUP | + EPOLLHUP); + } else if (events[i].events & EPOLLIN) { + for (;;) { + readlen = read(events[i].data.fd, buf, + sizeof(buf)); + if (readlen > 0) + write_chunk(outfile_fd, buf, + readlen); + else + break; + } + } else { + /* spurious event ? */ + } + if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) { + epoll_ctl(epfd, EPOLL_CTL_DEL, + events[i].data.fd, NULL); + close(events[i].data.fd); + close(outfile_fd); + return; + } + } + } +} + +int main(int argc, char *argv[]) +{ + parse_opts(argc, argv); + setup_queue(); + run_poller(); + + if (cfg_outfile) + free(cfg_outfile); + + return 0; +} diff --git a/tools/testing/selftests/net/can/.gitignore b/tools/testing/selftests/net/can/.gitignore new file mode 100644 index 000000000000..764a53fc837f --- /dev/null +++ b/tools/testing/selftests/net/can/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +test_raw_filter diff --git a/tools/testing/selftests/net/can/Makefile b/tools/testing/selftests/net/can/Makefile new file mode 100644 index 000000000000..5b82e60a03e7 --- /dev/null +++ b/tools/testing/selftests/net/can/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. + +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) + +TEST_PROGS := test_raw_filter.sh + +TEST_GEN_FILES := test_raw_filter + +include ../../lib.mk diff --git a/tools/testing/selftests/net/can/test_raw_filter.c b/tools/testing/selftests/net/can/test_raw_filter.c new file mode 100644 index 000000000000..4101c36390fd --- /dev/null +++ b/tools/testing/selftests/net/can/test_raw_filter.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +/* + * Copyright (c) 2011 Volkswagen Group Electronic Research + * All rights reserved. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <net/if.h> +#include <linux/if.h> + +#include <linux/can.h> +#include <linux/can/raw.h> + +#include "../../kselftest_harness.h" + +#define ID 0x123 + +char CANIF[IFNAMSIZ]; + +static int send_can_frames(int sock, int testcase) +{ + struct can_frame frame; + + frame.can_dlc = 1; + frame.data[0] = testcase; + + frame.can_id = ID; + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + frame.can_id = (ID | CAN_RTR_FLAG); + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + frame.can_id = (ID | CAN_EFF_FLAG); + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + frame.can_id = (ID | CAN_EFF_FLAG | CAN_RTR_FLAG); + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + return 0; + +write_err: + perror("write"); + return 1; +} + +FIXTURE(can_filters) { + int sock; +}; + +FIXTURE_SETUP(can_filters) +{ + struct sockaddr_can addr; + struct ifreq ifr; + int recv_own_msgs = 1; + int s, ret; + + s = socket(PF_CAN, SOCK_RAW, CAN_RAW); + ASSERT_GE(s, 0) + TH_LOG("failed to create CAN_RAW socket: %d", errno); + + strncpy(ifr.ifr_name, CANIF, sizeof(ifr.ifr_name)); + ret = ioctl(s, SIOCGIFINDEX, &ifr); + ASSERT_GE(ret, 0) + TH_LOG("failed SIOCGIFINDEX: %d", errno); + + addr.can_family = AF_CAN; + addr.can_ifindex = ifr.ifr_ifindex; + + setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS, + &recv_own_msgs, sizeof(recv_own_msgs)); + + ret = bind(s, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(ret, 0) + TH_LOG("failed bind socket: %d", errno); + + self->sock = s; +} + +FIXTURE_TEARDOWN(can_filters) +{ + close(self->sock); +} + +FIXTURE_VARIANT(can_filters) { + int testcase; + canid_t id; + canid_t mask; + int exp_num_rx; + canid_t exp_flags[]; +}; + +/* Receive all frames when filtering for the ID in standard frame format */ +FIXTURE_VARIANT_ADD(can_filters, base) { + .testcase = 1, + .id = ID, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Ignore EFF flag in filter ID if not covered by filter mask */ +FIXTURE_VARIANT_ADD(can_filters, base_eff) { + .testcase = 2, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Ignore RTR flag in filter ID if not covered by filter mask */ +FIXTURE_VARIANT_ADD(can_filters, base_rtr) { + .testcase = 3, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Ignore EFF and RTR flags in filter ID if not covered by filter mask */ +FIXTURE_VARIANT_ADD(can_filters, base_effrtr) { + .testcase = 4, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF frames when expecting no EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff) { + .testcase = 5, + .id = ID, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_RTR_FLAG, + }, +}; + +/* Receive only EFF frames when filter id and filter mask include EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff_eff) { + .testcase = 6, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF frames when expecting no EFF flag, ignoring RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff_rtr) { + .testcase = 7, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_RTR_FLAG, + }, +}; + +/* Receive only EFF frames when filter id and filter mask include EFF flag, + * ignoring RTR flag + */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff_effrtr) { + .testcase = 8, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive no remote frames when filtering for no RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr) { + .testcase = 9, + .id = ID, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_EFF_FLAG, + }, +}; + +/* Receive no remote frames when filtering for no RTR flag, ignoring EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr_eff) { + .testcase = 10, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_EFF_FLAG, + }, +}; + +/* Receive only remote frames when filter includes RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr_rtr) { + .testcase = 11, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_RTR_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only remote frames when filter includes RTR flag, ignoring EFF + * flag + */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr_effrtr) { + .testcase = 12, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_RTR_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF data frame when filtering for no flags */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr) { + .testcase = 13, + .id = ID, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + 0, + }, +}; + +/* Receive only EFF data frame when filtering for EFF but no RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_eff) { + .testcase = 14, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_EFF_FLAG, + }, +}; + +/* Receive only SFF remote frame when filtering for RTR but no EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_rtr) { + .testcase = 15, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_RTR_FLAG, + }, +}; + +/* Receive only EFF remote frame when filtering for EFF and RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_effrtr) { + .testcase = 16, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF data frame when filtering for no EFF flag and no RTR flag + * but based on EFF mask + */ +FIXTURE_VARIANT_ADD(can_filters, eff) { + .testcase = 17, + .id = ID, + .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + 0, + }, +}; + +/* Receive only EFF data frame when filtering for EFF flag and no RTR flag but + * based on EFF mask + */ +FIXTURE_VARIANT_ADD(can_filters, eff_eff) { + .testcase = 18, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_EFF_FLAG, + }, +}; + +/* This test verifies that the raw CAN filters work, by checking if only frames + * with the expected set of flags are received. For each test case, the given + * filter (id and mask) is added and four CAN frames are sent with every + * combination of set/unset EFF/RTR flags. + */ +TEST_F(can_filters, test_filter) +{ + struct can_filter rfilter; + int ret; + + rfilter.can_id = variant->id; + rfilter.can_mask = variant->mask; + setsockopt(self->sock, SOL_CAN_RAW, CAN_RAW_FILTER, + &rfilter, sizeof(rfilter)); + + TH_LOG("filters: can_id = 0x%08X can_mask = 0x%08X", + rfilter.can_id, rfilter.can_mask); + + ret = send_can_frames(self->sock, variant->testcase); + ASSERT_EQ(ret, 0) + TH_LOG("failed to send CAN frames"); + + for (int i = 0; i <= variant->exp_num_rx; i++) { + struct can_frame frame; + struct timeval tv = { + .tv_sec = 0, + .tv_usec = 50000, /* 50ms timeout */ + }; + fd_set rdfs; + + FD_ZERO(&rdfs); + FD_SET(self->sock, &rdfs); + + ret = select(self->sock + 1, &rdfs, NULL, NULL, &tv); + ASSERT_GE(ret, 0) + TH_LOG("failed select for frame %d, err: %d)", i, errno); + + ret = FD_ISSET(self->sock, &rdfs); + if (i == variant->exp_num_rx) { + ASSERT_EQ(ret, 0) + TH_LOG("too many frames received"); + } else { + ASSERT_NE(ret, 0) + TH_LOG("too few frames received"); + + ret = read(self->sock, &frame, sizeof(frame)); + ASSERT_GE(ret, 0) + TH_LOG("failed to read frame %d, err: %d", i, errno); + + TH_LOG("rx: can_id = 0x%08X rx = %d", frame.can_id, i); + + ASSERT_EQ(ID, frame.can_id & CAN_SFF_MASK) + TH_LOG("received wrong can_id"); + ASSERT_EQ(variant->testcase, frame.data[0]) + TH_LOG("received wrong test case"); + + ASSERT_EQ(frame.can_id & ~CAN_ERR_MASK, + variant->exp_flags[i]) + TH_LOG("received unexpected flags"); + } + } +} + +int main(int argc, char **argv) +{ + char *ifname = getenv("CANIF"); + + if (!ifname) { + printf("CANIF environment variable must contain the test interface\n"); + return KSFT_FAIL; + } + + strncpy(CANIF, ifname, sizeof(CANIF) - 1); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/net/can/test_raw_filter.sh b/tools/testing/selftests/net/can/test_raw_filter.sh new file mode 100755 index 000000000000..276d6c06ac95 --- /dev/null +++ b/tools/testing/selftests/net/can/test_raw_filter.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + test_raw_filter +" + +net_dir=$(dirname $0)/.. +source $net_dir/lib.sh + +export CANIF=${CANIF:-"vcan0"} +BITRATE=${BITRATE:-500000} + +setup() +{ + if [[ $CANIF == vcan* ]]; then + ip link add name $CANIF type vcan || exit $ksft_skip + else + ip link set dev $CANIF type can bitrate $BITRATE || exit $ksft_skip + fi + ip link set dev $CANIF up + pwd +} + +cleanup() +{ + ip link set dev $CANIF down + if [[ $CANIF == vcan* ]]; then + ip link delete $CANIF + fi +} + +test_raw_filter() +{ + ./test_raw_filter + check_err $? + log_test "test_raw_filter" +} + +trap cleanup EXIT +setup + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/cmsg_ip.sh b/tools/testing/selftests/net/cmsg_ip.sh new file mode 100755 index 000000000000..b55680e081ad --- /dev/null +++ b/tools/testing/selftests/net/cmsg_ip.sh @@ -0,0 +1,187 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +IP4=172.16.0.1/24 +TGT4=172.16.0.2 +IP6=2001:db8:1::1/64 +TGT6=2001:db8:1::2 +TMPF=$(mktemp --suffix ".pcap") + +cleanup() +{ + rm -f $TMPF + cleanup_ns $NS +} + +trap cleanup EXIT + +tcpdump -h | grep immediate-mode >> /dev/null +if [ $? -ne 0 ]; then + echo "SKIP - tcpdump with --immediate-mode option required" + exit $ksft_skip +fi + +# Namespaces +setup_ns NS +NSEXE="ip netns exec $NS" + +$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null + +# Connectivity +ip -netns $NS link add type dummy +ip -netns $NS link set dev dummy0 up +ip -netns $NS addr add $IP4 dev dummy0 +ip -netns $NS addr add $IP6 dev dummy0 + +# Test +BAD=0 +TOTAL=0 + +check_result() { + ((TOTAL++)) + if [ $1 -ne $2 ]; then + echo " Case $3 returned $1, expected $2" + ((BAD++)) + fi +} + +# IPV6_DONTFRAG +for ovr in setsock cmsg both diff; do + for df in 0 1; do + for p in u U i r; do + [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-F $df" + [ $ovr == "cmsg" ] && m="-f $df" + [ $ovr == "both" ] && m="-F $df -f $df" + [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df" + + $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234 + check_result $? $df "DONTFRAG $prot $ovr" + done + done +done + +# IP_TOS + IPV6_TCLASS + +test_dscp() { + local -r IPVER=$1 + local -r TGT=$2 + local -r MATCH=$3 + + local -r TOS=0x10 + local -r TOS2=0x20 + local -r ECN=0x3 + + ip $IPVER -netns $NS rule add tos $TOS lookup 300 + ip $IPVER -netns $NS route add table 300 prohibit any + + for ovr in setsock cmsg both diff; do + for p in u U i r; do + [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-C" + [ $ovr == "cmsg" ] && m="-c" + [ $ovr == "both" ] && m="-C $((TOS2)) -c" + [ $ovr == "diff" ] && m="-C $((TOS )) -c" + + $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & + BG=$! + sleep 0.05 + + $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234 + check_result $? 0 "$MATCH $prot $ovr - pass" + + while [ -d /proc/$BG ]; do + $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234 + done + + tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $TOS2" >> /dev/null + check_result $? 0 "$MATCH $prot $ovr - packet data" + rm $TMPF + + [ $ovr == "both" ] && m="-C $((TOS )) -c" + [ $ovr == "diff" ] && m="-C $((TOS2)) -c" + + # Match prohibit rule: expect failure + $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS)) -s $TGT 1234 + check_result $? 1 "$MATCH $prot $ovr - rejection" + + # Match prohibit rule: IPv4 masks ECN: expect failure + if [[ "$IPVER" == "-4" ]]; then + $NSEXE ./cmsg_sender $IPVER -p $p $m "$((TOS | ECN))" -s $TGT 1234 + check_result $? 1 "$MATCH $prot $ovr - rejection (ECN)" + fi + done + done +} + +test_dscp -4 $TGT4 tos +test_dscp -6 $TGT6 class + +# IP_TTL + IPV6_HOPLIMIT +test_ttl_hoplimit() { + local -r IPVER=$1 + local -r TGT=$2 + local -r MATCH=$3 + + local -r LIM=4 + + for ovr in setsock cmsg both diff; do + for p in u U i r; do + [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-L" + [ $ovr == "cmsg" ] && m="-l" + [ $ovr == "both" ] && m="-L $LIM -l" + [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l" + + $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & + BG=$! + sleep 0.05 + + $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234 + check_result $? 0 "$MATCH $prot $ovr - pass" + + while [ -d /proc/$BG ]; do + $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234 + done + + tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $LIM[^0-9]" >> /dev/null + check_result $? 0 "$MATCH $prot $ovr - packet data" + rm $TMPF + done + done +} + +test_ttl_hoplimit -4 $TGT4 ttl +test_ttl_hoplimit -6 $TGT6 hlim + +# IPV6 exthdr +for p in u U i r; do + # Very basic "does it crash" test + for h in h d r; do + $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234 + check_result $? 0 "ExtHdr $prot $ovr - pass" + done +done + +# Summary +if [ $BAD -ne 0 ]; then + echo "FAIL - $BAD/$TOTAL cases failed" + exit 1 +else + echo "OK" + exit 0 +fi diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh deleted file mode 100755 index 8bc23fb4c82b..000000000000 --- a/tools/testing/selftests/net/cmsg_ipv6.sh +++ /dev/null @@ -1,154 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -source lib.sh - -IP6=2001:db8:1::1/64 -TGT6=2001:db8:1::2 -TMPF=$(mktemp --suffix ".pcap") - -cleanup() -{ - rm -f $TMPF - cleanup_ns $NS -} - -trap cleanup EXIT - -tcpdump -h | grep immediate-mode >> /dev/null -if [ $? -ne 0 ]; then - echo "SKIP - tcpdump with --immediate-mode option required" - exit $ksft_skip -fi - -# Namespaces -setup_ns NS -NSEXE="ip netns exec $NS" - -$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null - -# Connectivity -ip -netns $NS link add type dummy -ip -netns $NS link set dev dummy0 up -ip -netns $NS addr add $IP6 dev dummy0 - -# Test -BAD=0 -TOTAL=0 - -check_result() { - ((TOTAL++)) - if [ $1 -ne $2 ]; then - echo " Case $3 returned $1, expected $2" - ((BAD++)) - fi -} - -# IPV6_DONTFRAG -for ovr in setsock cmsg both diff; do - for df in 0 1; do - for p in u i r; do - [ $p == "u" ] && prot=UDP - [ $p == "i" ] && prot=ICMP - [ $p == "r" ] && prot=RAW - - [ $ovr == "setsock" ] && m="-F $df" - [ $ovr == "cmsg" ] && m="-f $df" - [ $ovr == "both" ] && m="-F $df -f $df" - [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df" - - $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234 - check_result $? $df "DONTFRAG $prot $ovr" - done - done -done - -# IPV6_TCLASS -TOS=0x10 -TOS2=0x20 - -ip -6 -netns $NS rule add tos $TOS lookup 300 -ip -6 -netns $NS route add table 300 prohibit any - -for ovr in setsock cmsg both diff; do - for p in u i r; do - [ $p == "u" ] && prot=UDP - [ $p == "i" ] && prot=ICMP - [ $p == "r" ] && prot=RAW - - [ $ovr == "setsock" ] && m="-C" - [ $ovr == "cmsg" ] && m="-c" - [ $ovr == "both" ] && m="-C $((TOS2)) -c" - [ $ovr == "diff" ] && m="-C $((TOS )) -c" - - $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & - BG=$! - sleep 0.05 - - $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 - check_result $? 0 "TCLASS $prot $ovr - pass" - - while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 - done - - tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null - check_result $? 0 "TCLASS $prot $ovr - packet data" - rm $TMPF - - [ $ovr == "both" ] && m="-C $((TOS )) -c" - [ $ovr == "diff" ] && m="-C $((TOS2)) -c" - - $NSEXE ./cmsg_sender -6 -p $p $m $((TOS)) -s $TGT6 1234 - check_result $? 1 "TCLASS $prot $ovr - rejection" - done -done - -# IPV6_HOPLIMIT -LIM=4 - -for ovr in setsock cmsg both diff; do - for p in u i r; do - [ $p == "u" ] && prot=UDP - [ $p == "i" ] && prot=ICMP - [ $p == "r" ] && prot=RAW - - [ $ovr == "setsock" ] && m="-L" - [ $ovr == "cmsg" ] && m="-l" - [ $ovr == "both" ] && m="-L $LIM -l" - [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l" - - $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & - BG=$! - sleep 0.05 - - $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 - check_result $? 0 "HOPLIMIT $prot $ovr - pass" - - while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 - done - - tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null - check_result $? 0 "HOPLIMIT $prot $ovr - packet data" - rm $TMPF - done -done - -# IPV6 exthdr -for p in u i r; do - # Very basic "does it crash" test - for h in h d r; do - $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234 - check_result $? 0 "ExtHdr $prot $ovr - pass" - done -done - -# Summary -if [ $BAD -ne 0 ]; then - echo "FAIL - $BAD/$TOTAL cases failed" - exit 1 -else - echo "OK" - exit 0 -fi diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index c79e65581dc3..a825e628aee7 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -33,6 +33,7 @@ enum { ERN_RECVERR, ERN_CMSG_RD, ERN_CMSG_RCV, + ERN_SEND_MORE, }; struct option_cmsg_u32 { @@ -46,6 +47,7 @@ struct options { const char *service; unsigned int size; unsigned int num_pkt; + bool msg_more; struct { unsigned int mark; unsigned int dontfrag; @@ -59,6 +61,7 @@ struct options { unsigned int proto; } sock; struct option_cmsg_u32 mark; + struct option_cmsg_u32 priority; struct { bool ena; unsigned int delay; @@ -71,7 +74,7 @@ struct options { struct option_cmsg_u32 tclass; struct option_cmsg_u32 hlimit; struct option_cmsg_u32 exthdr; - } v6; + } cmsg; } opt = { .size = 13, .num_pkt = 1, @@ -93,21 +96,24 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\t\t-S send() size\n" "\t\t-4/-6 Force IPv4 / IPv6 only\n" "\t\t-p prot Socket protocol\n" - "\t\t (u = UDP (default); i = ICMP; r = RAW)\n" + "\t\t (u = UDP (default); i = ICMP; r = RAW;\n" + "\t\t U = UDP with MSG_MORE)\n" "\n" "\t\t-m val Set SO_MARK with given value\n" "\t\t-M val Set SO_MARK via setsockopt\n" + "\t\t-P val Set SO_PRIORITY via setsockopt\n" + "\t\t-Q val Set SO_PRIORITY via cmsg\n" "\t\t-d val Set SO_TXTIME with given delay (usec)\n" "\t\t-t Enable time stamp reporting\n" "\t\t-f val Set don't fragment via cmsg\n" "\t\t-F val Set don't fragment via setsockopt\n" - "\t\t-c val Set TCLASS via cmsg\n" - "\t\t-C val Set TCLASS via setsockopt\n" - "\t\t-l val Set HOPLIMIT via cmsg\n" - "\t\t-L val Set HOPLIMIT via setsockopt\n" + "\t\t-c val Set TOS/TCLASS via cmsg\n" + "\t\t-C val Set TOS/TCLASS via setsockopt\n" + "\t\t-l val Set TTL/HOPLIMIT via cmsg\n" + "\t\t-L val Set TTL/HOPLIMIT via setsockopt\n" "\t\t-H type Add an IPv6 header option\n" - "\t\t (h = HOP; d = DST; r = RTDST)" - ""); + "\t\t (h = HOP; d = DST; r = RTDST)\n" + "\n"); exit(ERN_HELP); } @@ -115,7 +121,7 @@ static void cs_parse_args(int argc, char *argv[]) { int o; - while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) { + while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:Q:")) != -1) { switch (o) { case 's': opt.silent_send = true; @@ -130,8 +136,11 @@ static void cs_parse_args(int argc, char *argv[]) opt.sock.family = AF_INET6; break; case 'p': - if (*optarg == 'u' || *optarg == 'U') { + if (*optarg == 'u') { opt.sock.proto = IPPROTO_UDP; + } else if (*optarg == 'U') { + opt.sock.proto = IPPROTO_UDP; + opt.msg_more = true; } else if (*optarg == 'i' || *optarg == 'I') { opt.sock.proto = IPPROTO_ICMP; } else if (*optarg == 'r') { @@ -148,6 +157,10 @@ static void cs_parse_args(int argc, char *argv[]) opt.mark.ena = true; opt.mark.val = atoi(optarg); break; + case 'Q': + opt.priority.ena = true; + opt.priority.val = atoi(optarg); + break; case 'M': opt.sockopt.mark = atoi(optarg); break; @@ -162,37 +175,37 @@ static void cs_parse_args(int argc, char *argv[]) opt.ts.ena = true; break; case 'f': - opt.v6.dontfrag.ena = true; - opt.v6.dontfrag.val = atoi(optarg); + opt.cmsg.dontfrag.ena = true; + opt.cmsg.dontfrag.val = atoi(optarg); break; case 'F': opt.sockopt.dontfrag = atoi(optarg); break; case 'c': - opt.v6.tclass.ena = true; - opt.v6.tclass.val = atoi(optarg); + opt.cmsg.tclass.ena = true; + opt.cmsg.tclass.val = atoi(optarg); break; case 'C': opt.sockopt.tclass = atoi(optarg); break; case 'l': - opt.v6.hlimit.ena = true; - opt.v6.hlimit.val = atoi(optarg); + opt.cmsg.hlimit.ena = true; + opt.cmsg.hlimit.val = atoi(optarg); break; case 'L': opt.sockopt.hlimit = atoi(optarg); break; case 'H': - opt.v6.exthdr.ena = true; + opt.cmsg.exthdr.ena = true; switch (optarg[0]) { case 'h': - opt.v6.exthdr.val = IPV6_HOPOPTS; + opt.cmsg.exthdr.val = IPV6_HOPOPTS; break; case 'd': - opt.v6.exthdr.val = IPV6_DSTOPTS; + opt.cmsg.exthdr.val = IPV6_DSTOPTS; break; case 'r': - opt.v6.exthdr.val = IPV6_RTHDRDSTOPTS; + opt.cmsg.exthdr.val = IPV6_RTHDRDSTOPTS; break; default: printf("Error: hdr type: %s\n", optarg); @@ -253,22 +266,25 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_SOCKET, SO_MARK, &opt.mark); ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, - SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag); - ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, - SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass); - ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, - SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit); + SOL_SOCKET, SO_PRIORITY, &opt.priority); + + if (opt.sock.family == AF_INET) { + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IP, IP_TOS, &opt.cmsg.tclass); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IP, IP_TTL, &opt.cmsg.hlimit); + } else { + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_DONTFRAG, &opt.cmsg.dontfrag); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_TCLASS, &opt.cmsg.tclass); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_HOPLIMIT, &opt.cmsg.hlimit); + } if (opt.txtime.ena) { - struct sock_txtime so_txtime = { - .clockid = CLOCK_MONOTONIC, - }; __u64 txtime; - if (setsockopt(fd, SOL_SOCKET, SO_TXTIME, - &so_txtime, sizeof(so_txtime))) - error(ERN_SOCKOPT, errno, "setsockopt TXTIME"); - txtime = time_start_mono.tv_sec * (1000ULL * 1000 * 1000) + time_start_mono.tv_nsec + opt.txtime.delay * 1000; @@ -284,13 +300,6 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) memcpy(CMSG_DATA(cmsg), &txtime, sizeof(txtime)); } if (opt.ts.ena) { - __u32 val = SOF_TIMESTAMPING_SOFTWARE | - SOF_TIMESTAMPING_OPT_TSONLY; - - if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, - &val, sizeof(val))) - error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING"); - cmsg = (struct cmsghdr *)(cbuf + cmsg_len); cmsg_len += CMSG_SPACE(sizeof(__u32)); if (cbuf_sz < cmsg_len) @@ -302,14 +311,14 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) *(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE; } - if (opt.v6.exthdr.ena) { + if (opt.cmsg.exthdr.ena) { cmsg = (struct cmsghdr *)(cbuf + cmsg_len); cmsg_len += CMSG_SPACE(8); if (cbuf_sz < cmsg_len) error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small"); cmsg->cmsg_level = SOL_IPV6; - cmsg->cmsg_type = opt.v6.exthdr.val; + cmsg->cmsg_type = opt.cmsg.exthdr.val; cmsg->cmsg_len = CMSG_LEN(8); *(__u64 *)CMSG_DATA(cmsg) = 0; } @@ -333,16 +342,17 @@ static const char *cs_ts_info2str(unsigned int info) return "unknown"; } -static void +static unsigned long cs_read_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) { struct sock_extended_err *see; struct scm_timestamping *ts; + unsigned long ts_seen = 0; struct cmsghdr *cmsg; int i, err; if (!opt.ts.ena) - return; + return 0; msg->msg_control = cbuf; msg->msg_controllen = cbuf_sz; @@ -396,8 +406,11 @@ cs_read_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) printf(" %5s ts%d %lluus\n", cs_ts_info2str(see->ee_info), i, rel_time); + ts_seen |= 1 << see->ee_info; } } + + return ts_seen; } static void ca_set_sockopts(int fd) @@ -406,22 +419,52 @@ static void ca_set_sockopts(int fd) setsockopt(fd, SOL_SOCKET, SO_MARK, &opt.sockopt.mark, sizeof(opt.sockopt.mark))) error(ERN_SOCKOPT, errno, "setsockopt SO_MARK"); - if (opt.sockopt.dontfrag && - setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG, - &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag))) - error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG"); - if (opt.sockopt.tclass && - setsockopt(fd, SOL_IPV6, IPV6_TCLASS, - &opt.sockopt.tclass, sizeof(opt.sockopt.tclass))) - error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS"); - if (opt.sockopt.hlimit && - setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, - &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) - error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT"); if (opt.sockopt.priority && setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &opt.sockopt.priority, sizeof(opt.sockopt.priority))) error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY"); + + if (opt.sock.family == AF_INET) { + if (opt.sockopt.tclass && + setsockopt(fd, SOL_IP, IP_TOS, + &opt.sockopt.tclass, sizeof(opt.sockopt.tclass))) + error(ERN_SOCKOPT, errno, "setsockopt IP_TOS"); + if (opt.sockopt.hlimit && + setsockopt(fd, SOL_IP, IP_TTL, + &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) + error(ERN_SOCKOPT, errno, "setsockopt IP_TTL"); + } else { + if (opt.sockopt.dontfrag && + setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG, + &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG"); + if (opt.sockopt.tclass && + setsockopt(fd, SOL_IPV6, IPV6_TCLASS, + &opt.sockopt.tclass, sizeof(opt.sockopt.tclass))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS"); + if (opt.sockopt.hlimit && + setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, + &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT"); + } + + if (opt.txtime.ena) { + struct sock_txtime so_txtime = { + .clockid = CLOCK_MONOTONIC, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_TXTIME, + &so_txtime, sizeof(so_txtime))) + error(ERN_SOCKOPT, errno, "setsockopt TXTIME"); + } + if (opt.ts.ena) { + __u32 val = SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_TSONLY; + + if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, + &val, sizeof(val))) + error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING"); + } } int main(int argc, char *argv[]) @@ -494,7 +537,7 @@ int main(int argc, char *argv[]) cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf)); for (i = 0; i < opt.num_pkt; i++) { - err = sendmsg(fd, &msg, 0); + err = sendmsg(fd, &msg, opt.msg_more ? MSG_MORE : 0); if (err < 0) { if (!opt.silent_send) fprintf(stderr, "send failed: %s\n", strerror(errno)); @@ -505,14 +548,28 @@ int main(int argc, char *argv[]) err = ERN_SEND_SHORT; goto err_out; } + if (opt.msg_more) { + err = write(fd, NULL, 0); + if (err < 0) { + fprintf(stderr, "send more: %s\n", strerror(errno)); + err = ERN_SEND_MORE; + goto err_out; + } + } } err = ERN_SUCCESS; if (opt.ts.ena) { - /* Make sure all timestamps have time to loop back */ - usleep(opt.txtime.delay); + unsigned long seen; + int i; - cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf)); + /* Make sure all timestamps have time to loop back */ + for (i = 0; i < 40; i++) { + seen = cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf)); + if (seen & (1 << SCM_TSTAMP_SND)) + break; + usleep(opt.txtime.delay / 20); + } } err_out: diff --git a/tools/testing/selftests/net/cmsg_so_priority.sh b/tools/testing/selftests/net/cmsg_so_priority.sh new file mode 100755 index 000000000000..ee07d8653262 --- /dev/null +++ b/tools/testing/selftests/net/cmsg_so_priority.sh @@ -0,0 +1,151 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +readonly KSFT_SKIP=4 + +IP4=192.0.2.1/24 +TGT4=192.0.2.2 +TGT4_RAW=192.0.2.3 +IP6=2001:db8::1/64 +TGT6=2001:db8::2 +TGT6_RAW=2001:db8::3 +PORT=1234 +TOTAL_TESTS=0 +FAILED_TESTS=0 + +if ! command -v jq &> /dev/null; then + echo "SKIP cmsg_so_priroity.sh test: jq is not installed." >&2 + exit "$KSFT_SKIP" +fi + +check_result() { + ((TOTAL_TESTS++)) + if [ "$1" -ne 0 ]; then + ((FAILED_TESTS++)) + fi +} + +cleanup() +{ + cleanup_ns $NS +} + +trap cleanup EXIT + +setup_ns NS + +create_filter() { + local handle=$1 + local vlan_prio=$2 + local ip_type=$3 + local proto=$4 + local dst_ip=$5 + local ip_proto + + if [[ "$proto" == "u" ]]; then + ip_proto="udp" + elif [[ "$ip_type" == "ipv4" && "$proto" == "i" ]]; then + ip_proto="icmp" + elif [[ "$ip_type" == "ipv6" && "$proto" == "i" ]]; then + ip_proto="icmpv6" + fi + + tc -n $NS filter add dev dummy1 \ + egress pref 1 handle "$handle" proto 802.1q \ + flower vlan_prio "$vlan_prio" vlan_ethtype "$ip_type" \ + dst_ip "$dst_ip" ${ip_proto:+ip_proto $ip_proto} \ + action pass +} + +ip -n $NS link set dev lo up +ip -n $NS link add name dummy1 up type dummy + +ip -n $NS link add link dummy1 name dummy1.10 up type vlan id 10 \ + egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +ip -n $NS address add $IP4 dev dummy1.10 +ip -n $NS address add $IP6 dev dummy1.10 nodad + +ip netns exec $NS sysctl -wq net.ipv4.ping_group_range='0 2147483647' + +ip -n $NS neigh add $TGT4 lladdr 00:11:22:33:44:55 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT6 lladdr 00:11:22:33:44:55 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT4_RAW lladdr 00:11:22:33:44:66 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT6_RAW lladdr 00:11:22:33:44:66 nud permanent \ + dev dummy1.10 + +tc -n $NS qdisc add dev dummy1 clsact + +FILTER_COUNTER=10 + +for i in 4 6; do + for proto in u i r; do + echo "Test IPV$i, prot: $proto" + for priority in {0..7}; do + if [[ $i == 4 && $proto == "r" ]]; then + TGT=$TGT4_RAW + elif [[ $i == 6 && $proto == "r" ]]; then + TGT=$TGT6_RAW + elif [ $i == 4 ]; then + TGT=$TGT4 + else + TGT=$TGT6 + fi + + handle="${FILTER_COUNTER}${priority}" + + create_filter $handle $priority ipv$i $proto $TGT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + + if [[ $pkts == 0 ]]; then + check_result 0 + else + echo "prio $priority: expected 0, got $pkts" + check_result 1 + fi + + ip netns exec $NS ./cmsg_sender -$i -Q $priority \ + -p $proto $TGT $PORT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + if [[ $pkts == 1 ]]; then + check_result 0 + else + echo "prio $priority -Q: expected 1, got $pkts" + check_result 1 + fi + + ip netns exec $NS ./cmsg_sender -$i -P $priority \ + -p $proto $TGT $PORT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + if [[ $pkts == 2 ]]; then + check_result 0 + else + echo "prio $priority -P: expected 2, got $pkts" + check_result 1 + fi + done + FILTER_COUNTER=$((FILTER_COUNTER + 10)) + done +done + +if [ $FAILED_TESTS -ne 0 ]; then + echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed" + exit 1 +else + echo "OK - All $TOTAL_TESTS tests passed" + exit 0 +fi diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh index af85267ad1e3..478af0aefa97 100755 --- a/tools/testing/selftests/net/cmsg_time.sh +++ b/tools/testing/selftests/net/cmsg_time.sh @@ -34,13 +34,28 @@ BAD=0 TOTAL=0 check_result() { + local ret=$1 + local got=$2 + local exp=$3 + local case=$4 + local xfail=$5 + local xf= + local inc= + + if [ "$xfail" == "xfail" ]; then + xf="(XFAIL)" + inc=0 + else + inc=1 + fi + ((TOTAL++)) - if [ $1 -ne 0 ]; then - echo " Case $4 returned $1, expected 0" - ((BAD++)) + if [ $ret -ne 0 ]; then + echo " Case $case returned $ret, expected 0 $xf" + ((BAD+=inc)) elif [ "$2" != "$3" ]; then - echo " Case $4 returned '$2', expected '$3'" - ((BAD++)) + echo " Case $case returned '$got', expected '$exp' $xf" + ((BAD+=inc)) fi } @@ -66,11 +81,14 @@ for i in "-4 $TGT4" "-6 $TGT6"; do awk '/SND/ { if ($3 > 1000) print "OK"; }') check_result $? "$ts" "OK" "$prot - TXTIME abs" + [ "$KSFT_MACHINE_SLOW" = yes ] && xfail=xfail + ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 | awk '/SND/ {snd=$3} /SCHED/ {sch=$3} - END { if (snd - sch > 500) print "OK"; }') - check_result $? "$ts" "OK" "$prot - TXTIME rel" + END { if (snd - sch > 500) print "OK"; + else print snd, "-", sch, "<", 500; }') + check_result $? "$ts" "OK" "$prot - TXTIME rel" $xfail done done diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 5e4390cac17e..3cfef5153823 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -18,6 +18,8 @@ CONFIG_DUMMY=y CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_BTF_MODULES=n CONFIG_VLAN_8021Q=y CONFIG_GENEVE=m CONFIG_IFB=y @@ -26,12 +28,11 @@ CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_IP_GRE=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m +CONFIG_IPV6_MROUTE=y CONFIG_IPV6_SIT=y -CONFIG_IP_DCCP=m CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES=m @@ -74,7 +75,12 @@ CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y CONFIG_NET_SCH_PRIO=m CONFIG_NFT_COMPAT=m +CONFIG_NF_CONNTRACK_OVS=y CONFIG_NF_FLOW_TABLE=m +CONFIG_OPENVSWITCH=m +CONFIG_OPENVSWITCH_GENEVE=m +CONFIG_OPENVSWITCH_GRE=m +CONFIG_OPENVSWITCH_VXLAN=m CONFIG_PSAMPLE=m CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m @@ -100,3 +106,13 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_USER=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IPVLAN=m +CONFIG_CAN=m +CONFIG_CAN_DEV=m +CONFIG_CAN_VXCAN=m +CONFIG_NETKIT=y +CONFIG_NET_PKTGEN=m +CONFIG_IPV6_ILA=m +CONFIG_IPV6_RPL_LWTUNNEL=y diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py deleted file mode 100755 index 2d84c7a0be6b..000000000000 --- a/tools/testing/selftests/net/devlink_port_split.py +++ /dev/null @@ -1,309 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 - -from subprocess import PIPE, Popen -import json -import time -import argparse -import collections -import sys - -# -# Test port split configuration using devlink-port lanes attribute. -# The test is skipped in case the attribute is not available. -# -# First, check that all the ports with 1 lane fail to split. -# Second, check that all the ports with more than 1 lane can be split -# to all valid configurations (e.g., split to 2, split to 4 etc.) -# - - -# Kselftest framework requirement - SKIP code is 4 -KSFT_SKIP=4 -Port = collections.namedtuple('Port', 'bus_info name') - - -def run_command(cmd, should_fail=False): - """ - Run a command in subprocess. - Return: Tuple of (stdout, stderr). - """ - - p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) - stdout, stderr = p.communicate() - stdout, stderr = stdout.decode(), stderr.decode() - - if stderr != "" and not should_fail: - print("Error sending command: %s" % cmd) - print(stdout) - print(stderr) - return stdout, stderr - - -class devlink_ports(object): - """ - Class that holds information on the devlink ports, required to the tests; - if_names: A list of interfaces in the devlink ports. - """ - - def get_if_names(dev): - """ - Get a list of physical devlink ports. - Return: Array of tuples (bus_info/port, if_name). - """ - - arr = [] - - cmd = "devlink -j port show" - stdout, stderr = run_command(cmd) - assert stderr == "" - ports = json.loads(stdout)['port'] - - validate_devlink_output(ports, 'flavour') - - for port in ports: - if dev in port: - if ports[port]['flavour'] == 'physical': - arr.append(Port(bus_info=port, name=ports[port]['netdev'])) - - return arr - - def __init__(self, dev): - self.if_names = devlink_ports.get_if_names(dev) - - -def get_max_lanes(port): - """ - Get the $port's maximum number of lanes. - Return: number of lanes, e.g. 1, 2, 4 and 8. - """ - - cmd = "devlink -j port show %s" % port - stdout, stderr = run_command(cmd) - assert stderr == "" - values = list(json.loads(stdout)['port'].values())[0] - - if 'lanes' in values: - lanes = values['lanes'] - else: - lanes = 0 - return lanes - - -def get_split_ability(port): - """ - Get the $port split ability. - Return: split ability, true or false. - """ - - cmd = "devlink -j port show %s" % port.name - stdout, stderr = run_command(cmd) - assert stderr == "" - values = list(json.loads(stdout)['port'].values())[0] - - return values['splittable'] - - -def split(k, port, should_fail=False): - """ - Split $port into $k ports. - If should_fail == True, the split should fail. Otherwise, should pass. - Return: Array of sub ports after splitting. - If the $port wasn't split, the array will be empty. - """ - - cmd = "devlink port split %s count %s" % (port.bus_info, k) - stdout, stderr = run_command(cmd, should_fail=should_fail) - - if should_fail: - if not test(stderr != "", "%s is unsplittable" % port.name): - print("split an unsplittable port %s" % port.name) - return create_split_group(port, k) - else: - if stderr == "": - return create_split_group(port, k) - print("didn't split a splittable port %s" % port.name) - - return [] - - -def unsplit(port): - """ - Unsplit $port. - """ - - cmd = "devlink port unsplit %s" % port - stdout, stderr = run_command(cmd) - test(stderr == "", "Unsplit port %s" % port) - - -def exists(port, dev): - """ - Check if $port exists in the devlink ports. - Return: True is so, False otherwise. - """ - - return any(dev_port.name == port - for dev_port in devlink_ports.get_if_names(dev)) - - -def exists_and_lanes(ports, lanes, dev): - """ - Check if every port in the list $ports exists in the devlink ports and has - $lanes number of lanes after splitting. - Return: True if both are True, False otherwise. - """ - - for port in ports: - max_lanes = get_max_lanes(port) - if not exists(port, dev): - print("port %s doesn't exist in devlink ports" % port) - return False - if max_lanes != lanes: - print("port %s has %d lanes, but %s were expected" - % (port, lanes, max_lanes)) - return False - return True - - -def test(cond, msg): - """ - Check $cond and print a message accordingly. - Return: True is pass, False otherwise. - """ - - if cond: - print("TEST: %-60s [ OK ]" % msg) - else: - print("TEST: %-60s [FAIL]" % msg) - - return cond - - -def create_split_group(port, k): - """ - Create the split group for $port. - Return: Array with $k elements, which are the split port group. - """ - - return list(port.name + "s" + str(i) for i in range(k)) - - -def split_unsplittable_port(port, k): - """ - Test that splitting of unsplittable port fails. - """ - - # split to max - new_split_group = split(k, port, should_fail=True) - - if new_split_group != []: - unsplit(port.bus_info) - - -def split_splittable_port(port, k, lanes, dev): - """ - Test that splitting of splittable port passes correctly. - """ - - new_split_group = split(k, port) - - # Once the split command ends, it takes some time to the sub ifaces' - # to get their names. Use udevadm to continue only when all current udev - # events are handled. - cmd = "udevadm settle" - stdout, stderr = run_command(cmd) - assert stderr == "" - - if new_split_group != []: - test(exists_and_lanes(new_split_group, lanes/k, dev), - "split port %s into %s" % (port.name, k)) - - unsplit(port.bus_info) - - -def validate_devlink_output(devlink_data, target_property=None): - """ - Determine if test should be skipped by checking: - 1. devlink_data contains values - 2. The target_property exist in devlink_data - """ - skip_reason = None - if any(devlink_data.values()): - if target_property: - skip_reason = "{} not found in devlink output, test skipped".format(target_property) - for key in devlink_data: - if target_property in devlink_data[key]: - skip_reason = None - else: - skip_reason = 'devlink output is empty, test skipped' - - if skip_reason: - print(skip_reason) - sys.exit(KSFT_SKIP) - - -def make_parser(): - parser = argparse.ArgumentParser(description='A test for port splitting.') - parser.add_argument('--dev', - help='The devlink handle of the device under test. ' + - 'The default is the first registered devlink ' + - 'handle.') - - return parser - - -def main(cmdline=None): - parser = make_parser() - args = parser.parse_args(cmdline) - - dev = args.dev - if not dev: - cmd = "devlink -j dev show" - stdout, stderr = run_command(cmd) - assert stderr == "" - - validate_devlink_output(json.loads(stdout)) - devs = json.loads(stdout)['dev'] - dev = list(devs.keys())[0] - - cmd = "devlink dev show %s" % dev - stdout, stderr = run_command(cmd) - if stderr != "": - print("devlink device %s can not be found" % dev) - sys.exit(1) - - ports = devlink_ports(dev) - - found_max_lanes = False - for port in ports.if_names: - max_lanes = get_max_lanes(port.name) - - # If max lanes is 0, do not test port splitting at all - if max_lanes == 0: - continue - - # If 1 lane, shouldn't be able to split - elif max_lanes == 1: - test(not get_split_ability(port), - "%s should not be able to split" % port.name) - split_unsplittable_port(port, max_lanes) - - # Else, splitting should pass and all the split ports should exist. - else: - lane = max_lanes - test(get_split_ability(port), - "%s should be able to split" % port.name) - while lane > 1: - split_splittable_port(port, lane, max_lanes, dev) - - lane //= 2 - found_max_lanes = True - - if not found_max_lanes: - print(f"Test not started, no port of device {dev} reports max_lanes") - sys.exit(KSFT_SKIP) - - -if __name__ == "__main__": - main() diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh index 7c4818c971fc..507d0a82f5f0 100755 --- a/tools/testing/selftests/net/drop_monitor_tests.sh +++ b/tools/testing/selftests/net/drop_monitor_tests.sh @@ -77,7 +77,7 @@ sw_drops_test() rm ${dir}/packets.pcap - { kill %% && wait %%; } 2>/dev/null + kill_process %% timeout 5 dwdump -o sw -w ${dir}/packets.pcap (( $(tshark -r ${dir}/packets.pcap \ -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) == 0)) diff --git a/tools/testing/selftests/net/epoll_busy_poll.c b/tools/testing/selftests/net/epoll_busy_poll.c new file mode 100644 index 000000000000..16e457c2f877 --- /dev/null +++ b/tools/testing/selftests/net/epoll_busy_poll.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* Basic per-epoll context busy poll test. + * + * Only tests the ioctls, but should be expanded to test two connected hosts in + * the future + */ + +#define _GNU_SOURCE + +#include <error.h> +#include <errno.h> +#include <inttypes.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sys/capability.h> + +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/socket.h> + +#include "../kselftest_harness.h" + +/* if the headers haven't been updated, we need to define some things */ +#if !defined(EPOLL_IOC_TYPE) +struct epoll_params { + uint32_t busy_poll_usecs; + uint16_t busy_poll_budget; + uint8_t prefer_busy_poll; + + /* pad the struct to a multiple of 64bits */ + uint8_t __pad; +}; + +#define EPOLL_IOC_TYPE 0x8A +#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) +#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) +#endif + +FIXTURE(invalid_fd) +{ + int invalid_fd; + struct epoll_params params; +}; + +FIXTURE_SETUP(invalid_fd) +{ + int ret; + + ret = socket(AF_UNIX, SOCK_DGRAM, 0); + EXPECT_NE(-1, ret) + TH_LOG("error creating unix socket"); + + self->invalid_fd = ret; +} + +FIXTURE_TEARDOWN(invalid_fd) +{ + int ret; + + ret = close(self->invalid_fd); + EXPECT_EQ(0, ret); +} + +TEST_F(invalid_fd, test_invalid_fd) +{ + int ret; + + ret = ioctl(self->invalid_fd, EPIOCGPARAMS, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCGPARAMS on invalid epoll FD should error"); + + EXPECT_EQ(ENOTTY, errno) + TH_LOG("EPIOCGPARAMS on invalid epoll FD should set errno to ENOTTY"); + + memset(&self->params, 0, sizeof(struct epoll_params)); + + ret = ioctl(self->invalid_fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCSPARAMS on invalid epoll FD should error"); + + EXPECT_EQ(ENOTTY, errno) + TH_LOG("EPIOCSPARAMS on invalid epoll FD should set errno to ENOTTY"); +} + +FIXTURE(epoll_busy_poll) +{ + int fd; + struct epoll_params params; + struct epoll_params *invalid_params; + cap_t caps; +}; + +FIXTURE_SETUP(epoll_busy_poll) +{ + int ret; + + ret = epoll_create1(0); + EXPECT_NE(-1, ret) + TH_LOG("epoll_create1 failed?"); + + self->fd = ret; + + self->caps = cap_get_proc(); + EXPECT_NE(NULL, self->caps); +} + +FIXTURE_TEARDOWN(epoll_busy_poll) +{ + int ret; + + ret = close(self->fd); + EXPECT_EQ(0, ret); + + ret = cap_free(self->caps); + EXPECT_NE(-1, ret) + TH_LOG("unable to free capabilities"); +} + +TEST_F(epoll_busy_poll, test_get_params) +{ + /* begin by getting the epoll params from the kernel + * + * the default should be default and all fields should be zero'd by the + * kernel, so set params fields to garbage to test this. + */ + int ret = 0; + + self->params.busy_poll_usecs = 0xff; + self->params.busy_poll_budget = 0xff; + self->params.prefer_busy_poll = 1; + self->params.__pad = 0xf; + + ret = ioctl(self->fd, EPIOCGPARAMS, &self->params); + EXPECT_EQ(0, ret) + TH_LOG("ioctl EPIOCGPARAMS should succeed"); + + EXPECT_EQ(0, self->params.busy_poll_usecs) + TH_LOG("EPIOCGPARAMS busy_poll_usecs should have been 0"); + + EXPECT_EQ(0, self->params.busy_poll_budget) + TH_LOG("EPIOCGPARAMS busy_poll_budget should have been 0"); + + EXPECT_EQ(0, self->params.prefer_busy_poll) + TH_LOG("EPIOCGPARAMS prefer_busy_poll should have been 0"); + + EXPECT_EQ(0, self->params.__pad) + TH_LOG("EPIOCGPARAMS __pad should have been 0"); + + self->invalid_params = (struct epoll_params *)0xdeadbeef; + ret = ioctl(self->fd, EPIOCGPARAMS, self->invalid_params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCGPARAMS should error with invalid params"); + + EXPECT_EQ(EFAULT, errno) + TH_LOG("EPIOCGPARAMS with invalid params should set errno to EFAULT"); +} + +TEST_F(epoll_busy_poll, test_set_invalid) +{ + int ret; + + memset(&self->params, 0, sizeof(struct epoll_params)); + + self->params.__pad = 1; + + ret = ioctl(self->fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCSPARAMS non-zero __pad should error"); + + EXPECT_EQ(EINVAL, errno) + TH_LOG("EPIOCSPARAMS non-zero __pad errno should be EINVAL"); + + self->params.__pad = 0; + self->params.busy_poll_usecs = (uint32_t)INT_MAX + 1; + + ret = ioctl(self->fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCSPARAMS should error busy_poll_usecs > S32_MAX"); + + EXPECT_EQ(EINVAL, errno) + TH_LOG("EPIOCSPARAMS busy_poll_usecs > S32_MAX errno should be EINVAL"); + + self->params.__pad = 0; + self->params.busy_poll_usecs = 32; + self->params.prefer_busy_poll = 2; + + ret = ioctl(self->fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCSPARAMS should error prefer_busy_poll > 1"); + + EXPECT_EQ(EINVAL, errno) + TH_LOG("EPIOCSPARAMS prefer_busy_poll > 1 errno should be EINVAL"); + + self->params.__pad = 0; + self->params.busy_poll_usecs = 32; + self->params.prefer_busy_poll = 1; + + /* set budget well above kernel's NAPI_POLL_WEIGHT of 64 */ + self->params.busy_poll_budget = UINT16_MAX; + + /* test harness should run with CAP_NET_ADMIN, but let's make sure */ + cap_flag_value_t tmp; + + ret = cap_get_flag(self->caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &tmp); + EXPECT_EQ(0, ret) + TH_LOG("unable to get CAP_NET_ADMIN cap flag"); + + EXPECT_EQ(CAP_SET, tmp) + TH_LOG("expecting CAP_NET_ADMIN to be set for the test harness"); + + /* at this point we know CAP_NET_ADMIN is available, so setting the + * params with a busy_poll_budget > NAPI_POLL_WEIGHT should succeed + */ + ret = ioctl(self->fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(0, ret) + TH_LOG("EPIOCSPARAMS should allow busy_poll_budget > NAPI_POLL_WEIGHT"); + + /* remove CAP_NET_ADMIN from our effective set */ + cap_value_t net_admin[] = { CAP_NET_ADMIN }; + + ret = cap_set_flag(self->caps, CAP_EFFECTIVE, 1, net_admin, CAP_CLEAR); + EXPECT_EQ(0, ret) + TH_LOG("couldn't clear CAP_NET_ADMIN"); + + ret = cap_set_proc(self->caps); + EXPECT_EQ(0, ret) + TH_LOG("cap_set_proc should drop CAP_NET_ADMIN"); + + /* this is now expected to fail */ + ret = ioctl(self->fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCSPARAMS should error busy_poll_budget > NAPI_POLL_WEIGHT"); + + EXPECT_EQ(EPERM, errno) + TH_LOG("EPIOCSPARAMS errno should be EPERM busy_poll_budget > NAPI_POLL_WEIGHT"); + + /* restore CAP_NET_ADMIN to our effective set */ + ret = cap_set_flag(self->caps, CAP_EFFECTIVE, 1, net_admin, CAP_SET); + EXPECT_EQ(0, ret) + TH_LOG("couldn't restore CAP_NET_ADMIN"); + + ret = cap_set_proc(self->caps); + EXPECT_EQ(0, ret) + TH_LOG("cap_set_proc should set CAP_NET_ADMIN"); + + self->invalid_params = (struct epoll_params *)0xdeadbeef; + ret = ioctl(self->fd, EPIOCSPARAMS, self->invalid_params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCSPARAMS should error when epoll_params is invalid"); + + EXPECT_EQ(EFAULT, errno) + TH_LOG("EPIOCSPARAMS should set errno to EFAULT when epoll_params is invalid"); +} + +TEST_F(epoll_busy_poll, test_set_and_get_valid) +{ + int ret; + + memset(&self->params, 0, sizeof(struct epoll_params)); + + self->params.busy_poll_usecs = 25; + self->params.busy_poll_budget = 16; + self->params.prefer_busy_poll = 1; + + ret = ioctl(self->fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(0, ret) + TH_LOG("EPIOCSPARAMS with valid params should not error"); + + /* check that the kernel returns the same values back */ + + memset(&self->params, 0, sizeof(struct epoll_params)); + + ret = ioctl(self->fd, EPIOCGPARAMS, &self->params); + + EXPECT_EQ(0, ret) + TH_LOG("EPIOCGPARAMS should not error"); + + EXPECT_EQ(25, self->params.busy_poll_usecs) + TH_LOG("params.busy_poll_usecs incorrect"); + + EXPECT_EQ(16, self->params.busy_poll_budget) + TH_LOG("params.busy_poll_budget incorrect"); + + EXPECT_EQ(1, self->params.prefer_busy_poll) + TH_LOG("params.prefer_busy_poll incorrect"); + + EXPECT_EQ(0, self->params.__pad) + TH_LOG("params.__pad was not 0"); +} + +TEST_F(epoll_busy_poll, test_invalid_ioctl) +{ + int invalid_ioctl = EPIOCGPARAMS + 10; + int ret; + + ret = ioctl(self->fd, invalid_ioctl, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("invalid ioctl should return error"); + + EXPECT_EQ(EINVAL, errno) + TH_LOG("invalid ioctl should set errno to EINVAL"); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 386ebd829df5..4fcc38907e48 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -3667,7 +3667,7 @@ ipv6_addr_bind_novrf() # when it really should not a=${NSA_LO_IP6} log_start - show_hint "Tecnically should fail since address is not on device but kernel allows" + show_hint "Technically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address" } @@ -3724,7 +3724,7 @@ ipv6_addr_bind_vrf() # passes when it really should not a=${VRF_IP6} log_start - show_hint "Tecnically should fail since address is not on device but kernel allows" + show_hint "Technically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind" @@ -4304,14 +4304,7 @@ elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" fi -# nettest can be run from PATH or from same directory as this selftest -if ! which nettest >/dev/null; then - PATH=$PWD:$PATH - if ! which nettest >/dev/null; then - echo "'nettest' command not found; skipping tests" - exit $ksft_skip - fi -fi +check_gen_prog "nettest" declare -i nfail=0 declare -i nsuccess=0 diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh index d5e3abb8658c..9931a1e36e3d 100755 --- a/tools/testing/selftests/net/fdb_flush.sh +++ b/tools/testing/selftests/net/fdb_flush.sh @@ -583,7 +583,7 @@ vxlan_test_flush_by_remote_attributes() $IP link del dev vx10 $IP link add name vx10 type vxlan dstport "$VXPORT" external - # For multicat FDB entries, the VXLAN driver stores a linked list of + # For multicast FDB entries, the VXLAN driver stores a linked list of # remotes for a given key. Verify that only the expected remotes are # flushed. multicast_fdb_entries_add diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh new file mode 100755 index 000000000000..c159230c9b62 --- /dev/null +++ b/tools/testing/selftests/net/fdb_notify.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +ALL_TESTS=" + test_dup_bridge + test_dup_vxlan_self + test_dup_vxlan_master + test_dup_macvlan_self + test_dup_macvlan_master +" + +do_test_dup() +{ + local op=$1; shift + local what=$1; shift + local tmpf + + RET=0 + + tmpf=$(mktemp) + defer rm "$tmpf" + + defer_scope_push + bridge monitor fdb &> "$tmpf" & + defer kill_process $! + + sleep 0.5 + bridge fdb "$op" 00:11:22:33:44:55 vlan 1 "$@" + sleep 0.5 + defer_scope_pop + + local count=$(grep -c -e 00:11:22:33:44:55 $tmpf) + ((count == 1)) + check_err $? "Got $count notifications, expected 1" + + log_test "$what $op: Duplicate notifications" +} + +test_dup_bridge() +{ + ip_link_add br up type bridge vlan_filtering 1 + do_test_dup add "bridge" dev br self + do_test_dup del "bridge" dev br self +} + +test_dup_vxlan_self() +{ + ip_link_add br up type bridge vlan_filtering 1 + ip_link_add vx up type vxlan id 2000 dstport 4789 + ip_link_set_master vx br + + do_test_dup add "vxlan" dev vx self dst 192.0.2.1 + do_test_dup del "vxlan" dev vx self dst 192.0.2.1 +} + +test_dup_vxlan_master() +{ + ip_link_add br up type bridge vlan_filtering 1 + ip_link_add vx up type vxlan id 2000 dstport 4789 + ip_link_set_master vx br + + do_test_dup add "vxlan master" dev vx master + do_test_dup del "vxlan master" dev vx master +} + +test_dup_macvlan_self() +{ + ip_link_add dd up type dummy + ip_link_add mv up link dd type macvlan mode passthru + + do_test_dup add "macvlan self" dev mv self + do_test_dup del "macvlan self" dev mv self +} + +test_dup_macvlan_master() +{ + ip_link_add br up type bridge vlan_filtering 1 + ip_link_add dd up type dummy + ip_link_add mv up link dd type macvlan mode passthru + ip_link_set_master mv br + + do_test_dup add "macvlan master" dev mv self + do_test_dup del "macvlan master" dev mv self +} + +cleanup() +{ + defer_scopes_cleanup +} + +trap cleanup EXIT +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index ac0b2c6a5761..b39f748c2572 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -76,9 +76,16 @@ log_test() printf "TEST: %-60s [ OK ]\n" "${msg}" nsuccess=$((nsuccess+1)) else - ret=1 - nfail=$((nfail+1)) - printf "TEST: %-60s [FAIL]\n" "${msg}" + if [[ $rc -eq $ksft_skip ]]; then + [[ $ret -eq 0 ]] && ret=$ksft_skip + nskip=$((nskip+1)) + printf "TEST: %-60s [SKIP]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + fi + if [ "$VERBOSE" = "1" ]; then echo " rc=$rc, expected $expected" fi @@ -736,7 +743,7 @@ ipv6_fcnal() run_cmd "$IP nexthop add id 52 via 2001:db8:92::3" log_test $? 2 "Create nexthop - gw only" - # gw is not reachable throught given dev + # gw is not reachable through given dev run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1" log_test $? 2 "Create nexthop - invalid gw+dev combination" @@ -923,6 +930,29 @@ ipv6_grp_fcnal() ipv6_grp_refs log_test $? 0 "Nexthop group replace refcounts" + + # + # 16-bit weights. + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1" + run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1" + run_cmd "$IP nexthop add id 66 dev veth1" + + run_cmd "$IP nexthop add id 103 group 62,1000" + if [[ $? == 0 ]]; then + local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535" + run_cmd "$IP nexthop replace $GRP" + check_nexthop "id 103" "$GRP" + rc=$? + else + rc=$ksft_skip + fi + + $IP nexthop flush >/dev/null 2>&1 + + log_test $rc 0 "16-bit weights" } ipv6_res_grp_fcnal() @@ -987,6 +1017,31 @@ ipv6_res_grp_fcnal() check_nexthop_bucket "list id 102" \ "id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62" log_test $? 0 "Nexthop buckets updated after replace - nECMP" + + # + # 16-bit weights. + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1" + run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1" + run_cmd "$IP nexthop add id 66 dev veth1" + + run_cmd "$IP nexthop add id 103 group 62,1000 type resilient buckets 32" + if [[ $? == 0 ]]; then + local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535 $(: + )type resilient buckets 32 idle_timer 0 $(: + )unbalanced_timer 0" + run_cmd "$IP nexthop replace $GRP" + check_nexthop "id 103" "$GRP unbalanced_time 0" + rc=$? + else + rc=$ksft_skip + fi + + $IP nexthop flush >/dev/null 2>&1 + + log_test $rc 0 "16-bit weights" } ipv6_fcnal_runtime() @@ -2475,6 +2530,7 @@ done if [ "$TESTS" != "none" ]; then printf "\nTests passed: %3d\n" ${nsuccess} printf "Tests failed: %3d\n" ${nfail} + printf "Tests skipped: %2d\n" ${nskip} fi exit $ret diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 51157a5559b7..5fbdd2a0b537 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -9,6 +9,7 @@ PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} RTABLE=100 RTABLE_PEER=101 +RTABLE_VRF=102 GW_IP4=192.51.100.2 SRC_IP=192.51.100.3 GW_IP6=2001:db8:1::2 @@ -17,7 +18,14 @@ SRC_IP6=2001:db8:1::3 DEV_ADDR=192.51.100.1 DEV_ADDR6=2001:db8:1::1 DEV=dummy0 -TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect" +TESTS=" + fib_rule6 + fib_rule4 + fib_rule6_connect + fib_rule4_connect + fib_rule6_vrf + fib_rule4_vrf +" SELFTEST_PATH="" @@ -29,11 +37,11 @@ log_test() if [ ${rc} -eq ${expected} ]; then nsuccess=$((nsuccess+1)) - printf "\n TEST: %-50s [ OK ]\n" "${msg}" + printf " TEST: %-60s [ OK ]\n" "${msg}" else ret=1 nfail=$((nfail+1)) - printf "\n TEST: %-50s [FAIL]\n" "${msg}" + printf " TEST: %-60s [FAIL]\n" "${msg}" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then echo echo "hit enter to continue, 'q' to quit" @@ -43,39 +51,6 @@ log_test() fi } -log_section() -{ - echo - echo "######################################################################" - echo "TEST SECTION: $*" - echo "######################################################################" -} - -check_nettest() -{ - if which nettest > /dev/null 2>&1; then - return 0 - fi - - # Add the selftest directory to PATH if not already done - if [ "${SELFTEST_PATH}" = "" ]; then - SELFTEST_PATH="$(dirname $0)" - PATH="${PATH}:${SELFTEST_PATH}" - - # Now retry with the new path - if which nettest > /dev/null 2>&1; then - return 0 - fi - - if [ "${ret}" -eq 0 ]; then - ret="${ksft_skip}" - fi - echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')" - fi - - return 1 -} - setup() { set -e @@ -130,6 +105,17 @@ cleanup_peer() ip netns del $peerns } +setup_vrf() +{ + $IP link add name vrf0 up type vrf table $RTABLE_VRF + $IP link set dev $DEV master vrf0 +} + +cleanup_vrf() +{ + $IP link del dev vrf0 +} + fib_check_iproute_support() { ip rule help 2>&1 | grep -q $1 @@ -163,12 +149,17 @@ fib_rule6_test_match_n_redirect() { local match="$1" local getmatch="$2" - local description="$3" + local getnomatch="$3" + local description="$4" + local nomatch_description="$5" $IP -6 rule add $match table $RTABLE $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE" log_test $? 0 "rule6 check: $description" + $IP -6 route get $GW_IP6 $getnomatch 2>&1 | grep -q "table $RTABLE" + log_test $? 1 "rule6 check: $nomatch_description" + fib_rule6_del_by_pref "$match" log_test $? 0 "rule6 del by pref: $description" } @@ -189,18 +180,27 @@ fib_rule6_test_reject() fib_rule6_test() { + local ext_name=$1; shift + local getnomatch local getmatch local match local cnt + echo + echo "IPv6 FIB rule tests $ext_name" + # setup the fib rule redirect route $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink match="oif $DEV" - fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table" + getnomatch="oif lo" + fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "oif redirect to table" "oif no redirect to table" match="from $SRC_IP6 iif $DEV" - fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table" + getnomatch="from $SRC_IP6 iif lo" + fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "iif redirect to table" "iif no redirect to table" # Reject dsfield (tos) options which have ECN bits set for cnt in $(seq 1 3); do @@ -214,38 +214,175 @@ fib_rule6_test() # Using option 'tos' instead of 'dsfield' as old iproute2 # versions don't support 'dsfield' in ip rule show. getmatch="tos $cnt" + getnomatch="tos 0x20" fib_rule6_test_match_n_redirect "$match" "$getmatch" \ - "$getmatch redirect to table" + "$getnomatch" "$getmatch redirect to table" \ + "$getnomatch no redirect to table" + done + + # Re-test TOS matching, but with input routes since they are handled + # differently from output routes. + match="tos 0x10" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + getmatch="tos $cnt" + getnomatch="tos 0x20" + fib_rule6_test_match_n_redirect "$match" \ + "from $SRC_IP6 iif $DEV $getmatch" \ + "from $SRC_IP6 iif $DEV $getnomatch" \ + "iif $getmatch redirect to table" \ + "iif $getnomatch no redirect to table" done match="fwmark 0x64" getmatch="mark 0x64" - fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + getnomatch="mark 0x63" + fib_rule6_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \ + "fwmark redirect to table" "fwmark no redirect to table" fib_check_iproute_support "uidrange" "uid" if [ $? -eq 0 ]; then match="uidrange 100-100" getmatch="uid 100" - fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + getnomatch="uid 101" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "uid redirect to table" \ + "uid no redirect to table" fi fib_check_iproute_support "sport" "sport" if [ $? -eq 0 ]; then match="sport 666 dport 777" - fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + getnomatch="sport 667 dport 778" + fib_rule6_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "sport and dport redirect to table" \ + "sport and dport no redirect to table" + + match="sport 100-200 dport 300-400" + getmatch="sport 100 dport 400" + getnomatch="sport 100 dport 401" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" \ + "sport and dport range redirect to table" \ + "sport and dport range no redirect to table" + fi + + ip rule help 2>&1 | grep sport | grep -q MASK + if [ $? -eq 0 ]; then + match="sport 0x0f00/0xff00 dport 0x000f/0x00ff" + getmatch="sport 0x0f11 dport 0x220f" + getnomatch="sport 0x1f11 dport 0x221f" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "sport and dport masked redirect to table" \ + "sport and dport masked no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto tcp" - fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match" + getnomatch="ipproto udp" + fib_rule6_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto tcp match" "ipproto udp no match" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto ipv6-icmp" - fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match" + getnomatch="ipproto tcp" + fib_rule6_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto ipv6-icmp match" \ + "ipproto ipv6-tcp no match" fi + + fib_check_iproute_support "dscp" "tos" + if [ $? -eq 0 ]; then + match="dscp 0x3f" + getmatch="tos 0xfc" + getnomatch="tos 0xf4" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp redirect to table" \ + "dscp no redirect to table" + + match="dscp 0x3f" + getmatch="from $SRC_IP6 iif $DEV tos 0xfc" + getnomatch="from $SRC_IP6 iif $DEV tos 0xf4" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp redirect to table" \ + "iif dscp no redirect to table" + fi + + ip rule help 2>&1 | grep -q "DSCP\[/MASK\]" + if [ $? -eq 0 ]; then + match="dscp 0x0f/0x0f" + tosmatch=$(printf 0x"%x" $((0x1f << 2))) + tosnomatch=$(printf 0x"%x" $((0x1e << 2))) + getmatch="tos $tosmatch" + getnomatch="tos $tosnomatch" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp masked redirect to table" \ + "dscp masked no redirect to table" + + match="dscp 0x0f/0x0f" + getmatch="from $SRC_IP6 iif $DEV tos $tosmatch" + getnomatch="from $SRC_IP6 iif $DEV tos $tosnomatch" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp masked redirect to table" \ + "iif dscp masked no redirect to table" + fi + + fib_check_iproute_support "flowlabel" "flowlabel" + if [ $? -eq 0 ]; then + match="flowlabel 0xfffff" + getmatch="flowlabel 0xfffff" + getnomatch="flowlabel 0xf" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "flowlabel redirect to table" \ + "flowlabel no redirect to table" + + match="flowlabel 0xfffff" + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif flowlabel redirect to table" \ + "iif flowlabel no redirect to table" + + match="flowlabel 0x08000/0x08000" + getmatch="flowlabel 0xfffff" + getnomatch="flowlabel 0xf7fff" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "flowlabel masked redirect to table" \ + "flowlabel masked no redirect to table" + + match="flowlabel 0x08000/0x08000" + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf7fff" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif flowlabel masked redirect to table" \ + "iif flowlabel masked no redirect to table" + fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP6 iif vrf0" + getmatch="from $SRC_IP6 iif $DEV" + getnomatch="from $SRC_IP6 iif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi +} + +fib_rule6_vrf_test() +{ + setup_vrf + fib_rule6_test "- with VRF" + cleanup_vrf } # Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly @@ -254,10 +391,8 @@ fib_rule6_connect_test() { local dsfield - if ! check_nettest; then - echo "SKIP: Could not run test without nettest tool" - return - fi + echo + echo "IPv6 FIB rule connect tests" setup_peer $IP -6 rule add dsfield 0x04 table $RTABLE_PEER @@ -275,7 +410,45 @@ fib_rule6_connect_test() log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})" done + # Check that UDP and TCP connections fail when using a DS Field that + # does not match the previously configured FIB rule. + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \ + -Q 0x20 -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dsfield udp no connect (dsfield 0x20)" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0x20 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dsfield tcp no connect (dsfield 0x20)" + $IP -6 rule del dsfield 0x04 table $RTABLE_PEER + + ip rule help 2>&1 | grep -q dscp + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing dscp match" + cleanup_peer + return + fi + + $IP -6 rule add dscp 0x3f table $RTABLE_PEER + + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xfc \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dscp udp connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xfc \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dscp tcp connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xf4 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dscp udp no connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xf4 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dscp tcp no connect" + + $IP -6 rule del dscp 0x3f table $RTABLE_PEER + cleanup_peer } @@ -295,12 +468,17 @@ fib_rule4_test_match_n_redirect() { local match="$1" local getmatch="$2" - local description="$3" + local getnomatch="$3" + local description="$4" + local nomatch_description="$5" $IP rule add $match table $RTABLE $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE" log_test $? 0 "rule4 check: $description" + $IP route get $GW_IP4 $getnomatch 2>&1 | grep -q "table $RTABLE" + log_test $? 1 "rule4 check: $nomatch_description" + fib_rule4_del_by_pref "$match" log_test $? 0 "rule4 del by pref: $description" } @@ -321,23 +499,28 @@ fib_rule4_test_reject() fib_rule4_test() { + local ext_name=$1; shift + local getnomatch local getmatch local match local cnt + echo + echo "IPv4 FIB rule tests $ext_name" + # setup the fib rule redirect route $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink match="oif $DEV" - fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table" + getnomatch="oif lo" + fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "oif redirect to table" "oif no redirect to table" - # need enable forwarding and disable rp_filter temporarily as all the - # addresses are in the same subnet and egress device == ingress device. ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1 - ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 match="from $SRC_IP iif $DEV" - fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" - ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0 + getnomatch="from $SRC_IP iif lo" + fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "iif redirect to table" "iif no redirect to table" # Reject dsfield (tos) options which have ECN bits set for cnt in $(seq 1 3); do @@ -351,38 +534,145 @@ fib_rule4_test() # Using option 'tos' instead of 'dsfield' as old iproute2 # versions don't support 'dsfield' in ip rule show. getmatch="tos $cnt" + getnomatch="tos 0x20" fib_rule4_test_match_n_redirect "$match" "$getmatch" \ - "$getmatch redirect to table" + "$getnomatch" "$getmatch redirect to table" \ + "$getnomatch no redirect to table" + done + + # Re-test TOS matching, but with input routes since they are handled + # differently from output routes. + match="tos 0x10" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + getmatch="tos $cnt" + getnomatch="tos 0x20" + fib_rule4_test_match_n_redirect "$match" \ + "from $SRC_IP iif $DEV $getmatch" \ + "from $SRC_IP iif $DEV $getnomatch" \ + "iif $getmatch redirect to table" \ + "iif $getnomatch no redirect to table" done match="fwmark 0x64" getmatch="mark 0x64" - fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + getnomatch="mark 0x63" + fib_rule4_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \ + "fwmark redirect to table" "fwmark no redirect to table" fib_check_iproute_support "uidrange" "uid" if [ $? -eq 0 ]; then match="uidrange 100-100" getmatch="uid 100" - fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + getnomatch="uid 101" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "uid redirect to table" \ + "uid no redirect to table" fi fib_check_iproute_support "sport" "sport" if [ $? -eq 0 ]; then match="sport 666 dport 777" - fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + getnomatch="sport 667 dport 778" + fib_rule4_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "sport and dport redirect to table" \ + "sport and dport no redirect to table" + + match="sport 100-200 dport 300-400" + getmatch="sport 100 dport 400" + getnomatch="sport 100 dport 401" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" \ + "sport and dport range redirect to table" \ + "sport and dport range no redirect to table" + fi + + ip rule help 2>&1 | grep sport | grep -q MASK + if [ $? -eq 0 ]; then + match="sport 0x0f00/0xff00 dport 0x000f/0x00ff" + getmatch="sport 0x0f11 dport 0x220f" + getnomatch="sport 0x1f11 dport 0x221f" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "sport and dport masked redirect to table" \ + "sport and dport masked no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto tcp" - fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match" + getnomatch="ipproto udp" + fib_rule4_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto tcp match" \ + "ipproto udp no match" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto icmp" - fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match" + getnomatch="ipproto tcp" + fib_rule4_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto icmp match" \ + "ipproto tcp no match" + fi + + fib_check_iproute_support "dscp" "tos" + if [ $? -eq 0 ]; then + match="dscp 0x3f" + getmatch="tos 0xfc" + getnomatch="tos 0xf4" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp redirect to table" \ + "dscp no redirect to table" + + match="dscp 0x3f" + getmatch="from $SRC_IP iif $DEV tos 0xfc" + getnomatch="from $SRC_IP iif $DEV tos 0xf4" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp redirect to table" \ + "iif dscp no redirect to table" + fi + + ip rule help 2>&1 | grep -q "DSCP\[/MASK\]" + if [ $? -eq 0 ]; then + match="dscp 0x0f/0x0f" + tosmatch=$(printf 0x"%x" $((0x1f << 2))) + tosnomatch=$(printf 0x"%x" $((0x1e << 2))) + getmatch="tos $tosmatch" + getnomatch="tos $tosnomatch" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp masked redirect to table" \ + "dscp masked no redirect to table" + + match="dscp 0x0f/0x0f" + getmatch="from $SRC_IP iif $DEV tos $tosmatch" + getnomatch="from $SRC_IP iif $DEV tos $tosnomatch" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp masked redirect to table" \ + "iif dscp masked no redirect to table" fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP iif vrf0" + getmatch="from $SRC_IP iif $DEV" + getnomatch="from $SRC_IP iif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi +} + +fib_rule4_vrf_test() +{ + setup_vrf + fib_rule4_test "- with VRF" + cleanup_vrf } # Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken @@ -391,10 +681,8 @@ fib_rule4_connect_test() { local dsfield - if ! check_nettest; then - echo "SKIP: Could not run test without nettest tool" - return - fi + echo + echo "IPv4 FIB rule connect tests" setup_peer $IP -4 rule add dsfield 0x04 table $RTABLE_PEER @@ -412,16 +700,46 @@ fib_rule4_connect_test() log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})" done + # Check that UDP and TCP connections fail when using a DS Field that + # does not match the previously configured FIB rule. + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0x20 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dsfield udp no connect (dsfield 0x20)" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0x20 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dsfield tcp no connect (dsfield 0x20)" + $IP -4 rule del dsfield 0x04 table $RTABLE_PEER - cleanup_peer -} -run_fibrule_tests() -{ - log_section "IPv4 fib rule" - fib_rule4_test - log_section "IPv6 fib rule" - fib_rule6_test + ip rule help 2>&1 | grep -q dscp + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing dscp match" + cleanup_peer + return + fi + + $IP -4 rule add dscp 0x3f table $RTABLE_PEER + + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xfc \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dscp udp connect" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0xfc \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dscp tcp connect" + + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xf4 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dscp udp no connect" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0xf4 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dscp tcp no connect" + + $IP -4 rule del dscp 0x3f table $RTABLE_PEER + + cleanup_peer } ################################################################################ # usage @@ -457,6 +775,8 @@ if [ ! -x "$(command -v ip)" ]; then exit $ksft_skip fi +check_gen_prog "nettest" + # start clean cleanup &> /dev/null setup @@ -467,6 +787,8 @@ do fib_rule4_test|fib_rule4) fib_rule4_test;; fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;; fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;; + fib_rule6_vrf_test|fib_rule6_vrf) fib_rule6_vrf_test;; + fib_rule4_vrf_test|fib_rule4_vrf) fib_rule4_vrf_test;; help) echo "Test names: $TESTS"; exit 0;; diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 73895711cdf4..a94b73a53f72 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -11,7 +11,7 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \ ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \ ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \ - ipv4_mpath_list ipv6_mpath_list" + ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance" VERBOSE=0 PAUSE_ON_FAIL=no @@ -689,7 +689,7 @@ fib6_notify_test() log_test $ret 0 "ipv6 route add notify" - { kill %% && wait %%; } 2>/dev/null + kill_process %% #rm errors.txt @@ -736,7 +736,7 @@ fib_notify_test() log_test $ret 0 "ipv4 route add notify" - { kill %% && wait %%; } 2>/dev/null + kill_process %% rm errors.txt @@ -1085,6 +1085,35 @@ route_setup() set +e } +forwarding_cleanup() +{ + cleanup_ns $ns3 + + route_cleanup +} + +# extend route_setup with an ns3 reachable through ns2 over both devices +forwarding_setup() +{ + forwarding_cleanup + + route_setup + + setup_ns ns3 + + ip link add veth5 netns $ns3 type veth peer name veth6 netns $ns2 + ip -netns $ns3 link set veth5 up + ip -netns $ns2 link set veth6 up + + ip -netns $ns3 -4 addr add dev veth5 172.16.105.1/24 + ip -netns $ns2 -4 addr add dev veth6 172.16.105.2/24 + ip -netns $ns3 -4 route add 172.16.100.0/22 via 172.16.105.2 + + ip -netns $ns3 -6 addr add dev veth5 2001:db8:105::1/64 nodad + ip -netns $ns2 -6 addr add dev veth6 2001:db8:105::2/64 nodad + ip -netns $ns3 -6 route add 2001:db8:101::/33 via 2001:db8:105::2 +} + # assumption is that basic add of a single path route works # otherwise just adding an address on an interface is broken ipv6_rt_add() @@ -1737,53 +1766,53 @@ ipv4_rt_dsfield() # DSCP 0x10 should match the specific route, no matter the ECN bits $IP route get fibmatch 172.16.102.1 dsfield 0x10 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT" $IP route get fibmatch 172.16.102.1 dsfield 0x11 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)" $IP route get fibmatch 172.16.102.1 dsfield 0x12 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)" $IP route get fibmatch 172.16.102.1 dsfield 0x13 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:CE" # Unknown DSCP should match the generic route, no matter the ECN bits $IP route get fibmatch 172.16.102.1 dsfield 0x14 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT" $IP route get fibmatch 172.16.102.1 dsfield 0x15 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)" $IP route get fibmatch 172.16.102.1 dsfield 0x16 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)" $IP route get fibmatch 172.16.102.1 dsfield 0x17 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:CE" # Null DSCP should match the generic route, no matter the ECN bits $IP route get fibmatch 172.16.102.1 dsfield 0x00 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT" $IP route get fibmatch 172.16.102.1 dsfield 0x01 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)" $IP route get fibmatch 172.16.102.1 dsfield 0x02 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)" $IP route get fibmatch 172.16.102.1 dsfield 0x03 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:CE" } @@ -2328,7 +2357,7 @@ ipv4_mangle_test() $IP route del table 123 172.16.101.0/24 dev veth1 $IP rule del pref 100 - { kill %% && wait %%; } 2>/dev/null + kill_process %% rm $tmp_file route_cleanup @@ -2386,7 +2415,7 @@ ipv6_mangle_test() $IP -6 route del table 123 2001:db8:101::/64 dev veth1 $IP -6 rule del pref 100 - { kill %% && wait %%; } 2>/dev/null + kill_process %% rm $tmp_file route_cleanup @@ -2531,9 +2560,6 @@ ipv4_mpath_list_test() run_cmd "ip -n $ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" set +e local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]') @@ -2600,6 +2626,93 @@ ipv6_mpath_list_test() route_cleanup } +tc_set_flower_counter__saddr_syn() { + tc_set_flower_counter $1 $2 $3 "src_ip $4 ip_proto tcp tcp_flags 0x2" +} + +ip_mpath_balance_dep_check() +{ + if [ ! -x "$(command -v socat)" ]; then + echo "socat command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v jq)" ]; then + echo "jq command not found. Skipping test" + return 1 + fi +} + +ip_mpath_balance() { + local -r ipver=$1 + local -r daddr=$2 + local -r num_conn=20 + + for i in $(seq 1 $num_conn); do + ip netns exec $ns3 socat $ipver TCP-LISTEN:8000 STDIO >/dev/null & + sleep 0.02 + echo -n a | ip netns exec $ns1 socat $ipver STDIO TCP:$daddr:8000 + done + + local -r syn0="$(tc_get_flower_counter $ns1 veth1)" + local -r syn1="$(tc_get_flower_counter $ns1 veth3)" + local -r syns=$((syn0+syn1)) + + [ "$VERBOSE" = "1" ] && echo "multipath: syns seen: ($syn0,$syn1)" + + [[ $syns -ge $num_conn ]] && [[ $syn0 -gt 0 ]] && [[ $syn1 -gt 0 ]] +} + +ipv4_mpath_balance_test() +{ + echo + echo "IPv4 multipath load balance test" + + ip_mpath_balance_dep_check || return 1 + forwarding_setup + + $IP route add 172.16.105.1 \ + nexthop via 172.16.101.2 \ + nexthop via 172.16.103.2 + + ip netns exec $ns1 \ + sysctl -q -w net.ipv4.fib_multipath_hash_policy=1 + + tc_set_flower_counter__saddr_syn $ns1 4 veth1 172.16.101.1 + tc_set_flower_counter__saddr_syn $ns1 4 veth3 172.16.103.1 + + ip_mpath_balance -4 172.16.105.1 + + log_test $? 0 "IPv4 multipath loadbalance" + + forwarding_cleanup +} + +ipv6_mpath_balance_test() +{ + echo + echo "IPv6 multipath load balance test" + + ip_mpath_balance_dep_check || return 1 + forwarding_setup + + $IP route add 2001:db8:105::1\ + nexthop via 2001:db8:101::2 \ + nexthop via 2001:db8:103::2 + + ip netns exec $ns1 \ + sysctl -q -w net.ipv6.fib_multipath_hash_policy=1 + + tc_set_flower_counter__saddr_syn $ns1 6 veth1 2001:db8:101::1 + tc_set_flower_counter__saddr_syn $ns1 6 veth3 2001:db8:103::1 + + ip_mpath_balance -6 "[2001:db8:105::1]" + + log_test $? 0 "IPv6 multipath loadbalance" + + forwarding_cleanup +} + ################################################################################ # usage @@ -2683,6 +2796,8 @@ do fib6_gc_test|ipv6_gc) fib6_gc_test;; ipv4_mpath_list) ipv4_mpath_list_test;; ipv6_mpath_list) ipv6_mpath_list_test;; + ipv4_mpath_balance) ipv4_mpath_balance_test;; + ipv6_mpath_balance) ipv6_mpath_balance_test;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 535865b3d1d6..00bde7b6f39e 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -15,18 +15,12 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ bridge_vlan_unaware.sh \ custom_multipath_hash.sh \ dual_vxlan_bridge.sh \ - ethtool_extended_state.sh \ - ethtool_mm.sh \ - ethtool_rmon.sh \ - ethtool.sh \ gre_custom_multipath_hash.sh \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ gre_multipath_nh_res.sh \ gre_multipath_nh.sh \ gre_multipath.sh \ - hw_stats_l3.sh \ - hw_stats_l3_gre.sh \ ip6_forward_instats_vrf.sh \ ip6gre_custom_multipath_hash.sh \ ip6gre_flat_key.sh \ @@ -43,8 +37,9 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ ipip_hier_gre_key.sh \ ipip_hier_gre_keys.sh \ ipip_hier_gre.sh \ + lib_sh_test.sh \ local_termination.sh \ - loopback.sh \ + min_max_mtu.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ mirror_gre_bridge_1d_vlan.sh \ @@ -76,6 +71,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ router_broadcast.sh \ router_mpath_nh_res.sh \ router_mpath_nh.sh \ + router_mpath_seed.sh \ router_multicast.sh \ router_multipath.sh \ router_nh.sh \ @@ -109,11 +105,11 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_bridge_1q_port_8472.sh \ vxlan_bridge_1q.sh \ + vxlan_reserved.sh \ vxlan_symmetric_ipv6.sh \ vxlan_symmetric.sh TEST_FILES := devlink_lib.sh \ - ethtool_lib.sh \ fib_offload_lib.sh \ forwarding.config.sample \ ip6gre_lib.sh \ @@ -131,6 +127,7 @@ TEST_FILES := devlink_lib.sh \ tc_common.sh TEST_INCLUDES := \ - ../lib.sh + ../lib.sh \ + $(wildcard ../lib/sh/*.sh) include ../../lib.mk diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README index b8a2af8fcfb7..7b41cff993ad 100644 --- a/tools/testing/selftests/net/forwarding/README +++ b/tools/testing/selftests/net/forwarding/README @@ -6,7 +6,7 @@ to easily create and test complex environments. Unfortunately, these namespaces can not be used with actual switching ASICs, as their ports can not be migrated to other network namespaces -(NETIF_F_NETNS_LOCAL) and most of them probably do not support the +(dev->netns_immutable) and most of them probably do not support the L1-separation provided by namespaces. However, a similar kind of flexibility can be achieved by using VRFs and @@ -56,3 +56,36 @@ o Checks shall be added to lib.sh for any external dependencies. o Code shall be checked using ShellCheck [1] prior to submission. 1. https://www.shellcheck.net/ + +Customization +============= + +The forwarding selftests framework uses a number of variables that +influence its behavior and tools it invokes, and how it invokes them, in +various ways. A number of these variables can be overridden. The way these +overridable variables are specified is typically one of the following two +syntaxes: + + : "${VARIABLE:=default_value}" + VARIABLE=${VARIABLE:=default_value} + +Any of these variables can be overridden. Notably net/forwarding/lib.sh and +net/lib.sh contain a number of overridable variables. + +One way of overriding these variables is through the environment: + + PAUSE_ON_FAIL=yes ./some_test.sh + +The variable NETIFS is special. Since it is an array variable, there is no +way to pass it through the environment. Its value can instead be given as +consecutive arguments to the selftest: + + ./some_test.sh swp{1..8} + +A way to customize variables in a persistent fashion is to create a file +named forwarding.config in this directory. lib.sh sources the file if +present, so it can contain any shell code. Typically it will contain +assignments of variables whose value should be overridden. + +forwarding.config.sample is available in the directory as an example of +how forwarding.config might look. diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh index 0760a34b7114..a21b7085da2e 100755 --- a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh +++ b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh @@ -178,6 +178,22 @@ fdb_del() check_err $? "Failed to remove a FDB entry of type ${type}" } +check_fdb_n_learned_support() +{ + if ! ip link help bridge 2>&1 | grep -q "fdb_max_learned"; then + echo "SKIP: iproute2 too old, missing bridge max learned support" + exit $ksft_skip + fi + + ip link add dev br0 type bridge + local learned=$(fdb_get_n_learned) + ip link del dev br0 + if [ "$learned" == "null" ]; then + echo "SKIP: kernel too old; bridge fdb_n_learned feature not supported." + exit $ksft_skip + fi +} + check_accounting_one_type() { local type=$1 is_counted=$2 overrides_learned=$3 @@ -274,6 +290,8 @@ check_limit() done } +check_fdb_n_learned_support + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 2aa66d2a1702..d4e7dd659354 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -1,10 +1,24 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ - v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ - v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \ - v3exc_timeout_test v3star_ex_auto_add_test" +ALL_TESTS=" + v2reportleave_test + v3include_test + v3inc_allow_test + v3inc_is_include_test + v3inc_is_exclude_test + v3inc_to_exclude_test + v3exc_allow_test + v3exc_is_include_test + v3exc_is_exclude_test + v3exc_to_exclude_test + v3inc_block_test + v3exc_block_test + v3exc_timeout_test + v3star_ex_auto_add_test + v2per_vlan_snooping_port_stp_test + v2per_vlan_snooping_vlan_stp_test +" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -478,10 +492,10 @@ v3exc_timeout_test() RET=0 local X=("192.0.2.20" "192.0.2.30") - # GMI should be 3 seconds + # GMI should be 5 seconds ip link set dev br0 type bridge mcast_query_interval 100 \ mcast_query_response_interval 100 \ - mcast_membership_interval 300 + mcast_membership_interval 500 v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP ip link set dev br0 type bridge mcast_query_interval 500 \ @@ -489,7 +503,7 @@ v3exc_timeout_test() mcast_membership_interval 1500 $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q - sleep 3 + sleep 5 bridge -j -d -s mdb show dev br0 \ | jq -e ".[].mdb[] | \ select(.grp == \"$TEST_GROUP\" and \ @@ -554,6 +568,64 @@ v3star_ex_auto_add_test() v3cleanup $swp2 $TEST_GROUP } +v2per_vlan_snooping_stp_test() +{ + local is_port=$1 + + local msg="port" + [[ $is_port -ne 1 ]] && msg="vlan" + + ip link set br0 up type bridge vlan_filtering 1 \ + mcast_igmp_version 2 \ + mcast_snooping 1 \ + mcast_vlan_snooping 1 \ + mcast_querier 1 \ + mcast_stats_enabled 1 + bridge vlan global set vid 1 dev br0 \ + mcast_snooping 1 \ + mcast_querier 1 \ + mcast_query_interval 100 \ + mcast_startup_query_count 0 + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4 + sleep 5 + local tx_s=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]') + + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3 + sleep 5 + local tx_e=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]') + + RET=0 + local tx=$(expr $tx_e - $tx_s) + test $tx -gt 0 + check_err $? "No IGMP queries after STP state becomes forwarding" + log_test "per vlan snooping with $msg stp state change" + + # restore settings + bridge vlan global set vid 1 dev br0 \ + mcast_querier 0 \ + mcast_query_interval 12500 \ + mcast_startup_query_count 2 + ip link set br0 up type bridge vlan_filtering 0 \ + mcast_vlan_snooping 0 \ + mcast_stats_enabled 0 +} + +v2per_vlan_snooping_port_stp_test() +{ + v2per_vlan_snooping_stp_test 1 +} + +v2per_vlan_snooping_vlan_stp_test() +{ + v2per_vlan_snooping_stp_test 0 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index d9d587454d20..8c1597ebc2d3 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -149,7 +149,7 @@ cfg_test_host_common() check_err $? "Failed to add $name host entry" bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null - check_fail $? "Managed to replace $name host entry" + check_err $? "Failed to replace $name host entry" bridge mdb del dev br0 port br0 grp $grp $state vid 10 bridge mdb get dev br0 grp $grp vid 10 &> /dev/null diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index e2b9ff773c6b..4cacef5a813a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -1,10 +1,23 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ - mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ - mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \ - mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test" +ALL_TESTS=" + mldv2include_test + mldv2inc_allow_test + mldv2inc_is_include_test + mldv2inc_is_exclude_test + mldv2inc_to_exclude_test + mldv2exc_allow_test + mldv2exc_is_include_test + mldv2exc_is_exclude_test + mldv2exc_to_exclude_test + mldv2inc_block_test + mldv2exc_block_test + mldv2exc_timeout_test + mldv2star_ex_auto_add_test + mldv2per_vlan_snooping_port_stp_test + mldv2per_vlan_snooping_vlan_stp_test +" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -478,10 +491,10 @@ mldv2exc_timeout_test() RET=0 local X=("2001:db8:1::20" "2001:db8:1::30") - # GMI should be 3 seconds + # GMI should be 5 seconds ip link set dev br0 type bridge mcast_query_interval 100 \ mcast_query_response_interval 100 \ - mcast_membership_interval 300 + mcast_membership_interval 500 mldv2exclude_prepare $h1 ip link set dev br0 type bridge mcast_query_interval 500 \ @@ -489,7 +502,7 @@ mldv2exc_timeout_test() mcast_membership_interval 1500 $MZ $h1 -c 1 $MZPKT_ALLOW2 -q - sleep 3 + sleep 5 bridge -j -d -s mdb show dev br0 \ | jq -e ".[].mdb[] | \ select(.grp == \"$TEST_GROUP\" and \ @@ -554,6 +567,66 @@ mldv2star_ex_auto_add_test() mldv2cleanup $swp2 } +mldv2per_vlan_snooping_stp_test() +{ + local is_port=$1 + + local msg="port" + [[ $is_port -ne 1 ]] && msg="vlan" + + ip link set br0 up type bridge vlan_filtering 1 \ + mcast_mld_version 2 \ + mcast_snooping 1 \ + mcast_vlan_snooping 1 \ + mcast_querier 1 \ + mcast_stats_enabled 1 + bridge vlan global set vid 1 dev br0 \ + mcast_mld_version 2 \ + mcast_snooping 1 \ + mcast_querier 1 \ + mcast_query_interval 100 \ + mcast_startup_query_count 0 + + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4 + sleep 5 + local tx_s=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["mld_queries"]["tx_v2"]') + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3 + sleep 5 + local tx_e=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["mld_queries"]["tx_v2"]') + + RET=0 + local tx=$(expr $tx_e - $tx_s) + test $tx -gt 0 + check_err $? "No MLD queries after STP state becomes forwarding" + log_test "per vlan snooping with $msg stp state change" + + # restore settings + bridge vlan global set vid 1 dev br0 \ + mcast_querier 0 \ + mcast_query_interval 12500 \ + mcast_startup_query_count 2 \ + mcast_mld_version 1 + ip link set br0 up type bridge vlan_filtering 0 \ + mcast_vlan_snooping 0 \ + mcast_stats_enabled 0 +} + +mldv2per_vlan_snooping_port_stp_test() +{ + mldv2per_vlan_snooping_stp_test 1 +} + +mldv2per_vlan_snooping_vlan_stp_test() +{ + mldv2per_vlan_snooping_stp_test 0 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 64bd00fe9a4f..e59fba366a0a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -142,6 +142,152 @@ extern_learn() bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null } +other_tpid() +{ + local mac=de:ad:be:ef:13:37 + + # Test that packets with TPID 802.1ad VID 3 + TPID 802.1Q VID 5 are + # classified as untagged by a bridge with vlan_protocol 802.1Q, and + # are processed in the PVID of the ingress port (here 1). Not VID 3, + # and not VID 5. + RET=0 + + tc qdisc add dev $h2 clsact + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + ip link set $h2 promisc on + ethtool -K $h2 rx-vlan-filter off rx-vlan-stag-filter off + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + # Match on 'self' addresses as well, for those drivers which + # do not push their learned addresses to the bridge software + # database + bridge -j fdb show $swp1 | \ + jq -e ".[] | select(.mac == \"$(mac_get $h1)\") | select(.vlan == 1)" &> /dev/null + check_err $? "FDB entry was not learned when it should" + + log_test "FDB entry in PVID for VLAN-tagged with other TPID" + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was not forwarded when it should" + log_test "Reception of VLAN with other TPID as untagged" + + bridge vlan del dev $swp1 vid 1 + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was forwarded when should not" + log_test "Reception of VLAN with other TPID as untagged (no PVID)" + + bridge vlan add dev $swp1 vid 1 pvid untagged + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + +8021p_do() +{ + local should_fail=$1; shift + local mac=de:ad:be:ef:13:37 + + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err_fail $should_fail $? "802.1p-tagged reception" + + tc filter del dev $h2 ingress pref 1 +} + +8021p() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with the default_pvid, 1, packets tagged with VID 0 are + # accepted. + 8021p_do 0 + + # Test that packets tagged with VID 0 are still accepted after changing + # the default_pvid. + ip link set br0 type bridge vlan_default_pvid 10 + 8021p_do 0 + + log_test "Reception of 802.1p-tagged traffic" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + +send_untagged_and_8021p() +{ + ping_do $h1 192.0.2.2 + check_fail $? + + 8021p_do 1 +} + +drop_untagged() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with no PVID, untagged and 802.1p-tagged traffic is + # dropped. + ip link set br0 type bridge vlan_default_pvid 1 + + # First we reconfigure the default_pvid, 1, as a non-PVID VLAN. + bridge vlan add dev $swp1 vid 1 untagged + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Next we try to delete VID 1 altogether + bridge vlan del dev $swp1 vid 1 + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Set up the bridge without a default_pvid, then check that the 8021q + # module, when the bridge port goes down and then up again, does not + # accidentally re-enable untagged packet reception. + ip link set br0 type bridge vlan_default_pvid 0 + ip link set $swp1 down + ip link set $swp1 up + setup_wait + send_untagged_and_8021p + + # Remove swp1 as a bridge port and let it rejoin the bridge while it + # has no default_pvid. + ip link set $swp1 nomaster + ip link set $swp1 master br0 + send_untagged_and_8021p + + # Restore settings + ip link set br0 type bridge vlan_default_pvid 1 + + log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh index 1c8a26046589..2b5700b61ffa 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding pvid_change" NUM_NETIFS=4 source lib.sh @@ -77,12 +77,16 @@ cleanup() ping_ipv4() { - ping_test $h1 192.0.2.2 + local msg=$1 + + ping_test $h1 192.0.2.2 "$msg" } ping_ipv6() { - ping6_test $h1 2001:db8:1::2 + local msg=$1 + + ping6_test $h1 2001:db8:1::2 "$msg" } learning() @@ -95,6 +99,21 @@ flooding() flood_test $swp2 $h1 $h2 } +pvid_change() +{ + # Test that the changing of the VLAN-aware PVID does not affect + # VLAN-unaware forwarding + bridge vlan add vid 3 dev $swp1 pvid untagged + + ping_ipv4 " with bridge port $swp1 PVID changed" + ping_ipv6 " with bridge port $swp1 PVID changed" + + bridge vlan del vid 3 dev $swp1 + + ping_ipv4 " with bridge port $swp1 PVID deleted" + ping_ipv6 " with bridge port $swp1 PVID deleted" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 8d7a1a004b7c..18fd69d8d937 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -1,6 +1,7 @@ CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_BRIDGE_IGMP_SNOOPING=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh index 1783c10215e5..7d531f7091e6 100755 --- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh @@ -224,10 +224,10 @@ send_dst_ipv6() send_flowlabel() { # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec v$h1 \ - $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1 - done + ip vrf exec v$h1 sh -c \ + "for _ in {1..16384}; do \ + $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + done" } send_src_udp6() diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index f1de525cfa55..18afa89ebbcc 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -122,6 +122,8 @@ devlink_reload() still_pending=$(devlink resource show "$DEVLINK_DEV" | \ grep -c "size_new") check_err $still_pending "Failed reload - There are still unset sizes" + + udevadm settle } declare -A DEVLINK_ORIG @@ -499,7 +501,7 @@ devlink_trap_drop_cleanup() local pref=$1; shift local handle=$1; shift - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower } diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh deleted file mode 100755 index aa2eafb7b243..000000000000 --- a/tools/testing/selftests/net/forwarding/ethtool.sh +++ /dev/null @@ -1,301 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -ALL_TESTS=" - same_speeds_autoneg_off - different_speeds_autoneg_off - combination_of_neg_on_and_off - advertise_subset_of_speeds - check_highest_speed_is_chosen - different_speeds_autoneg_on -" -NUM_NETIFS=2 -source lib.sh -source ethtool_lib.sh - -h1_create() -{ - simple_if_init $h1 192.0.2.1/24 -} - -h1_destroy() -{ - simple_if_fini $h1 192.0.2.1/24 -} - -h2_create() -{ - simple_if_init $h2 192.0.2.2/24 -} - -h2_destroy() -{ - simple_if_fini $h2 192.0.2.2/24 -} - -setup_prepare() -{ - h1=${NETIFS[p1]} - h2=${NETIFS[p2]} - - h1_create - h2_create -} - -cleanup() -{ - pre_cleanup - - h2_destroy - h1_destroy -} - -same_speeds_autoneg_off() -{ - # Check that when each of the reported speeds is forced, the links come - # up and are operational. - local -a speeds_arr=($(common_speeds_get $h1 $h2 0 0)) - - for speed in "${speeds_arr[@]}"; do - RET=0 - ethtool_set $h1 speed $speed autoneg off - ethtool_set $h2 speed $speed autoneg off - - setup_wait_dev_with_timeout $h1 - setup_wait_dev_with_timeout $h2 - ping_do $h1 192.0.2.2 - check_err $? "speed $speed autoneg off" - log_test "force of same speed autoneg off" - log_info "speed = $speed" - done - - ethtool -s $h2 autoneg on - ethtool -s $h1 autoneg on -} - -different_speeds_autoneg_off() -{ - # Test that when we force different speeds, links are not up and ping - # fails. - RET=0 - - local -a speeds_arr=($(different_speeds_get $h1 $h2 0 0)) - local speed1=${speeds_arr[0]} - local speed2=${speeds_arr[1]} - - ethtool_set $h1 speed $speed1 autoneg off - ethtool_set $h2 speed $speed2 autoneg off - - setup_wait_dev_with_timeout $h1 - setup_wait_dev_with_timeout $h2 - ping_do $h1 192.0.2.2 - check_fail $? "ping with different speeds" - - log_test "force of different speeds autoneg off" - - ethtool -s $h2 autoneg on - ethtool -s $h1 autoneg on -} - -combination_of_neg_on_and_off() -{ - # Test that when one device is forced to a speed supported by both - # endpoints and the other device is configured to autoneg on, the links - # are up and ping passes. - local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1)) - - for speed in "${speeds_arr[@]}"; do - RET=0 - ethtool_set $h1 speed $speed autoneg off - - setup_wait_dev_with_timeout $h1 - setup_wait_dev_with_timeout $h2 - ping_do $h1 192.0.2.2 - check_err $? "h1-speed=$speed autoneg off, h2 autoneg on" - log_test "one side with autoneg off and another with autoneg on" - log_info "force speed = $speed" - done - - ethtool -s $h1 autoneg on -} - -hex_speed_value_get() -{ - local speed=$1; shift - - local shift_size=${speed_values[$speed]} - speed=$((0x1 << $"shift_size")) - printf "%#x" "$speed" -} - -subset_of_common_speeds_get() -{ - local dev1=$1; shift - local dev2=$1; shift - local adver=$1; shift - - local -a speeds_arr=($(common_speeds_get $dev1 $dev2 0 $adver)) - local speed_to_advertise=0 - local speed_to_remove=${speeds_arr[0]} - speed_to_remove+='base' - - local -a speeds_mode_arr=($(common_speeds_get $dev1 $dev2 1 $adver)) - - for speed in ${speeds_mode_arr[@]}; do - if [[ $speed != $speed_to_remove* ]]; then - speed=$(hex_speed_value_get $speed) - speed_to_advertise=$(($speed_to_advertise | \ - $speed)) - fi - - done - - # Convert to hex. - printf "%#x" "$speed_to_advertise" -} - -speed_to_advertise_get() -{ - # The function returns the hex number that is composed by OR-ing all - # the modes corresponding to the provided speed. - local speed_without_mode=$1; shift - local supported_speeds=("$@"); shift - local speed_to_advertise=0 - - speed_without_mode+='base' - - for speed in ${supported_speeds[@]}; do - if [[ $speed == $speed_without_mode* ]]; then - speed=$(hex_speed_value_get $speed) - speed_to_advertise=$(($speed_to_advertise | \ - $speed)) - fi - - done - - # Convert to hex. - printf "%#x" "$speed_to_advertise" -} - -advertise_subset_of_speeds() -{ - # Test that when one device advertises a subset of speeds and another - # advertises a specific speed (but all modes of this speed), the links - # are up and ping passes. - RET=0 - - local speed_1_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1) - ethtool_set $h1 advertise $speed_1_to_advertise - - if [ $RET != 0 ]; then - log_test "advertise subset of speeds" - return - fi - - local -a speeds_arr_without_mode=($(common_speeds_get $h1 $h2 0 1)) - # Check only speeds that h1 advertised. Remove the first speed. - unset speeds_arr_without_mode[0] - local -a speeds_arr_with_mode=($(common_speeds_get $h1 $h2 1 1)) - - for speed_value in ${speeds_arr_without_mode[@]}; do - RET=0 - local speed_2_to_advertise=$(speed_to_advertise_get $speed_value \ - "${speeds_arr_with_mode[@]}") - ethtool_set $h2 advertise $speed_2_to_advertise - - setup_wait_dev_with_timeout $h1 - setup_wait_dev_with_timeout $h2 - ping_do $h1 192.0.2.2 - check_err $? "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)" - - log_test "advertise subset of speeds" - log_info "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise" - done - - ethtool -s $h2 autoneg on - ethtool -s $h1 autoneg on -} - -check_highest_speed_is_chosen() -{ - # Test that when one device advertises a subset of speeds, the other - # chooses the highest speed. This test checks configuration without - # traffic. - RET=0 - - local max_speed - local chosen_speed - local speed_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1) - - ethtool_set $h1 advertise $speed_to_advertise - - if [ $RET != 0 ]; then - log_test "check highest speed" - return - fi - - local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1)) - - max_speed=${speeds_arr[0]} - for current in ${speeds_arr[@]}; do - if [[ $current -gt $max_speed ]]; then - max_speed=$current - fi - done - - setup_wait_dev_with_timeout $h1 - setup_wait_dev_with_timeout $h2 - chosen_speed=$(ethtool $h1 | grep 'Speed:') - chosen_speed=${chosen_speed%"Mb/s"*} - chosen_speed=${chosen_speed#*"Speed: "} - ((chosen_speed == max_speed)) - check_err $? "h1 advertise $speed_to_advertise, h2 sync to speed $chosen_speed" - - log_test "check highest speed" - - ethtool -s $h2 autoneg on - ethtool -s $h1 autoneg on -} - -different_speeds_autoneg_on() -{ - # Test that when we configure links to advertise different speeds, - # links are not up and ping fails. - RET=0 - - local -a speeds=($(different_speeds_get $h1 $h2 1 1)) - local speed1=${speeds[0]} - local speed2=${speeds[1]} - - speed1=$(hex_speed_value_get $speed1) - speed2=$(hex_speed_value_get $speed2) - - ethtool_set $h1 advertise $speed1 - ethtool_set $h2 advertise $speed2 - - if (($RET)); then - setup_wait_dev_with_timeout $h1 - setup_wait_dev_with_timeout $h2 - ping_do $h1 192.0.2.2 - check_fail $? "ping with different speeds autoneg on" - fi - - log_test "advertise different speeds autoneg on" - - ethtool -s $h2 autoneg on - ethtool -s $h1 autoneg on -} - -skip_on_veth - -trap cleanup EXIT - -setup_prepare -setup_wait - -declare -gA speed_values -eval "speed_values=($(speeds_arr_get))" - -tests_run - -exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh deleted file mode 100755 index 17f89c3b7c02..000000000000 --- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh +++ /dev/null @@ -1,117 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -ALL_TESTS=" - autoneg - autoneg_force_mode - no_cable -" - -NUM_NETIFS=2 -source lib.sh -source ethtool_lib.sh - -TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms - -setup_prepare() -{ - swp1=${NETIFS[p1]} - swp2=${NETIFS[p2]} - swp3=$NETIF_NO_CABLE -} - -ethtool_ext_state() -{ - local dev=$1; shift - local expected_ext_state=$1; shift - local expected_ext_substate=${1:-""}; shift - - local ext_state=$(ethtool $dev | grep "Link detected" \ - | cut -d "(" -f2 | cut -d ")" -f1) - local ext_substate=$(echo $ext_state | cut -sd "," -f2 \ - | sed -e 's/^[[:space:]]*//') - ext_state=$(echo $ext_state | cut -d "," -f1) - - if [[ $ext_state != $expected_ext_state ]]; then - echo "Expected \"$expected_ext_state\", got \"$ext_state\"" - return 1 - fi - if [[ $ext_substate != $expected_ext_substate ]]; then - echo "Expected \"$expected_ext_substate\", got \"$ext_substate\"" - return 1 - fi -} - -autoneg() -{ - local msg - - RET=0 - - ip link set dev $swp1 up - - msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ - "Autoneg" "No partner detected") - check_err $? "$msg" - - log_test "Autoneg, No partner detected" - - ip link set dev $swp1 down -} - -autoneg_force_mode() -{ - local msg - - RET=0 - - ip link set dev $swp1 up - ip link set dev $swp2 up - - local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0)) - local speed1=${speeds_arr[0]} - local speed2=${speeds_arr[1]} - - ethtool_set $swp1 speed $speed1 autoneg off - ethtool_set $swp2 speed $speed2 autoneg off - - msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ - "Autoneg" "No partner detected during force mode") - check_err $? "$msg" - - msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \ - "Autoneg" "No partner detected during force mode") - check_err $? "$msg" - - log_test "Autoneg, No partner detected during force mode" - - ethtool -s $swp2 autoneg on - ethtool -s $swp1 autoneg on - - ip link set dev $swp2 down - ip link set dev $swp1 down -} - -no_cable() -{ - local msg - - RET=0 - - ip link set dev $swp3 up - - msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable") - check_err $? "$msg" - - log_test "No cable" - - ip link set dev $swp3 down -} - -skip_on_veth - -setup_prepare - -tests_run - -exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh deleted file mode 100644 index b9bfb45085af..000000000000 --- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh +++ /dev/null @@ -1,120 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -speeds_arr_get() -{ - cmd='/ETHTOOL_LINK_MODE_[^[:space:]]*_BIT[[:space:]]+=[[:space:]]+/ \ - {sub(/,$/, "") \ - sub(/ETHTOOL_LINK_MODE_/,"") \ - sub(/_BIT/,"") \ - sub(/_Full/,"/Full") \ - sub(/_Half/,"/Half");\ - print "["$1"]="$3}' - - awk "${cmd}" /usr/include/linux/ethtool.h -} - -ethtool_set() -{ - local cmd="$@" - local out=$(ethtool -s $cmd 2>&1 | wc -l) - - check_err $out "error in configuration. $cmd" -} - -dev_linkmodes_params_get() -{ - local dev=$1; shift - local adver=$1; shift - local -a linkmodes_params - local param_count - local arr - - if (($adver)); then - mode="Advertised link modes" - else - mode="Supported link modes" - fi - - local -a dev_linkmodes=($(dev_speeds_get $dev 1 $adver)) - for ((i=0; i<${#dev_linkmodes[@]}; i++)); do - linkmodes_params[$i]=$(echo -e "${dev_linkmodes[$i]}" | \ - # Replaces all non numbers with spaces - sed -e 's/[^0-9]/ /g' | \ - # Squeeze spaces in sequence to 1 space - tr -s ' ') - # Count how many numbers were found in the linkmode - param_count=$(echo "${linkmodes_params[$i]}" | wc -w) - if [[ $param_count -eq 1 ]]; then - linkmodes_params[$i]="${linkmodes_params[$i]} 1" - elif [[ $param_count -ge 3 ]]; then - arr=(${linkmodes_params[$i]}) - # Take only first two params - linkmodes_params[$i]=$(echo "${arr[@]:0:2}") - fi - done - echo ${linkmodes_params[@]} -} - -dev_speeds_get() -{ - local dev=$1; shift - local with_mode=$1; shift - local adver=$1; shift - local speeds_str - - if (($adver)); then - mode="Advertised link modes" - else - mode="Supported link modes" - fi - - speeds_str=$(ethtool "$dev" | \ - # Snip everything before the link modes section. - sed -n '/'"$mode"':/,$p' | \ - # Quit processing the rest at the start of the next section. - # When checking, skip the header of this section (hence the 2,). - sed -n '2,${/^[\t][^ \t]/q};p' | \ - # Drop the section header of the current section. - cut -d':' -f2) - - local -a speeds_arr=($speeds_str) - if [[ $with_mode -eq 0 ]]; then - for ((i=0; i<${#speeds_arr[@]}; i++)); do - speeds_arr[$i]=${speeds_arr[$i]%base*} - done - fi - echo ${speeds_arr[@]} -} - -common_speeds_get() -{ - dev1=$1; shift - dev2=$1; shift - with_mode=$1; shift - adver=$1; shift - - local -a dev1_speeds=($(dev_speeds_get $dev1 $with_mode $adver)) - local -a dev2_speeds=($(dev_speeds_get $dev2 $with_mode $adver)) - - comm -12 \ - <(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \ - <(printf '%s\n' "${dev2_speeds[@]}" | sort -u) -} - -different_speeds_get() -{ - local dev1=$1; shift - local dev2=$1; shift - local with_mode=$1; shift - local adver=$1; shift - - local -a speeds_arr - - speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver)) - if [[ ${#speeds_arr[@]} < 2 ]]; then - check_err 1 "cannot check different speeds. There are not enough speeds" - fi - - echo ${speeds_arr[0]} ${speeds_arr[1]} -} diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh deleted file mode 100755 index 50d5bfb17ef1..000000000000 --- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh +++ /dev/null @@ -1,340 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -ALL_TESTS=" - manual_with_verification_h1_to_h2 - manual_with_verification_h2_to_h1 - manual_without_verification_h1_to_h2 - manual_without_verification_h2_to_h1 - manual_failed_verification_h1_to_h2 - manual_failed_verification_h2_to_h1 - lldp -" - -NUM_NETIFS=2 -REQUIRE_MZ=no -PREEMPTIBLE_PRIO=0 -source lib.sh - -traffic_test() -{ - local if=$1; shift - local src=$1; shift - local num_pkts=10000 - local before= - local after= - local delta= - - if [ ${has_pmac_stats[$if]} = false ]; then - src="aggregate" - fi - - before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src) - - $MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO - - after=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src) - - delta=$((after - before)) - - # Allow an extra 1% tolerance for random packets sent by the stack - [ $delta -ge $num_pkts ] && [ $delta -le $((num_pkts + 100)) ] -} - -manual_with_verification() -{ - local tx=$1; shift - local rx=$1; shift - - RET=0 - - # It isn't completely clear from IEEE 802.3-2018 Figure 99-5: Transmit - # Processing state diagram whether the "send_r" variable (send response - # to verification frame) should be taken into consideration while the - # MAC Merge TX direction is disabled. That being said, at least the - # NXP ENETC does not, and requires tx-enabled on in order to respond to - # the link partner's verification frames. - ethtool --set-mm $rx tx-enabled on - ethtool --set-mm $tx verify-enabled on tx-enabled on - - # Wait for verification to finish - sleep 1 - - ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \ - grep -q 'SUCCEEDED' - check_err "$?" "Verification did not succeed" - - ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true' - check_err "$?" "pMAC TX is not active" - - traffic_test $tx "pmac" - check_err "$?" "Traffic did not get sent through $tx's pMAC" - - ethtool --set-mm $tx verify-enabled off tx-enabled off - ethtool --set-mm $rx tx-enabled off - - log_test "Manual configuration with verification: $tx to $rx" -} - -manual_with_verification_h1_to_h2() -{ - manual_with_verification $h1 $h2 -} - -manual_with_verification_h2_to_h1() -{ - manual_with_verification $h2 $h1 -} - -manual_without_verification() -{ - local tx=$1; shift - local rx=$1; shift - - RET=0 - - ethtool --set-mm $tx verify-enabled off tx-enabled on - - ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \ - grep -q 'DISABLED' - check_err "$?" "Verification is not disabled" - - ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true' - check_err "$?" "pMAC TX is not active" - - traffic_test $tx "pmac" - check_err "$?" "Traffic did not get sent through $tx's pMAC" - - ethtool --set-mm $tx verify-enabled off tx-enabled off - - log_test "Manual configuration without verification: $tx to $rx" -} - -manual_without_verification_h1_to_h2() -{ - manual_without_verification $h1 $h2 -} - -manual_without_verification_h2_to_h1() -{ - manual_without_verification $h2 $h1 -} - -manual_failed_verification() -{ - local tx=$1; shift - local rx=$1; shift - - RET=0 - - ethtool --set-mm $rx pmac-enabled off - ethtool --set-mm $tx verify-enabled on tx-enabled on - - # Wait for verification to time out - sleep 1 - - ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \ - grep -q 'SUCCEEDED' - check_fail "$?" "Verification succeeded when it shouldn't have" - - ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true' - check_fail "$?" "pMAC TX is active when it shouldn't have" - - traffic_test $tx "emac" - check_err "$?" "Traffic did not get sent through $tx's eMAC" - - ethtool --set-mm $tx verify-enabled off tx-enabled off - ethtool --set-mm $rx pmac-enabled on - - log_test "Manual configuration with failed verification: $tx to $rx" -} - -manual_failed_verification_h1_to_h2() -{ - manual_failed_verification $h1 $h2 -} - -manual_failed_verification_h2_to_h1() -{ - manual_failed_verification $h2 $h1 -} - -smallest_supported_add_frag_size() -{ - local iface=$1 - local rx_min_frag_size= - - rx_min_frag_size=$(ethtool --json --show-mm $iface | \ - jq '.[]."rx-min-frag-size"') - - if [ $rx_min_frag_size -le 60 ]; then - echo 0 - elif [ $rx_min_frag_size -le 124 ]; then - echo 1 - elif [ $rx_min_frag_size -le 188 ]; then - echo 2 - elif [ $rx_min_frag_size -le 252 ]; then - echo 3 - else - echo "$iface: RX min frag size $rx_min_frag_size cannot be advertised over LLDP" - exit 1 - fi -} - -expected_add_frag_size() -{ - local iface=$1 - local requested=$2 - local min=$(smallest_supported_add_frag_size $iface) - - [ $requested -le $min ] && echo $min || echo $requested -} - -lldp_change_add_frag_size() -{ - local add_frag_size=$1 - local pattern= - - lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null - # Wait for TLVs to be received - sleep 2 - pattern=$(printf "Additional fragment size: %d" \ - $(expected_add_frag_size $h1 $add_frag_size)) - lldptool -i $h2 -t -n -V addEthCaps | grep -q "$pattern" -} - -lldp() -{ - RET=0 - - systemctl start lldpad - - # Configure the interfaces to receive and transmit LLDPDUs - lldptool -L -i $h1 adminStatus=rxtx >/dev/null - lldptool -L -i $h2 adminStatus=rxtx >/dev/null - - # Enable the transmission of Additional Ethernet Capabilities TLV - lldptool -T -i $h1 -V addEthCaps enableTx=yes >/dev/null - lldptool -T -i $h2 -V addEthCaps enableTx=yes >/dev/null - - # Wait for TLVs to be received - sleep 2 - - lldptool -i $h1 -t -n -V addEthCaps | \ - grep -q "Preemption capability active" - check_err "$?" "$h1 pMAC TX is not active" - - lldptool -i $h2 -t -n -V addEthCaps | \ - grep -q "Preemption capability active" - check_err "$?" "$h2 pMAC TX is not active" - - lldp_change_add_frag_size 3 - check_err "$?" "addFragSize 3" - - lldp_change_add_frag_size 2 - check_err "$?" "addFragSize 2" - - lldp_change_add_frag_size 1 - check_err "$?" "addFragSize 1" - - lldp_change_add_frag_size 0 - check_err "$?" "addFragSize 0" - - traffic_test $h1 "pmac" - check_err "$?" "Traffic did not get sent through $h1's pMAC" - - traffic_test $h2 "pmac" - check_err "$?" "Traffic did not get sent through $h2's pMAC" - - systemctl stop lldpad - - log_test "LLDP" -} - -h1_create() -{ - ip link set dev $h1 up - - tc qdisc add dev $h1 root mqprio num_tc 4 map 0 1 2 3 \ - queues 1@0 1@1 1@2 1@3 \ - fp P E E E \ - hw 1 - - ethtool --set-mm $h1 pmac-enabled on tx-enabled off verify-enabled off -} - -h2_create() -{ - ip link set dev $h2 up - - ethtool --set-mm $h2 pmac-enabled on tx-enabled off verify-enabled off - - tc qdisc add dev $h2 root mqprio num_tc 4 map 0 1 2 3 \ - queues 1@0 1@1 1@2 1@3 \ - fp P E E E \ - hw 1 -} - -h1_destroy() -{ - ethtool --set-mm $h1 pmac-enabled off tx-enabled off verify-enabled off - - tc qdisc del dev $h1 root - - ip link set dev $h1 down -} - -h2_destroy() -{ - tc qdisc del dev $h2 root - - ethtool --set-mm $h2 pmac-enabled off tx-enabled off verify-enabled off - - ip link set dev $h2 down -} - -setup_prepare() -{ - h1=${NETIFS[p1]} - h2=${NETIFS[p2]} - - h1_create - h2_create -} - -cleanup() -{ - pre_cleanup - - h2_destroy - h1_destroy -} - -check_ethtool_mm_support -check_tc_fp_support -require_command lldptool -bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually" - -for netif in ${NETIFS[@]}; do - ethtool --show-mm $netif 2>&1 &> /dev/null - if [[ $? -ne 0 ]]; then - echo "SKIP: $netif does not support MAC Merge" - exit $ksft_skip - fi - - if check_ethtool_pmac_std_stats_support $netif eth-mac; then - has_pmac_stats[$netif]=true - else - has_pmac_stats[$netif]=false - echo "$netif does not report pMAC statistics, falling back to aggregate" - fi -done - -trap cleanup EXIT - -setup_prepare -setup_wait - -tests_run - -exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh deleted file mode 100755 index 41a34a61f763..000000000000 --- a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh +++ /dev/null @@ -1,143 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -ALL_TESTS=" - rmon_rx_histogram - rmon_tx_histogram -" - -NUM_NETIFS=2 -source lib.sh - -ETH_FCS_LEN=4 -ETH_HLEN=$((6+6+2)) - -declare -A netif_mtu - -ensure_mtu() -{ - local iface=$1; shift - local len=$1; shift - local current=$(ip -j link show dev $iface | jq -r '.[0].mtu') - local required=$((len - ETH_HLEN - ETH_FCS_LEN)) - - if [ $current -lt $required ]; then - ip link set dev $iface mtu $required || return 1 - fi -} - -bucket_test() -{ - local iface=$1; shift - local neigh=$1; shift - local set=$1; shift - local bucket=$1; shift - local len=$1; shift - local num_rx=10000 - local num_tx=20000 - local expected= - local before= - local after= - local delta= - - # Mausezahn does not include FCS bytes in its length - but the - # histogram counters do - len=$((len - ETH_FCS_LEN)) - - before=$(ethtool --json -S $iface --groups rmon | \ - jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") - - # Send 10k one way and 20k in the other, to detect counters - # mapped to the wrong direction - $MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us - $MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us - - after=$(ethtool --json -S $iface --groups rmon | \ - jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") - - delta=$((after - before)) - - expected=$([ $set = rx ] && echo $num_rx || echo $num_tx) - - # Allow some extra tolerance for other packets sent by the stack - [ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ] -} - -rmon_histogram() -{ - local iface=$1; shift - local neigh=$1; shift - local set=$1; shift - local nbuckets=0 - local step= - - RET=0 - - while read -r -a bucket; do - step="$set-pkts${bucket[0]}to${bucket[1]} on $iface" - - for if in $iface $neigh; do - if ! ensure_mtu $if ${bucket[0]}; then - log_test_skip "$if does not support the required MTU for $step" - return - fi - done - - if ! bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then - check_err 1 "$step failed" - return 1 - fi - log_test "$step" - nbuckets=$((nbuckets + 1)) - done < <(ethtool --json -S $iface --groups rmon | \ - jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null) - - if [ $nbuckets -eq 0 ]; then - log_test_skip "$iface does not support $set histogram counters" - return - fi -} - -rmon_rx_histogram() -{ - rmon_histogram $h1 $h2 rx - rmon_histogram $h2 $h1 rx -} - -rmon_tx_histogram() -{ - rmon_histogram $h1 $h2 tx - rmon_histogram $h2 $h1 tx -} - -setup_prepare() -{ - h1=${NETIFS[p1]} - h2=${NETIFS[p2]} - - for iface in $h1 $h2; do - netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu') - ip link set dev $iface up - done -} - -cleanup() -{ - pre_cleanup - - for iface in $h2 $h1; do - ip link set dev $iface \ - mtu ${netif_mtu[$iface]} \ - down - done -} - -check_ethtool_counter_group_support -trap cleanup EXIT - -setup_prepare -setup_wait - -tests_run - -exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index 1fc4f0242fc5..f1ca95e79a65 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -3,51 +3,28 @@ ############################################################################## # Topology description. p1 looped back to p2, p3 to p4 and so on. -declare -A NETIFS -NETIFS[p1]=veth0 -NETIFS[p2]=veth1 -NETIFS[p3]=veth2 -NETIFS[p4]=veth3 -NETIFS[p5]=veth4 -NETIFS[p6]=veth5 -NETIFS[p7]=veth6 -NETIFS[p8]=veth7 -NETIFS[p9]=veth8 -NETIFS[p10]=veth9 +NETIFS=( + [p1]=veth0 + [p2]=veth1 + [p3]=veth2 + [p4]=veth3 + [p5]=veth4 + [p6]=veth5 + [p7]=veth6 + [p8]=veth7 + [p9]=veth8 + [p10]=veth9 +) # Port that does not have a cable connected. NETIF_NO_CABLE=eth8 ############################################################################## -# Defines +# In addition to the topology-related variables, it is also possible to override +# in this file other variables that net/lib.sh, net/forwarding/lib.sh or other +# libraries or selftests use. E.g.: -# IPv4 ping utility name -PING=ping -# IPv6 ping utility name. Some distributions use 'ping' for IPv6. PING6=ping6 -# Packet generator. Some distributions use 'mz'. MZ=mausezahn -# mausezahn delay between transmissions in microseconds. -MZ_DELAY=0 -# Time to wait after interfaces participating in the test are all UP WAIT_TIME=5 -# Whether to pause on failure or not. -PAUSE_ON_FAIL=no -# Whether to pause on cleanup or not. -PAUSE_ON_CLEANUP=no -# Type of network interface to create -NETIF_TYPE=veth -# Whether to create virtual interfaces (veth) or not -NETIF_CREATE=yes -# Timeout (in seconds) before ping exits regardless of how many packets have -# been sent or received -PING_TIMEOUT=5 -# Minimum ageing_time (in centiseconds) supported by hardware -LOW_AGEING_TIME=1000 -# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process -# filter by software/hardware -TC_FLAG=skip_hw -# IPv6 traceroute utility name. -TROUTE6=traceroute6 - diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh index 9788bd0f6e8b..dda11a4a9450 100755 --- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh @@ -319,10 +319,10 @@ send_dst_ipv6() send_flowlabel() { # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec v$h1 \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 - done + ip vrf exec v$h1 sh -c \ + "for _ in {1..16384}; do \ + $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + done" } send_src_udp6() diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh deleted file mode 100755 index 48584a51388f..000000000000 --- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh +++ /dev/null @@ -1,340 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -# +--------------------+ +----------------------+ -# | H1 | | H2 | -# | | | | -# | $h1.200 + | | + $h2.200 | -# | 192.0.2.1/28 | | | | 192.0.2.18/28 | -# | 2001:db8:1::1/64 | | | | 2001:db8:2::1/64 | -# | | | | | | -# | $h1 + | | + $h2 | -# | | | | | | -# +------------------|-+ +-|--------------------+ -# | | -# +------------------|-------------------------|--------------------+ -# | SW | | | -# | | | | -# | $rp1 + + $rp2 | -# | | | | -# | $rp1.200 + + $rp2.200 | -# | 192.0.2.2/28 192.0.2.17/28 | -# | 2001:db8:1::2/64 2001:db8:2::2/64 | -# | | -# +-----------------------------------------------------------------+ - -ALL_TESTS=" - ping_ipv4 - ping_ipv6 - test_stats_rx_ipv4 - test_stats_tx_ipv4 - test_stats_rx_ipv6 - test_stats_tx_ipv6 - respin_enablement - test_stats_rx_ipv4 - test_stats_tx_ipv4 - test_stats_rx_ipv6 - test_stats_tx_ipv6 - reapply_config - ping_ipv4 - ping_ipv6 - test_stats_rx_ipv4 - test_stats_tx_ipv4 - test_stats_rx_ipv6 - test_stats_tx_ipv6 - test_stats_report_rx - test_stats_report_tx - test_destroy_enabled - test_double_enable -" -NUM_NETIFS=4 -source lib.sh - -h1_create() -{ - simple_if_init $h1 - vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64 - ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 - ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 -} - -h1_destroy() -{ - ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 - ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 - vlan_destroy $h1 200 - simple_if_fini $h1 -} - -h2_create() -{ - simple_if_init $h2 - vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64 - ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17 - ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2 -} - -h2_destroy() -{ - ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2 - ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17 - vlan_destroy $h2 200 - simple_if_fini $h2 -} - -router_rp1_200_create() -{ - ip link add name $rp1.200 link $rp1 type vlan id 200 - ip link set dev $rp1.200 addrgenmode eui64 - ip link set dev $rp1.200 up - ip address add dev $rp1.200 192.0.2.2/28 - ip address add dev $rp1.200 2001:db8:1::2/64 - ip stats set dev $rp1.200 l3_stats on -} - -router_rp1_200_destroy() -{ - ip stats set dev $rp1.200 l3_stats off - ip address del dev $rp1.200 2001:db8:1::2/64 - ip address del dev $rp1.200 192.0.2.2/28 - ip link del dev $rp1.200 -} - -router_create() -{ - ip link set dev $rp1 up - router_rp1_200_create - - ip link set dev $rp2 up - vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64 -} - -router_destroy() -{ - vlan_destroy $rp2 200 - ip link set dev $rp2 down - - router_rp1_200_destroy - ip link set dev $rp1 down -} - -setup_prepare() -{ - h1=${NETIFS[p1]} - rp1=${NETIFS[p2]} - - rp2=${NETIFS[p3]} - h2=${NETIFS[p4]} - - rp1mac=$(mac_get $rp1) - rp2mac=$(mac_get $rp2) - - vrf_prepare - - h1_create - h2_create - - router_create - - forwarding_enable -} - -cleanup() -{ - pre_cleanup - - forwarding_restore - - router_destroy - - h2_destroy - h1_destroy - - vrf_cleanup -} - -ping_ipv4() -{ - ping_test $h1.200 192.0.2.18 " IPv4" -} - -ping_ipv6() -{ - ping_test $h1.200 2001:db8:2::1 " IPv6" -} - -send_packets_rx_ipv4() -{ - # Send 21 packets instead of 20, because the first one might trap and go - # through the SW datapath, which might not bump the HW counter. - $MZ $h1.200 -c 21 -d 20msec -p 100 \ - -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \ - -q -t udp sp=54321,dp=12345 -} - -send_packets_rx_ipv6() -{ - $MZ $h1.200 -6 -c 21 -d 20msec -p 100 \ - -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \ - -q -t udp sp=54321,dp=12345 -} - -send_packets_tx_ipv4() -{ - $MZ $h2.200 -c 21 -d 20msec -p 100 \ - -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \ - -q -t udp sp=54321,dp=12345 -} - -send_packets_tx_ipv6() -{ - $MZ $h2.200 -6 -c 21 -d 20msec -p 100 \ - -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \ - -q -t udp sp=54321,dp=12345 -} - -___test_stats() -{ - local dir=$1; shift - local prot=$1; shift - - local a - local b - - a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets) - send_packets_${dir}_${prot} - "$@" - b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ - hw_stats_get l3_stats $rp1.200 ${dir} packets) - check_err $? "Traffic not reflected in the counter: $a -> $b" -} - -__test_stats() -{ - local dir=$1; shift - local prot=$1; shift - - RET=0 - ___test_stats "$dir" "$prot" - log_test "Test $dir packets: $prot" -} - -test_stats_rx_ipv4() -{ - __test_stats rx ipv4 -} - -test_stats_tx_ipv4() -{ - __test_stats tx ipv4 -} - -test_stats_rx_ipv6() -{ - __test_stats rx ipv6 -} - -test_stats_tx_ipv6() -{ - __test_stats tx ipv6 -} - -# Make sure everything works well even after stats have been disabled and -# reenabled on the same device without touching the L3 configuration. -respin_enablement() -{ - log_info "Turning stats off and on again" - ip stats set dev $rp1.200 l3_stats off - ip stats set dev $rp1.200 l3_stats on -} - -# For the initial run, l3_stats is enabled on a completely set up netdevice. Now -# do it the other way around: enabling the L3 stats on an L2 netdevice, and only -# then apply the L3 configuration. -reapply_config() -{ - log_info "Reapplying configuration" - - router_rp1_200_destroy - - ip link add name $rp1.200 link $rp1 type vlan id 200 - ip link set dev $rp1.200 addrgenmode none - ip stats set dev $rp1.200 l3_stats on - ip link set dev $rp1.200 addrgenmode eui64 - ip link set dev $rp1.200 up - ip address add dev $rp1.200 192.0.2.2/28 - ip address add dev $rp1.200 2001:db8:1::2/64 -} - -__test_stats_report() -{ - local dir=$1; shift - local prot=$1; shift - - local a - local b - - RET=0 - - a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets) - send_packets_${dir}_${prot} - ip address flush dev $rp1.200 - b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ - hw_stats_get l3_stats $rp1.200 ${dir} packets) - check_err $? "Traffic not reflected in the counter: $a -> $b" - log_test "Test ${dir} packets: stats pushed on loss of L3" - - ip stats set dev $rp1.200 l3_stats off - ip link del dev $rp1.200 - router_rp1_200_create -} - -test_stats_report_rx() -{ - __test_stats_report rx ipv4 -} - -test_stats_report_tx() -{ - __test_stats_report tx ipv4 -} - -test_destroy_enabled() -{ - RET=0 - - ip link del dev $rp1.200 - router_rp1_200_create - - log_test "Destroy l3_stats-enabled netdev" -} - -test_double_enable() -{ - RET=0 - ___test_stats rx ipv4 \ - ip stats set dev $rp1.200 l3_stats on - log_test "Test stat retention across a spurious enablement" -} - -trap cleanup EXIT - -setup_prepare -setup_wait - -used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info | - jq '.[].info.l3_stats.used') -kind=$(ip -j -d link show dev $rp1 | - jq -r '.[].linkinfo.info_kind') -if [[ $used != true ]]; then - if [[ $kind == veth ]]; then - log_test_skip "l3_stats not offloaded on veth interface" - EXIT_STATUS=$ksft_skip - else - RET=1 log_test "l3_stats not offloaded" - fi -else - tests_run -fi - -exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh deleted file mode 100755 index 7594bbb49029..000000000000 --- a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh +++ /dev/null @@ -1,111 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -# Test L3 stats on IP-in-IP GRE tunnel without key. - -# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more -# details. - -ALL_TESTS=" - ping_ipv4 - test_stats_rx - test_stats_tx -" -NUM_NETIFS=6 -source lib.sh -source ipip_lib.sh - -setup_prepare() -{ - h1=${NETIFS[p1]} - ol1=${NETIFS[p2]} - - ul1=${NETIFS[p3]} - ul2=${NETIFS[p4]} - - ol2=${NETIFS[p5]} - h2=${NETIFS[p6]} - - ol1mac=$(mac_get $ol1) - - forwarding_enable - vrf_prepare - h1_create - h2_create - sw1_flat_create gre $ol1 $ul1 - sw2_flat_create gre $ol2 $ul2 - ip stats set dev g1a l3_stats on - ip stats set dev g2a l3_stats on -} - -cleanup() -{ - pre_cleanup - - ip stats set dev g1a l3_stats off - ip stats set dev g2a l3_stats off - - sw2_flat_destroy $ol2 $ul2 - sw1_flat_destroy $ol1 $ul1 - h2_destroy - h1_destroy - - vrf_cleanup - forwarding_restore -} - -ping_ipv4() -{ - RET=0 - - ping_test $h1 192.0.2.18 " gre flat" -} - -send_packets_ipv4() -{ - # Send 21 packets instead of 20, because the first one might trap and go - # through the SW datapath, which might not bump the HW counter. - $MZ $h1 -c 21 -d 20msec -p 100 \ - -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \ - -q -t udp sp=54321,dp=12345 -} - -test_stats() -{ - local dev=$1; shift - local dir=$1; shift - - local a - local b - - RET=0 - - a=$(hw_stats_get l3_stats $dev $dir packets) - send_packets_ipv4 - b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ - hw_stats_get l3_stats $dev $dir packets) - check_err $? "Traffic not reflected in the counter: $a -> $b" - - log_test "Test $dir packets: $prot" -} - -test_stats_tx() -{ - test_stats g1a tx -} - -test_stats_rx() -{ - test_stats g2a rx -} - -skip_on_veth - -trap cleanup EXIT - -setup_prepare -setup_wait - -tests_run - -exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh index 2ab9eaaa5532..e28b4a079e52 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh @@ -321,10 +321,10 @@ send_dst_ipv6() send_flowlabel() { # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec v$h1 \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 - done + ip vrf exec v$h1 sh -c \ + "for _ in {1..16384}; do \ + $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + done" } send_src_udp6() diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh index 96c97064f2d3..becc7c3fc809 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_flat gre_mtu_change + gre_flat_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change } +gre_flat_remote_change() +{ + flat_remote_change + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 (new remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 (new remote)" + + flat_remote_restore + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 (old remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh index ff9fb0db9bd1..e5335116a2fd 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_flat gre_mtu_change + gre_flat_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change } +gre_flat_remote_change() +{ + flat_remote_change + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key (new remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key (new remote)" + + flat_remote_restore + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key (old remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh index 12c138785242..7e0cbfdefab0 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_flat gre_mtu_change + gre_flat_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_flat_remote_change() +{ + flat_remote_change + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey (new remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey (new remote)" + + flat_remote_restore + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey (old remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh index 83b55c30a5c3..e0844495f3d1 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_hier gre_mtu_change + gre_hier_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_hier_remote_change() +{ + hier_remote_change + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 (new remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 (new remote)" + + hier_remote_restore + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 (old remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh index 256607916d92..741bc9c928eb 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_hier gre_mtu_change + gre_hier_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_hier_remote_change() +{ + hier_remote_change + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key (new remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key (new remote)" + + hier_remote_restore + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key (old remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh index ad1bcd6334a8..ad9eab4b1367 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_hier gre_mtu_change + gre_hier_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_hier_remote_change() +{ + hier_remote_change + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey (new remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey (new remote)" + + hier_remote_restore + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey (old remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh index 24f4ab328bd2..2d91281dc5b7 100644 --- a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh @@ -436,3 +436,83 @@ test_mtu_change() check_err $? log_test "ping GRE IPv6, packet size 1800 after MTU change" } + +topo_flat_remote_change() +{ + local old1=$1; shift + local new1=$1; shift + local old2=$1; shift + local new2=$1; shift + + ip link set dev g1a type ip6gre local $new1 remote $new2 + __addr_add_del g1a add "$new1/128" + __addr_add_del g1a del "$old1/128" + ip -6 route add $new2/128 via 2001:db8:10::2 + ip -6 route del $old2/128 + + ip link set dev g2a type ip6gre local $new2 remote $new1 + __addr_add_del g2a add "$new2/128" + __addr_add_del g2a del "$old2/128" + ip -6 route add vrf v$ol2 $new1/128 via 2001:db8:10::1 + ip -6 route del vrf v$ol2 $old1/128 +} + +flat_remote_change() +{ + local old1=2001:db8:3::1 + local new1=2001:db8:3::10 + local old2=2001:db8:3::2 + local new2=2001:db8:3::20 + + topo_flat_remote_change $old1 $new1 $old2 $new2 +} + +flat_remote_restore() +{ + local old1=2001:db8:3::10 + local new1=2001:db8:3::1 + local old2=2001:db8:3::20 + local new2=2001:db8:3::2 + + topo_flat_remote_change $old1 $new1 $old2 $new2 +} + +topo_hier_remote_change() +{ + local old1=$1; shift + local new1=$1; shift + local old2=$1; shift + local new2=$1; shift + + __addr_add_del dummy1 del "$old1/64" + __addr_add_del dummy1 add "$new1/64" + ip link set dev g1a type ip6gre local $new1 remote $new2 + ip -6 route add vrf v$ul1 $new2/128 via 2001:db8:10::2 + ip -6 route del vrf v$ul1 $old2/128 + + __addr_add_del dummy2 del "$old2/64" + __addr_add_del dummy2 add "$new2/64" + ip link set dev g2a type ip6gre local $new2 remote $new1 + ip -6 route add vrf v$ul2 $new1/128 via 2001:db8:10::1 + ip -6 route del vrf v$ul2 $old1/128 +} + +hier_remote_change() +{ + local old1=2001:db8:3::1 + local new1=2001:db8:3::10 + local old2=2001:db8:3::2 + local new2=2001:db8:3::20 + + topo_hier_remote_change $old1 $new1 $old2 $new2 +} + +hier_remote_restore() +{ + local old1=2001:db8:3::10 + local new1=2001:db8:3::1 + local old2=2001:db8:3::20 + local new2=2001:db8:3::2 + + topo_hier_remote_change $old1 $new1 $old2 $new2 +} diff --git a/tools/testing/selftests/net/forwarding/ipip_lib.sh b/tools/testing/selftests/net/forwarding/ipip_lib.sh index 30f36a57bae6..01e62c4ac94d 100644 --- a/tools/testing/selftests/net/forwarding/ipip_lib.sh +++ b/tools/testing/selftests/net/forwarding/ipip_lib.sh @@ -141,7 +141,6 @@ # | $h2 + | # | 192.0.2.18/28 | # +---------------------------+ -source lib.sh h1_create() { diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index e579c2e0c462..508f3c700d71 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -2,68 +2,133 @@ # SPDX-License-Identifier: GPL-2.0 ############################################################################## +# Topology description. p1 looped back to p2, p3 to p4 and so on. + +declare -A NETIFS=( + [p1]=veth0 + [p2]=veth1 + [p3]=veth2 + [p4]=veth3 + [p5]=veth4 + [p6]=veth5 + [p7]=veth6 + [p8]=veth7 + [p9]=veth8 + [p10]=veth9 +) + +# Port that does not have a cable connected. +: "${NETIF_NO_CABLE:=eth8}" + +############################################################################## # Defines -# Can be overridden by the configuration file. -PING=${PING:=ping} -PING6=${PING6:=ping6} -MZ=${MZ:=mausezahn} -MZ_DELAY=${MZ_DELAY:=0} -ARPING=${ARPING:=arping} -TEAMD=${TEAMD:=teamd} -WAIT_TIME=${WAIT_TIME:=5} -PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} -PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no} -NETIF_TYPE=${NETIF_TYPE:=veth} -NETIF_CREATE=${NETIF_CREATE:=yes} -MCD=${MCD:=smcrouted} -MC_CLI=${MC_CLI:=smcroutectl} -PING_COUNT=${PING_COUNT:=10} -PING_TIMEOUT=${PING_TIMEOUT:=5} -WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} -INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600} -LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000} -REQUIRE_JQ=${REQUIRE_JQ:=yes} -REQUIRE_MZ=${REQUIRE_MZ:=yes} -REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no} -STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no} -TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=} -TROUTE6=${TROUTE6:=traceroute6} +# Networking utilities. +: "${PING:=ping}" +: "${PING6:=ping6}" # Some distros just use ping. +: "${ARPING:=arping}" +: "${TROUTE6:=traceroute6}" + +# Packet generator. +: "${MZ:=mausezahn}" # Some distributions use 'mz'. +: "${MZ_DELAY:=0}" + +# Host configuration tools. +: "${TEAMD:=teamd}" +: "${MCD:=smcrouted}" +: "${MC_CLI:=smcroutectl}" + +# Constants for netdevice bring-up: +# Default time in seconds to wait for an interface to come up before giving up +# and bailing out. Used during initial setup. +: "${INTERFACE_TIMEOUT:=600}" +# Like INTERFACE_TIMEOUT, but default for ad-hoc waiting in testing scripts. +: "${WAIT_TIMEOUT:=20}" +# Time to wait after interfaces participating in the test are all UP. +: "${WAIT_TIME:=5}" + +# Whether to pause on, respectively, after a failure and before cleanup. +: "${PAUSE_ON_CLEANUP:=no}" + +# Whether to create virtual interfaces, and what netdevice type they should be. +: "${NETIF_CREATE:=yes}" +: "${NETIF_TYPE:=veth}" + +# Constants for ping tests: +# How many packets should be sent. +: "${PING_COUNT:=10}" +# Timeout (in seconds) before ping exits regardless of how many packets have +# been sent or received +: "${PING_TIMEOUT:=5}" + +# Minimum ageing_time (in centiseconds) supported by hardware +: "${LOW_AGEING_TIME:=1000}" + +# Whether to check for availability of certain tools. +: "${REQUIRE_JQ:=yes}" +: "${REQUIRE_MZ:=yes}" +: "${REQUIRE_MTOOLS:=no}" +: "${REQUIRE_TEAMD:=no}" + +# Whether to override MAC addresses on interfaces participating in the test. +: "${STABLE_MAC_ADDRS:=no}" + +# Flags for tcpdump +: "${TCPDUMP_EXTRA_FLAGS:=}" + +# Flags for TC filters. +: "${TC_FLAG:=skip_hw}" + +# Whether the machine is "slow" -- i.e. might be incapable of running tests +# involving heavy traffic. This might be the case on a debug kernel, a VM, or +# e.g. a low-power board. +: "${KSFT_MACHINE_SLOW:=no}" -net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") +############################################################################## +# Find netifs by test-specified driver name -if [[ -f $net_forwarding_dir/forwarding.config ]]; then - source "$net_forwarding_dir/forwarding.config" -fi +driver_name_get() +{ + local dev=$1; shift + local driver_path="/sys/class/net/$dev/device/driver" -source "$net_forwarding_dir/../lib.sh" + if [[ -L $driver_path ]]; then + basename `realpath $driver_path` + fi +} -# timeout in seconds -slowwait() +netif_find_driver() { - local timeout=$1; shift + local ifnames=`ip -j link show | jq -r ".[].ifname"` + local count=0 - local start_time="$(date -u +%s)" - while true + for ifname in $ifnames do - local out - out=$("$@") - local ret=$? - if ((!ret)); then - echo -n "$out" - return 0 + local driver_name=`driver_name_get $ifname` + if [[ ! -z $driver_name && $driver_name == $NETIF_FIND_DRIVER ]]; then + count=$((count + 1)) + NETIFS[p$count]="$ifname" fi - - local current_time="$(date -u +%s)" - if ((current_time - start_time > timeout)); then - echo -n "$out" - return 1 - fi - - sleep 0.1 done } +# Whether to find netdevice according to the driver speficied by the importer +: "${NETIF_FIND_DRIVER:=}" + +if [[ $NETIF_FIND_DRIVER ]]; then + unset NETIFS + declare -A NETIFS + netif_find_driver +fi + +net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +if [[ -f $net_forwarding_dir/forwarding.config ]]; then + source "$net_forwarding_dir/forwarding.config" +fi + +source "$net_forwarding_dir/../lib.sh" + ############################################################################## # Sanity checks @@ -205,32 +270,38 @@ check_port_mab_support() fi } -skip_on_veth() +if [[ "$(id -u)" -ne 0 ]]; then + echo "SKIP: need root privileges" + exit $ksft_skip +fi + +check_driver() { - local kind=$(ip -j -d link show dev ${NETIFS[p1]} | - jq -r '.[].linkinfo.info_kind') + local dev=$1; shift + local expected=$1; shift + local driver_name=`driver_name_get $dev` - if [[ $kind == veth ]]; then - echo "SKIP: Test cannot be run with veth pairs" + if [[ $driver_name != $expected ]]; then + echo "SKIP: expected driver $expected for $dev, got $driver_name instead" exit $ksft_skip fi } -if [[ "$(id -u)" -ne 0 ]]; then - echo "SKIP: need root privileges" - exit $ksft_skip -fi - if [[ "$CHECK_TC" = "yes" ]]; then check_tc_version fi -require_command() +# IPv6 support was added in v3.0 +check_mtools_version() { - local cmd=$1; shift + local version="$(msend -v)" + local major + + version=${version##msend version } + major=$(echo $version | cut -d. -f1) - if [[ ! -x "$(command -v "$cmd")" ]]; then - echo "SKIP: $cmd not installed" + if [ $major -lt 3 ]; then + echo "SKIP: expected mtools version 3.0, got $version" exit $ksft_skip fi } @@ -241,16 +312,14 @@ fi if [[ "$REQUIRE_MZ" = "yes" ]]; then require_command $MZ fi +if [[ "$REQUIRE_TEAMD" = "yes" ]]; then + require_command $TEAMD +fi if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then - # https://github.com/vladimiroltean/mtools/ - # patched for IPv6 support + # https://github.com/troglobit/mtools require_command msend require_command mreceive -fi - -if [[ ! -v NUM_NETIFS ]]; then - echo "SKIP: importer does not define \"NUM_NETIFS\"" - exit $ksft_skip + check_mtools_version fi ############################################################################## @@ -271,6 +340,23 @@ done ############################################################################## # Network interfaces configuration +if [[ ! -v NUM_NETIFS ]]; then + echo "SKIP: importer does not define \"NUM_NETIFS\"" + exit $ksft_skip +fi + +if (( NUM_NETIFS > ${#NETIFS[@]} )); then + echo "SKIP: Importer requires $NUM_NETIFS NETIFS, but only ${#NETIFS[@]} are defined (${NETIFS[@]})" + exit $ksft_skip +fi + +for i in $(seq ${#NETIFS[@]}); do + if [[ ! ${NETIFS[p$i]} ]]; then + echo "SKIP: NETIFS[p$i] not given" + exit $ksft_skip + fi +done + create_netif_veth() { local i @@ -353,89 +439,6 @@ done ############################################################################## # Helpers -# Exit status to return at the end. Set in case one of the tests fails. -EXIT_STATUS=0 -# Per-test return value. Clear at the beginning of each test. -RET=0 - -check_err() -{ - local err=$1 - local msg=$2 - - if [[ $RET -eq 0 && $err -ne 0 ]]; then - RET=$err - retmsg=$msg - fi -} - -check_fail() -{ - local err=$1 - local msg=$2 - - if [[ $RET -eq 0 && $err -eq 0 ]]; then - RET=1 - retmsg=$msg - fi -} - -check_err_fail() -{ - local should_fail=$1; shift - local err=$1; shift - local what=$1; shift - - if ((should_fail)); then - check_fail $err "$what succeeded, but should have failed" - else - check_err $err "$what failed" - fi -} - -log_test() -{ - local test_name=$1 - local opt_str=$2 - - if [[ $# -eq 2 ]]; then - opt_str="($opt_str)" - fi - - if [[ $RET -ne 0 ]]; then - EXIT_STATUS=1 - printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str" - if [[ ! -z "$retmsg" ]]; then - printf "\t%s\n" "$retmsg" - fi - if [ "${PAUSE_ON_FAIL}" = "yes" ]; then - echo "Hit enter to continue, 'q' to quit" - read a - [ "$a" = "q" ] && exit 1 - fi - return 1 - fi - - printf "TEST: %-60s [ OK ]\n" "$test_name $opt_str" - return 0 -} - -log_test_skip() -{ - local test_name=$1 - local opt_str=$2 - - printf "TEST: %-60s [SKIP]\n" "$test_name $opt_str" - return 0 -} - -log_info() -{ - local msg=$1 - - echo "INFO: $msg" -} - not() { "$@" @@ -487,33 +490,6 @@ wait_for_trap() "$@" | grep -q trap } -until_counter_is() -{ - local expr=$1; shift - local current=$("$@") - - echo $((current)) - ((current $expr)) -} - -busywait_for_counter() -{ - local timeout=$1; shift - local delta=$1; shift - - local base=$("$@") - busywait "$timeout" until_counter_is ">= $((base + delta))" "$@" -} - -slowwait_for_counter() -{ - local timeout=$1; shift - local delta=$1; shift - - local base=$("$@") - slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@" -} - setup_wait_dev() { local dev=$1; shift @@ -562,6 +538,19 @@ setup_wait() sleep $WAIT_TIME } +wait_for_dev() +{ + local dev=$1; shift + local timeout=${1:-$WAIT_TIMEOUT}; shift + + slowwait $timeout ip link show dev $dev &> /dev/null + if (( $? )); then + check_err 1 + log_test wait_for_dev "Interface $dev did not appear." + exit $EXIT_STATUS + fi +} + cmd_jq() { local cmd=$1 @@ -819,29 +808,6 @@ link_stats_rx_errors_get() link_stats_get $1 rx errors } -tc_rule_stats_get() -{ - local dev=$1; shift - local pref=$1; shift - local dir=$1; shift - local selector=${1:-.packets}; shift - - tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \ - | jq ".[1].options.actions[].stats$selector" -} - -tc_rule_handle_stats_get() -{ - local id=$1; shift - local handle=$1; shift - local selector=${1:-.packets}; shift - local netns=${1:-""}; shift - - tc $netns -j -s filter show $id \ - | jq ".[] | select(.options.handle == $handle) | \ - .options.actions[0].stats$selector" -} - ethtool_stats_get() { local dev=$1; shift @@ -960,11 +926,37 @@ packets_rate() echo $(((t1 - t0) / interval)) } -mac_get() +ether_addr_to_u64() { - local if_name=$1 + local addr="$1" + local order="$((1 << 40))" + local val=0 + local byte + + addr="${addr//:/ }" + + for byte in $addr; do + byte="0x$byte" + val=$((val + order * byte)) + order=$((order >> 8)) + done + + printf "0x%x" $val +} + +u64_to_ether_addr() +{ + local val=$1 + local byte + local i - ip -j link show dev $if_name | jq -r '.[]["address"]' + for ((i = 40; i >= 0; i -= 8)); do + byte=$(((val & (0xff << i)) >> i)) + printf "%02x" $byte + if [ $i -ne 0 ]; then + printf ":" + fi + done } ipv6_lladdr_get() @@ -988,12 +980,19 @@ bridge_ageing_time_get() } declare -A SYSCTL_ORIG +sysctl_save() +{ + local key=$1; shift + + SYSCTL_ORIG[$key]=$(sysctl -n $key) +} + sysctl_set() { local key=$1; shift local value=$1; shift - SYSCTL_ORIG[$key]=$(sysctl -n $key) + sysctl_save "$key" sysctl -qw $key="$value" } @@ -1072,22 +1071,6 @@ trap_uninstall() tc filter del dev $dev $direction pref 1 flower } -slow_path_trap_install() -{ - # For slow-path testing, we need to install a trap to get to - # slow path the packets that would otherwise be switched in HW. - if [ "${tcflags/skip_hw}" != "$tcflags" ]; then - trap_install "$@" - fi -} - -slow_path_trap_uninstall() -{ - if [ "${tcflags/skip_hw}" != "$tcflags" ]; then - trap_uninstall "$@" - fi -} - __icmp_capture_add_del() { local add_del=$1; shift @@ -1104,22 +1087,34 @@ __icmp_capture_add_del() icmp_capture_install() { - __icmp_capture_add_del add 100 "" "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del add 100 "" "$tundev" "$filter" } icmp_capture_uninstall() { - __icmp_capture_add_del del 100 "" "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del del 100 "" "$tundev" "$filter" } icmp6_capture_install() { - __icmp_capture_add_del add 100 v6 "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del add 100 v6 "$tundev" "$filter" } icmp6_capture_uninstall() { - __icmp_capture_add_del del 100 v6 "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del del 100 v6 "$tundev" "$filter" } __vlan_capture_add_del() @@ -1137,12 +1132,18 @@ __vlan_capture_add_del() vlan_capture_install() { - __vlan_capture_add_del add 100 "$@" + local dev=$1; shift + local filter=$1; shift + + __vlan_capture_add_del add 100 "$dev" "$filter" } vlan_capture_uninstall() { - __vlan_capture_add_del del 100 "$@" + local dev=$1; shift + local filter=$1; shift + + __vlan_capture_add_del del 100 "$dev" "$filter" } __dscp_capture_add_del() @@ -1198,13 +1199,10 @@ matchall_sink_create() action drop } -tests_run() +cleanup() { - local current_test - - for current_test in ${TESTS:-$ALL_TESTS}; do - $current_test - done + pre_cleanup + defer_scopes_cleanup } multipath_eval() @@ -1502,40 +1500,68 @@ __start_traffic() local sip=$1; shift local dip=$1; shift local dmac=$1; shift + local -a mz_args=("$@") $MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \ - -a own -b $dmac -t "$proto" -q "$@" & + -a own -b $dmac -t "$proto" -q "${mz_args[@]}" & sleep 1 } start_traffic_pktsize() { local pktsize=$1; shift + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") - __start_traffic $pktsize udp "$@" + __start_traffic $pktsize udp "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } start_tcp_traffic_pktsize() { local pktsize=$1; shift + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") - __start_traffic $pktsize tcp "$@" + __start_traffic $pktsize tcp "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } start_traffic() { - start_traffic_pktsize 8000 "$@" + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") + + start_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } start_tcp_traffic() { - start_tcp_traffic_pktsize 8000 "$@" + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") + + start_tcp_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } stop_traffic() { - # Suppress noise from killing mausezahn. - { kill %% && wait %%; } 2>/dev/null + local pid=${1-%%}; shift + + kill_process "$pid" } declare -A cappid @@ -2011,6 +2037,8 @@ bail_on_lldpad() { local reason1="$1"; shift local reason2="$1"; shift + local caller=${FUNCNAME[1]} + local src=${BASH_SOURCE[1]} if systemctl is-active --quiet lldpad; then @@ -2031,7 +2059,8 @@ bail_on_lldpad() an environment variable ALLOW_LLDPAD to a non-empty string. EOF - exit 1 + log_test_skip $src:$caller + exit $EXIT_STATUS else return fi @@ -2044,3 +2073,22 @@ absval() echo $((v > 0 ? v : -v)) } + +has_unicast_flt() +{ + local dev=$1; shift + local mac_addr=$(mac_get $dev) + local tmp=$(ether_addr_to_u64 $mac_addr) + local promisc + + ip link set $dev up + ip link add link $dev name macvlan-tmp type macvlan mode private + ip link set macvlan-tmp address $(u64_to_ether_addr $((tmp + 1))) + ip link set macvlan-tmp up + + promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity') + + ip link del macvlan-tmp + + [[ $promisc == 1 ]] && echo "no" || echo "yes" +} diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh new file mode 100755 index 000000000000..ff2accccaf4d --- /dev/null +++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh @@ -0,0 +1,208 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This tests the operation of lib.sh itself. + +ALL_TESTS=" + test_ret + test_exit_status +" +NUM_NETIFS=0 +source lib.sh + +# Simulated checks. + +do_test() +{ + local msg=$1; shift + + "$@" + check_err $? "$msg" +} + +tpass() +{ + do_test "tpass" true +} + +tfail() +{ + do_test "tfail" false +} + +txfail() +{ + FAIL_TO_XFAIL=yes do_test "txfail" false +} + +# Simulated tests. + +pass() +{ + RET=0 + do_test "true" true + log_test "true" +} + +fail() +{ + RET=0 + do_test "false" false + log_test "false" +} + +xfail() +{ + RET=0 + FAIL_TO_XFAIL=yes do_test "xfalse" false + log_test "xfalse" +} + +skip() +{ + RET=0 + log_test_skip "skip" +} + +slow_xfail() +{ + RET=0 + xfail_on_slow do_test "slow_false" false + log_test "slow_false" +} + +# lib.sh tests. + +ret_tests_run() +{ + local t + + RET=0 + retmsg= + for t in "$@"; do + $t + done + echo "$retmsg" + return $RET +} + +ret_subtest() +{ + local expect_ret=$1; shift + local expect_retmsg=$1; shift + local -a tests=( "$@" ) + + local status_names=(pass fail xfail xpass skip) + local ret + local out + + RET=0 + + # Run this in a subshell, so that our environment is intact. + out=$(ret_tests_run "${tests[@]}") + ret=$? + + (( ret == expect_ret )) + check_err $? "RET=$ret expected $expect_ret" + + [[ $out == $expect_retmsg ]] + check_err $? "retmsg=$out expected $expect_retmsg" + + log_test "RET $(echo ${tests[@]}) -> ${status_names[$ret]}" +} + +test_ret() +{ + ret_subtest $ksft_pass "" + + ret_subtest $ksft_pass "" tpass + ret_subtest $ksft_fail "tfail" tfail + ret_subtest $ksft_xfail "txfail" txfail + + ret_subtest $ksft_pass "" tpass tpass + ret_subtest $ksft_fail "tfail" tpass tfail + ret_subtest $ksft_xfail "txfail" tpass txfail + + ret_subtest $ksft_fail "tfail" tfail tpass + ret_subtest $ksft_xfail "txfail" txfail tpass + + ret_subtest $ksft_fail "tfail" tfail tfail + ret_subtest $ksft_fail "tfail" tfail txfail + + ret_subtest $ksft_fail "tfail" txfail tfail + + ret_subtest $ksft_xfail "txfail" txfail txfail +} + +exit_status_tests_run() +{ + EXIT_STATUS=0 + tests_run > /dev/null + return $EXIT_STATUS +} + +exit_status_subtest() +{ + local expect_exit_status=$1; shift + local tests=$1; shift + local what=$1; shift + + local status_names=(pass fail xfail xpass skip) + local exit_status + local out + + RET=0 + + # Run this in a subshell, so that our environment is intact. + out=$(TESTS="$tests" exit_status_tests_run) + exit_status=$? + + (( exit_status == expect_exit_status )) + check_err $? "EXIT_STATUS=$exit_status, expected $expect_exit_status" + + log_test "EXIT_STATUS $tests$what -> ${status_names[$exit_status]}" +} + +test_exit_status() +{ + exit_status_subtest $ksft_pass ":" + + exit_status_subtest $ksft_pass "pass" + exit_status_subtest $ksft_fail "fail" + exit_status_subtest $ksft_pass "xfail" + exit_status_subtest $ksft_skip "skip" + + exit_status_subtest $ksft_pass "pass pass" + exit_status_subtest $ksft_fail "pass fail" + exit_status_subtest $ksft_pass "pass xfail" + exit_status_subtest $ksft_skip "pass skip" + + exit_status_subtest $ksft_fail "fail pass" + exit_status_subtest $ksft_pass "xfail pass" + exit_status_subtest $ksft_skip "skip pass" + + exit_status_subtest $ksft_fail "fail fail" + exit_status_subtest $ksft_fail "fail xfail" + exit_status_subtest $ksft_fail "fail skip" + + exit_status_subtest $ksft_fail "xfail fail" + exit_status_subtest $ksft_fail "skip fail" + + exit_status_subtest $ksft_pass "xfail xfail" + exit_status_subtest $ksft_skip "xfail skip" + exit_status_subtest $ksft_skip "skip xfail" + + exit_status_subtest $ksft_skip "skip skip" + + KSFT_MACHINE_SLOW=yes \ + exit_status_subtest $ksft_pass "slow_xfail" ": slow" + + KSFT_MACHINE_SLOW=no \ + exit_status_subtest $ksft_fail "slow_xfail" ": fast" +} + +trap pre_cleanup EXIT + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index c5b0cbc85b3e..ecd34f364125 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -1,11 +1,12 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="standalone bridge" +ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \ + vlan_over_vlan_unaware_bridged_port vlan_over_vlan_aware_bridged_port \ + vlan_over_vlan_unaware_bridge vlan_over_vlan_aware_bridge" NUM_NETIFS=2 PING_COUNT=1 REQUIRE_MTOOLS=yes -REQUIRE_MZ=no source lib.sh @@ -37,9 +38,68 @@ UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05" UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06" UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07" -NON_IP_MC="01:02:03:04:05:06" -NON_IP_PKT="00:04 48:45:4c:4f" -BC="ff:ff:ff:ff:ff:ff" +PTP_1588_L2_SYNC=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00" +PTP_1588_L2_FOLLOW_UP=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c5 f1 17 97 ed f0" +PTP_1588_L2_PDELAY_REQ=" \ +01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:f7 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 06 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00" +PTP_1588_IPV4_SYNC=" \ +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \ +01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \ +02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" +PTP_1588_IPV4_FOLLOW_UP=" +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \ +01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \ +c6 0f 1d 9a 61 87" +PTP_1588_IPV4_PDELAY_REQ=" \ +01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \ +00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \ +00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_SYNC=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \ +7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_FOLLOW_UP=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \ +00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c6 2a 32 09 bd 74 00 00" +PTP_1588_IPV6_PDELAY_REQ=" \ +33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \ +5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \ +63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" # Disable promisc to ensure we don't receive unknown MAC DA packets export TCPDUMP_EXTRA_FLAGS="-pl" @@ -47,13 +107,15 @@ export TCPDUMP_EXTRA_FLAGS="-pl" h1=${NETIFS[p1]} h2=${NETIFS[p2]} -send_non_ip() +send_raw() { - local if_name=$1 - local smac=$2 - local dmac=$3 + local if_name=$1; shift + local pkt="$1"; shift + local smac=$(mac_get $if_name) + + pkt="${pkt/00:00:de:ad:be:ef/$smac}" - $MZ -q $if_name "$dmac $smac $NON_IP_PKT" + $MZ -q $if_name "$pkt" } send_uc_ipv4() @@ -68,10 +130,11 @@ send_uc_ipv4() check_rcv() { - local if_name=$1 - local type=$2 - local pattern=$3 - local should_receive=$4 + local if_name=$1; shift + local type=$1; shift + local pattern=$1; shift + local should_receive=$1; shift + local test_name="$1"; shift local should_fail= [ $should_receive = true ] && should_fail=0 || should_fail=1 @@ -81,7 +144,7 @@ check_rcv() check_err_fail "$should_fail" "$?" "reception" - log_test "$if_name: $type" + log_test "$test_name: $type" } mc_route_prepare() @@ -104,44 +167,78 @@ mc_route_destroy() run_test() { - local rcv_if_name=$1 - local smac=$(mac_get $h1) + local send_if_name=$1; shift + local rcv_if_name=$1; shift + local skip_ptp=$1; shift + local no_unicast_flt=$1; shift + local test_name="$1"; shift + local smac=$(mac_get $send_if_name) local rcv_dmac=$(mac_get $rcv_if_name) + local should_receive tcpdump_start $rcv_if_name - mc_route_prepare $h1 + mc_route_prepare $send_if_name mc_route_prepare $rcv_if_name - send_uc_ipv4 $h1 $rcv_dmac - send_uc_ipv4 $h1 $MACVLAN_ADDR - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1 + send_uc_ipv4 $send_if_name $rcv_dmac + send_uc_ipv4 $send_if_name $MACVLAN_ADDR + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR1 ip link set dev $rcv_if_name promisc on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR2 ip link set dev $rcv_if_name promisc off mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR - mc_send $h1 $JOINED_IPV4_MC_ADDR + mc_send $send_if_name $JOINED_IPV4_MC_ADDR mc_leave mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR - mc_send $h1 $JOINED_IPV6_MC_ADDR + mc_send $send_if_name $JOINED_IPV6_MC_ADDR mc_leave - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR1 ip link set dev $rcv_if_name allmulticast on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR3 ip link set dev $rcv_if_name allmulticast off mc_route_destroy $rcv_if_name - mc_route_destroy $h1 + mc_route_destroy $send_if_name + + if [ $skip_ptp = false ]; then + ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_SYNC" + send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP" + ip maddress del 01:1b:19:00:00:00 dev $rcv_if_name + + ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_PDELAY_REQ" + ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name + + mc_join $rcv_if_name 224.0.1.129 + send_raw $send_if_name "$PTP_1588_IPV4_SYNC" + send_raw $send_if_name "$PTP_1588_IPV4_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name 224.0.0.107 + send_raw $send_if_name "$PTP_1588_IPV4_PDELAY_REQ" + mc_leave + + mc_join $rcv_if_name ff0e::181 + send_raw $send_if_name "$PTP_1588_IPV6_SYNC" + send_raw $send_if_name "$PTP_1588_IPV6_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name ff02::6b + send_raw $send_if_name "$PTP_1588_IPV6_PDELAY_REQ" + mc_leave + fi sleep 1 @@ -149,55 +246,99 @@ run_test() check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \ "$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \ "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ - false + $should_receive "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \ "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, allmulti" \ + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name \ + "Unicast IPv4 to unknown MAC address, allmulti" \ "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ - false + $should_receive "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to joined group" \ "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - check_rcv $rcv_if_name "Multicast IPv4 to unknown group" \ - "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ - false + xfail \ + check_rcv $rcv_if_name \ + "Multicast IPv4 to unknown group" \ + "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ + false "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to joined group" \ "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" - check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ - "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ - false + xfail \ + check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ + "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ + false "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" + + if [ $skip_ptp = false ]; then + check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + fi tcpdump_cleanup $rcv_if_name } @@ -222,62 +363,217 @@ h2_destroy() simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 } +h1_vlan_create() +{ + simple_if_init $h1 + vlan_create $h1 100 v$h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_vlan_destroy() +{ + vlan_destroy $h1 100 + simple_if_fini $h1 +} + +h2_vlan_create() +{ + simple_if_init $h2 + vlan_create $h2 100 v$h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_vlan_destroy() +{ + vlan_destroy $h2 100 + simple_if_fini $h2 +} + bridge_create() { - ip link add br0 type bridge + local vlan_filtering=$1 + + ip link add br0 type bridge vlan_filtering $vlan_filtering ip link set br0 address $BRIDGE_ADDR ip link set br0 up ip link set $h2 master br0 ip link set $h2 up - - simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 } bridge_destroy() { - simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 - ip link del br0 } -standalone() +macvlan_create() { - h1_create - h2_create + local lower=$1 - ip link add link $h2 name macvlan0 type macvlan mode private + ip link add link $lower name macvlan0 type macvlan mode private ip link set macvlan0 address $MACVLAN_ADDR ip link set macvlan0 up +} - run_test $h2 - +macvlan_destroy() +{ ip link del macvlan0 +} + +standalone() +{ + local no_unicast_flt=true + local skip_ptp=false + + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + h1_create + h2_create + macvlan_create $h2 + + run_test $h1 $h2 $skip_ptp $no_unicast_flt "$h2" + + macvlan_destroy h2_destroy h1_destroy } -bridge() +test_bridge() { + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=true + h1_create - bridge_create + bridge_create $vlan_filtering + simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0 - ip link add link br0 name macvlan0 type macvlan mode private - ip link set macvlan0 address $MACVLAN_ADDR - ip link set macvlan0 up + run_test $h1 br0 $skip_ptp $no_unicast_flt \ + "vlan_filtering=$vlan_filtering bridge" - run_test br0 + macvlan_destroy + simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 + bridge_destroy + h1_destroy +} - ip link del macvlan0 +vlan_unaware_bridge() +{ + test_bridge 0 +} + +vlan_aware_bridge() +{ + test_bridge 1 +} + +test_vlan() +{ + local no_unicast_flt=true + local skip_ptp=false + + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + + h1_vlan_create + h2_vlan_create + macvlan_create $h2.100 + + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt "VLAN upper" + macvlan_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_bridged_port() +{ + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=false + + # br_manage_promisc() will not force a single vlan_filtering port to + # promiscuous mode, so we should still expect unicast filtering to take + # place if the device can do it. + if [ $(has_unicast_flt $h2) = yes ] && [ $vlan_filtering = 1 ]; then + no_unicast_flt=false + fi + + h1_vlan_create + h2_vlan_create + bridge_create $vlan_filtering + macvlan_create $h2.100 + + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridged port" + + macvlan_destroy bridge_destroy - h1_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridged_port() +{ + vlan_over_bridged_port 0 +} + +vlan_over_vlan_aware_bridged_port() +{ + vlan_over_bridged_port 1 +} + +vlan_over_bridge() +{ + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=true + + h1_vlan_create + bridge_create $vlan_filtering + simple_if_init br0 + vlan_create br0 100 vbr0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0.100 + + if [ $vlan_filtering = 1 ]; then + bridge vlan add dev $h2 vid 100 master + bridge vlan add dev br0 vid 100 self + fi + + run_test $h1.100 br0.100 $skip_ptp $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridge" + + if [ $vlan_filtering = 1 ]; then + bridge vlan del dev br0 vid 100 self + bridge vlan del dev $h2 vid 100 master + fi + + macvlan_destroy + vlan_destroy br0 100 + simple_if_fini br0 + bridge_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridge() +{ + vlan_over_bridge 0 +} + +vlan_over_vlan_aware_bridge() +{ + vlan_over_bridge 1 } cleanup() { pre_cleanup + + ip link set $h2 down + ip link set $h1 down + vrf_cleanup } diff --git a/tools/testing/selftests/net/forwarding/loopback.sh b/tools/testing/selftests/net/forwarding/loopback.sh deleted file mode 100755 index 8f4057310b5b..000000000000 --- a/tools/testing/selftests/net/forwarding/loopback.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -ALL_TESTS="loopback_test" -NUM_NETIFS=2 -source tc_common.sh -source lib.sh - -h1_create() -{ - simple_if_init $h1 192.0.2.1/24 - tc qdisc add dev $h1 clsact -} - -h1_destroy() -{ - tc qdisc del dev $h1 clsact - simple_if_fini $h1 192.0.2.1/24 -} - -h2_create() -{ - simple_if_init $h2 -} - -h2_destroy() -{ - simple_if_fini $h2 -} - -loopback_test() -{ - RET=0 - - tc filter add dev $h1 ingress protocol arp pref 1 handle 101 flower \ - skip_hw arp_op reply arp_tip 192.0.2.1 action drop - - $MZ $h1 -c 1 -t arp -q - - tc_check_packets "dev $h1 ingress" 101 1 - check_fail $? "Matched on a filter without loopback setup" - - ethtool -K $h1 loopback on - check_err $? "Failed to enable loopback" - - setup_wait_dev $h1 - - $MZ $h1 -c 1 -t arp -q - - tc_check_packets "dev $h1 ingress" 101 1 - check_err $? "Did not match on filter with loopback" - - ethtool -K $h1 loopback off - check_err $? "Failed to disable loopback" - - $MZ $h1 -c 1 -t arp -q - - tc_check_packets "dev $h1 ingress" 101 2 - check_fail $? "Matched on a filter after loopback was removed" - - tc filter del dev $h1 ingress protocol arp pref 1 handle 101 flower - - log_test "loopback" -} - -setup_prepare() -{ - h1=${NETIFS[p1]} - h2=${NETIFS[p2]} - - vrf_prepare - - h1_create - h2_create - - if ethtool -k $h1 | grep loopback | grep -q fixed; then - log_test "SKIP: dev $h1 does not support loopback feature" - exit $ksft_skip - fi -} - -cleanup() -{ - pre_cleanup - - h2_destroy - h1_destroy - - vrf_cleanup -} - -trap cleanup EXIT - -setup_prepare -setup_wait - -tests_run - -exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/min_max_mtu.sh b/tools/testing/selftests/net/forwarding/min_max_mtu.sh new file mode 100755 index 000000000000..97bb8b221bed --- /dev/null +++ b/tools/testing/selftests/net/forwarding/min_max_mtu.sh @@ -0,0 +1,283 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +# | H1 | +# | | +# | $h1.10 + | +# | 192.0.2.2/24 | | +# | 2001:db8:1::2/64 | | +# | | | +# | $h1 + | +# | | | +# +------------------|-+ +# | +# +------------------|-+ +# | SW | | +# | $swp1 + | +# | | | +# | $swp1.10 + | +# | 192.0.2.1/24 | +# | 2001:db8:1::1/64 | +# | | +# +--------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + max_mtu_config_test + max_mtu_traffic_test + min_mtu_config_test + min_mtu_traffic_test +" + +NUM_NETIFS=2 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.2/24 2001:db8:1::2/64 +} + +h1_destroy() +{ + vlan_destroy $h1 10 192.0.2.2/24 2001:db8:1::2/64 + simple_if_fini $h1 +} + +switch_create() +{ + ip li set dev $swp1 up + vlan_create $swp1 10 "" 192.0.2.1/24 2001:db8:1::1/64 +} + +switch_destroy() +{ + ip li set dev $swp1 down + vlan_destroy $swp1 10 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + + switch_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.10 192.0.2.1 +} + +ping_ipv6() +{ + ping6_test $h1.10 2001:db8:1::1 +} + +min_max_mtu_get_if() +{ + local dev=$1; shift + local min_max=$1; shift + + ip -d -j link show $dev | jq ".[].$min_max" +} + +ensure_compatible_min_max_mtu() +{ + local min_max=$1; shift + + local mtu=$(min_max_mtu_get_if ${NETIFS[p1]} $min_max) + local i + + for ((i = 2; i <= NUM_NETIFS; ++i)); do + local current_mtu=$(min_max_mtu_get_if ${NETIFS[p$i]} $min_max) + + if [ $current_mtu -ne $mtu ]; then + return 1 + fi + done +} + +mtu_set_if() +{ + local dev=$1; shift + local mtu=$1; shift + local should_fail=${1:-0}; shift + + mtu_set $dev $mtu 2>/dev/null + check_err_fail $should_fail $? "Set MTU $mtu for $dev" +} + +mtu_set_all_if() +{ + local mtu=$1; shift + local i + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + mtu_set_if ${NETIFS[p$i]} $mtu + mtu_set_if ${NETIFS[p$i]}.10 $mtu + done +} + +mtu_restore_all_if() +{ + local i + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + mtu_restore ${NETIFS[p$i]}.10 + mtu_restore ${NETIFS[p$i]} + done +} + +mtu_test_ping4() +{ + local mtu=$1; shift + local should_fail=$1; shift + + # Ping adds 8 bytes for ICMP header and 20 bytes for IP header + local ping_headers_len=$((20 + 8)) + local pkt_size=$((mtu - ping_headers_len)) + + ping_do $h1.10 192.0.2.1 "-s $pkt_size -M do" + check_err_fail $should_fail $? "Ping, packet size: $pkt_size" +} + +mtu_test_ping6() +{ + local mtu=$1; shift + local should_fail=$1; shift + + # Ping adds 8 bytes for ICMP header and 40 bytes for IPv6 header + local ping6_headers_len=$((40 + 8)) + local pkt_size=$((mtu - ping6_headers_len)) + + ping6_do $h1.10 2001:db8:1::1 "-s $pkt_size -M do" + check_err_fail $should_fail $? "Ping6, packet size: $pkt_size" +} + +max_mtu_config_test() +{ + local i + + RET=0 + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + local dev=${NETIFS[p$i]} + local max_mtu=$(min_max_mtu_get_if $dev "max_mtu") + local should_fail + + should_fail=0 + mtu_set_if $dev $max_mtu $should_fail + mtu_restore $dev + + should_fail=1 + mtu_set_if $dev $((max_mtu + 1)) $should_fail + mtu_restore $dev + done + + log_test "Test maximum MTU configuration" +} + +max_mtu_traffic_test() +{ + local should_fail + local max_mtu + + RET=0 + + if ! ensure_compatible_min_max_mtu "max_mtu"; then + log_test_xfail "Topology has incompatible maximum MTU values" + return + fi + + max_mtu=$(min_max_mtu_get_if ${NETIFS[p1]} "max_mtu") + + should_fail=0 + mtu_set_all_if $max_mtu + mtu_test_ping4 $max_mtu $should_fail + mtu_test_ping6 $max_mtu $should_fail + mtu_restore_all_if + + should_fail=1 + mtu_set_all_if $((max_mtu - 1)) + mtu_test_ping4 $max_mtu $should_fail + mtu_test_ping6 $max_mtu $should_fail + mtu_restore_all_if + + log_test "Test traffic, packet size is maximum MTU" +} + +min_mtu_config_test() +{ + local i + + RET=0 + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + local dev=${NETIFS[p$i]} + local min_mtu=$(min_max_mtu_get_if $dev "min_mtu") + local should_fail + + should_fail=0 + mtu_set_if $dev $min_mtu $should_fail + mtu_restore $dev + + should_fail=1 + mtu_set_if $dev $((min_mtu - 1)) $should_fail + mtu_restore $dev + done + + log_test "Test minimum MTU configuration" +} + +min_mtu_traffic_test() +{ + local should_fail=0 + local min_mtu + + RET=0 + + if ! ensure_compatible_min_max_mtu "min_mtu"; then + log_test_xfail "Topology has incompatible minimum MTU values" + return + fi + + min_mtu=$(min_max_mtu_get_if ${NETIFS[p1]} "min_mtu") + mtu_set_all_if $min_mtu + mtu_test_ping4 $min_mtu $should_fail + # Do not test minimum MTU with IPv6, as IPv6 requires higher MTU. + + mtu_restore_all_if + + log_test "Test traffic, packet size is minimum MTU" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh index 0266443601bc..921c733ee04f 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh @@ -74,7 +74,7 @@ test_span_gre_mac() RET=0 - mirror_install $swp1 $direction $tundev "matchall $tcflags" + mirror_install $swp1 $direction $tundev "matchall" icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac" mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10 @@ -82,29 +82,29 @@ test_span_gre_mac() icmp_capture_uninstall h3-${tundev} mirror_uninstall $swp1 $direction - log_test "$direction $what: envelope MAC ($tcflags)" + log_test "$direction $what: envelope MAC" } test_two_spans() { RET=0 - mirror_install $swp1 ingress gt4 "matchall $tcflags" - mirror_install $swp1 egress gt6 "matchall $tcflags" - quick_test_span_gre_dir gt4 ingress - quick_test_span_gre_dir gt6 egress + mirror_install $swp1 ingress gt4 "matchall" + mirror_install $swp1 egress gt6 "matchall" + quick_test_span_gre_dir gt4 8 0 + quick_test_span_gre_dir gt6 0 8 mirror_uninstall $swp1 ingress - fail_test_span_gre_dir gt4 ingress - quick_test_span_gre_dir gt6 egress + fail_test_span_gre_dir gt4 8 0 + quick_test_span_gre_dir gt6 0 8 - mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_install $swp1 ingress gt4 "matchall" mirror_uninstall $swp1 egress - quick_test_span_gre_dir gt4 ingress - fail_test_span_gre_dir gt6 egress + quick_test_span_gre_dir gt4 8 0 + fail_test_span_gre_dir gt6 0 8 mirror_uninstall $swp1 ingress - log_test "two simultaneously configured mirrors ($tcflags)" + log_test "two simultaneously configured mirrors" } test_gretap() @@ -131,30 +131,11 @@ test_ip6gretap_mac() test_span_gre_mac gt6 egress "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh index 6c257ec03756..e3cd48e18eeb 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh @@ -196,32 +196,11 @@ test_ip6gretap() full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL" } -test_all() -{ - RET=0 - - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh index 04fd14b0a9b7..6c7bd33332c2 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh @@ -108,30 +108,11 @@ test_ip6gretap() full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh index f35313c76fac..909ec956a5e5 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh @@ -104,30 +104,11 @@ test_ip6gretap_stp() full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh index 0cf4c47a46f9..40ac9dd3aff1 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh @@ -104,30 +104,11 @@ test_ip6gretap() full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" } -tests() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -tests - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - tests -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index c53148b1dc63..a20d22d1df36 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -49,6 +49,7 @@ ALL_TESTS=" test_mirror_gretap_second " +REQUIRE_TEAMD="yes" NUM_NETIFS=6 source lib.sh source mirror_lib.sh @@ -227,10 +228,10 @@ test_lag_slave() RET=0 tc filter add dev $swp1 ingress pref 999 \ - proto 802.1q flower vlan_ethtype arp $tcflags \ + proto 802.1q flower vlan_ethtype arp \ action pass mirror_install $swp1 ingress gt4 \ - "proto 802.1q flower vlan_id 333 $tcflags" + "proto 802.1q flower vlan_id 333" # Test connectivity through $up_dev when $down_dev is set down. ip link set dev $down_dev down @@ -239,7 +240,7 @@ test_lag_slave() setup_wait_dev $host_dev $ARPING -I br1 192.0.2.130 -qfc 1 sleep 2 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 10 + mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10" # Test lack of connectivity when both slaves are down. ip link set dev $up_dev down @@ -252,7 +253,7 @@ test_lag_slave() mirror_uninstall $swp1 ingress tc filter del dev $swp1 ingress pref 999 - log_test "$what ($tcflags)" + log_test "$what" } test_mirror_gretap_first() @@ -265,30 +266,11 @@ test_mirror_gretap_second() test_lag_slave $h4 $swp4 $swp3 "mirror to gretap: LAG second slave" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh index 5ea9d63915f7..65ae9d960c18 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh @@ -73,7 +73,7 @@ test_span_gre_ttl() RET=0 mirror_install $swp1 ingress $tundev \ - "prot ip flower $tcflags ip_prot icmp" + "prot ip flower ip_prot icmp" tc filter add dev $h3 ingress pref 77 prot $prot \ flower skip_hw ip_ttl 50 action pass @@ -81,13 +81,13 @@ test_span_gre_ttl() ip link set dev $tundev type $type ttl 50 sleep 2 - mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 + mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 ">= 10" ip link set dev $tundev type $type ttl 100 tc filter del dev $h3 ingress pref 77 mirror_uninstall $swp1 ingress - log_test "$what: TTL change ($tcflags)" + log_test "$what: TTL change" } test_span_gre_tun_up() @@ -98,15 +98,15 @@ test_span_gre_tun_up() RET=0 ip link set dev $tundev down - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev up - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: tunnel down/up ($tcflags)" + log_test "$what: tunnel down/up" } test_span_gre_egress_up() @@ -118,8 +118,8 @@ test_span_gre_egress_up() RET=0 ip link set dev $swp3 down - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev # After setting the device up, wait for neighbor to get resolved so that # we can expect mirroring to work. @@ -127,10 +127,10 @@ test_span_gre_egress_up() setup_wait_dev $swp3 ping -c 1 -I $swp3 $remote_ip &>/dev/null - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: egress down/up ($tcflags)" + log_test "$what: egress down/up" } test_span_gre_remote_ip() @@ -144,14 +144,14 @@ test_span_gre_remote_ip() RET=0 ip link set dev $tundev type $type remote $wrong_ip - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev type $type remote $correct_ip - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: remote address change ($tcflags)" + log_test "$what: remote address change" } test_span_gre_tun_del() @@ -165,10 +165,10 @@ test_span_gre_tun_del() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev ip link del dev $tundev - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev tunnel_create $tundev $type $local_ip $remote_ip \ ttl 100 tos inherit $flags @@ -176,11 +176,11 @@ test_span_gre_tun_del() # Recreating the tunnel doesn't reestablish mirroring, so reinstall it # and verify it works for the follow-up tests. mirror_uninstall $swp1 ingress - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: tunnel deleted ($tcflags)" + log_test "$what: tunnel deleted" } test_span_gre_route_del() @@ -192,18 +192,18 @@ test_span_gre_route_del() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev ip route del $route dev $edev - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev ip route add $route dev $edev - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: underlay route removal ($tcflags)" + log_test "$what: underlay route removal" } test_ttl() @@ -244,30 +244,11 @@ test_route_del() test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh index 09389f3b9369..3a84f3ab5856 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh @@ -64,12 +64,19 @@ cleanup() test_span_gre_dir_acl() { - test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 + local tundev=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + test_span_gre_dir_ips "$tundev" "$forward_type" \ + "$backward_type" 192.0.2.3 192.0.2.4 } fail_test_span_gre_dir_acl() { - fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 + local tundev=$1; shift + + fail_test_span_gre_dir_ips "$tundev" 192.0.2.3 192.0.2.4 } full_test_span_gre_dir_acl() @@ -84,16 +91,15 @@ full_test_span_gre_dir_acl() RET=0 mirror_install $swp1 $direction $tundev \ - "protocol ip flower $tcflags dst_ip $match_dip" - fail_test_span_gre_dir $tundev $direction - test_span_gre_dir_acl "$tundev" "$direction" \ - "$forward_type" "$backward_type" + "protocol ip flower dst_ip $match_dip" + fail_test_span_gre_dir $tundev + test_span_gre_dir_acl "$tundev" "$forward_type" "$backward_type" mirror_uninstall $swp1 $direction # Test lack of mirroring after ACL mirror is uninstalled. - fail_test_span_gre_dir_acl "$tundev" "$direction" + fail_test_span_gre_dir_acl "$tundev" - log_test "$direction $what ($tcflags)" + log_test "$direction $what" } test_gretap() @@ -108,30 +114,11 @@ test_ip6gretap() full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh index 9edf4cb104a8..ff7049582d35 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh @@ -37,8 +37,14 @@ # | \ / | # | \____________________________________________/ | # | | | -# | + lag2 (team) | -# | 192.0.2.130/28 | +# | + lag2 (team) ------> + gt4-dst (gretap) | +# | 192.0.2.130/28 loc=192.0.2.130 | +# | rem=192.0.2.129 | +# | ttl=100 | +# | tos=inherit | +# | | +# | | +# | | # | | # +---------------------------------------------------------------------------+ @@ -47,12 +53,10 @@ ALL_TESTS=" test_mirror_gretap_second " +REQUIRE_TEAMD="yes" NUM_NETIFS=6 source lib.sh source mirror_lib.sh -source mirror_gre_lib.sh - -require_command $ARPING vlan_host_create() { @@ -122,16 +126,21 @@ h3_create() { vrf_create vrf-h3 ip link set dev vrf-h3 up - tc qdisc add dev $h3 clsact - tc qdisc add dev $h4 clsact h3_create_team + + tunnel_create gt4-dst gretap 192.0.2.130 192.0.2.129 \ + ttl 100 tos inherit + ip link set dev gt4-dst master vrf-h3 + tc qdisc add dev gt4-dst clsact } h3_destroy() { + tc qdisc del dev gt4-dst clsact + ip link set dev gt4-dst nomaster + tunnel_destroy gt4-dst + h3_destroy_team - tc qdisc del dev $h4 clsact - tc qdisc del dev $h3 clsact ip link set dev vrf-h3 down vrf_destroy vrf-h3 } @@ -188,18 +197,12 @@ setup_prepare() h2_create h3_create switch_create - - trap_install $h3 ingress - trap_install $h4 ingress } cleanup() { pre_cleanup - trap_uninstall $h4 ingress - trap_uninstall $h3 ingress - switch_destroy h3_destroy h2_destroy @@ -218,7 +221,8 @@ test_lag_slave() RET=0 mirror_install $swp1 ingress gt4 \ - "proto 802.1q flower vlan_id 333 $tcflags" + "proto 802.1q flower vlan_id 333" + vlan_capture_install gt4-dst "vlan_ethtype ipv4 ip_proto icmp type 8" # Move $down_dev away from the team. That will prompt change in # txability of the connected device, without changing its upness. The @@ -226,13 +230,14 @@ test_lag_slave() # other slave. ip link set dev $down_dev nomaster sleep 2 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $up_dev 1 10 + mirror_test vrf-h1 192.0.2.1 192.0.2.18 gt4-dst 100 10 # Test lack of connectivity when neither slave is txable. ip link set dev $up_dev nomaster sleep 2 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0 + mirror_test vrf-h1 192.0.2.1 192.0.2.18 gt4-dst 100 0 + + vlan_capture_uninstall gt4-dst mirror_uninstall $swp1 ingress # Recreate H3's team device, because mlxsw, which this test is @@ -243,7 +248,7 @@ test_lag_slave() # Wait for ${h,swp}{3,4}. setup_wait - log_test "$what ($tcflags)" + log_test "$what" } test_mirror_gretap_first() @@ -256,30 +261,11 @@ test_mirror_gretap_second() test_lag_slave $h4 $h3 "mirror to gretap: LAG second slave" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh index 0c36546e131e..20078cc55f24 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh @@ -5,22 +5,34 @@ source "$net_forwarding_dir/mirror_lib.sh" quick_test_span_gre_dir_ips() { local tundev=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift - do_test_span_dir_ips 10 h3-$tundev "$@" + do_test_span_dir_ips 10 h3-$tundev "$ip1" "$ip2" \ + "$forward_type" "$backward_type" } fail_test_span_gre_dir_ips() { local tundev=$1; shift + local ip1=$1; shift + local ip2=$1; shift - do_test_span_dir_ips 0 h3-$tundev "$@" + do_test_span_dir_ips 0 h3-$tundev "$ip1" "$ip2" } test_span_gre_dir_ips() { local tundev=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local ip1=$1; shift + local ip2=$1; shift - test_span_dir_ips h3-$tundev "$@" + test_span_dir_ips h3-$tundev "$forward_type" \ + "$backward_type" "$ip1" "$ip2" } full_test_span_gre_dir_ips() @@ -35,12 +47,12 @@ full_test_span_gre_dir_ips() RET=0 - mirror_install $swp1 $direction $tundev "matchall $tcflags" - test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + mirror_install $swp1 $direction $tundev "matchall" + test_span_dir_ips "h3-$tundev" "$forward_type" \ "$backward_type" "$ip1" "$ip2" mirror_uninstall $swp1 $direction - log_test "$direction $what ($tcflags)" + log_test "$direction $what" } full_test_span_gre_dir_vlan_ips() @@ -56,45 +68,63 @@ full_test_span_gre_dir_vlan_ips() RET=0 - mirror_install $swp1 $direction $tundev "matchall $tcflags" + mirror_install $swp1 $direction $tundev "matchall" - test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + test_span_dir_ips "h3-$tundev" "$forward_type" \ "$backward_type" "$ip1" "$ip2" tc filter add dev $h3 ingress pref 77 prot 802.1q \ flower $vlan_match \ action pass - mirror_test v$h1 $ip1 $ip2 $h3 77 10 + mirror_test v$h1 $ip1 $ip2 $h3 77 '>= 10' tc filter del dev $h3 ingress pref 77 mirror_uninstall $swp1 $direction - log_test "$direction $what ($tcflags)" + log_test "$direction $what" } quick_test_span_gre_dir() { - quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift + + quick_test_span_gre_dir_ips "$tundev" 192.0.2.1 192.0.2.2 \ + "$forward_type" "$backward_type" } fail_test_span_gre_dir() { - fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 -} + local tundev=$1; shift -test_span_gre_dir() -{ - test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 + fail_test_span_gre_dir_ips "$tundev" 192.0.2.1 192.0.2.2 } full_test_span_gre_dir() { - full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local what=$1; shift + + full_test_span_gre_dir_ips "$tundev" "$direction" "$forward_type" \ + "$backward_type" "$what" 192.0.2.1 192.0.2.2 } full_test_span_gre_dir_vlan() { - full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local direction=$1; shift + local vlan_match=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local what=$1; shift + + full_test_span_gre_dir_vlan_ips "$tundev" "$direction" "$vlan_match" \ + "$forward_type" "$backward_type" \ + "$what" 192.0.2.1 192.0.2.2 } full_test_span_gre_stp_ips() @@ -104,27 +134,39 @@ full_test_span_gre_stp_ips() local what=$1; shift local ip1=$1; shift local ip2=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift local h3mac=$(mac_get $h3) RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir_ips $tundev $ip1 $ip2 \ + "$forward_type" "$backward_type" bridge link set dev $nbpdev state disabled sleep 1 - fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + fail_test_span_gre_dir_ips $tundev $ip1 $ip2 bridge link set dev $nbpdev state forwarding sleep 1 - quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + quick_test_span_gre_dir_ips $tundev $ip1 $ip2 \ + "$forward_type" "$backward_type" mirror_uninstall $swp1 ingress - log_test "$what: STP state ($tcflags)" + log_test "$what: STP state" } full_test_span_gre_stp() { - full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local nbpdev=$1; shift + local what=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift + + full_test_span_gre_stp_ips "$tundev" "$nbpdev" "$what" \ + 192.0.2.1 192.0.2.2 \ + "$forward_type" "$backward_type" } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh index fc0508e40fca..2cbfbecf25c8 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh @@ -60,41 +60,32 @@ test_span_gre_neigh() local addr=$1; shift local tundev=$1; shift local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift local what=$1; shift RET=0 ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55 - mirror_install $swp1 $direction $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 $direction $tundev "matchall" + fail_test_span_gre_dir $tundev "$forward_type" "$backward_type" ip neigh del dev $swp3 $addr - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev "$forward_type" "$backward_type" mirror_uninstall $swp1 $direction - log_test "$direction $what: neighbor change ($tcflags)" + log_test "$direction $what: neighbor change" } test_gretap() { - test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap" - test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap" + test_span_gre_neigh 192.0.2.130 gt4 ingress 8 0 "mirror to gretap" + test_span_gre_neigh 192.0.2.130 gt4 egress 0 8 "mirror to gretap" } test_ip6gretap() { - test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap" - test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap" -} - -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress + test_span_gre_neigh 2001:db8:2::2 gt6 ingress 8 0 "mirror to ip6gretap" + test_span_gre_neigh 2001:db8:2::2 gt6 egress 0 8 "mirror to ip6gretap" } trap cleanup EXIT @@ -102,14 +93,6 @@ trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh index 6f9ef1820e93..34bc646938e3 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh @@ -75,42 +75,31 @@ cleanup() test_gretap() { RET=0 - mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_install $swp1 ingress gt4 "matchall" # For IPv4, test that there's no mirroring without the route directing # the traffic to tunnel remote address. Then add it and test that # mirroring starts. For IPv6 we can't test this due to the limitation # that routes for locally-specified IPv6 addresses can't be added. - fail_test_span_gre_dir gt4 ingress + fail_test_span_gre_dir gt4 ip route add 192.0.2.130/32 via 192.0.2.162 - quick_test_span_gre_dir gt4 ingress + quick_test_span_gre_dir gt4 ip route del 192.0.2.130/32 via 192.0.2.162 mirror_uninstall $swp1 ingress - log_test "mirror to gre with next-hop remote ($tcflags)" + log_test "mirror to gre with next-hop remote" } test_ip6gretap() { RET=0 - mirror_install $swp1 ingress gt6 "matchall $tcflags" - quick_test_span_gre_dir gt6 ingress + mirror_install $swp1 ingress gt6 "matchall" + quick_test_span_gre_dir gt6 mirror_uninstall $swp1 ingress - log_test "mirror to ip6gre with next-hop remote ($tcflags)" -} - -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress + log_test "mirror to ip6gre with next-hop remote" } trap cleanup EXIT @@ -118,14 +107,6 @@ trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh index 88cecdb9a861..63689928cb51 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh @@ -63,30 +63,11 @@ test_gretap() full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh index c8a9b5bd841f..1b902cc579f6 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -153,21 +153,21 @@ test_span_gre_forbidden_cpu() RET=0 # Run the pass-test first, to prime neighbor table. - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev # Now forbid the VLAN at the bridge and see it fail. bridge vlan del dev br1 vid 555 self sleep 1 - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev bridge vlan add dev br1 vid 555 self sleep 1 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: vlan forbidden at a bridge ($tcflags)" + log_test "$what: vlan forbidden at a bridge" } test_gretap_forbidden_cpu() @@ -187,22 +187,22 @@ test_span_gre_forbidden_egress() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev bridge vlan del dev $swp3 vid 555 sleep 1 - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev bridge vlan add dev $swp3 vid 555 # Re-prime FDB $ARPING -I br1.555 192.0.2.130 -fqc 1 sleep 1 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: vlan forbidden at a bridge egress ($tcflags)" + log_test "$what: vlan forbidden at a bridge egress" } test_gretap_forbidden_egress() @@ -223,30 +223,30 @@ test_span_gre_untagged_egress() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" + mirror_install $swp1 ingress $tundev "matchall" - quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_gre_dir $tundev + quick_test_span_vlan_dir $h3 555 "$ul_proto" h3_addr_add_del del $h3.555 bridge vlan add dev $swp3 vid 555 pvid untagged h3_addr_add_del add $h3 sleep 5 - quick_test_span_gre_dir $tundev ingress - fail_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_gre_dir $tundev + fail_test_span_vlan_dir $h3 555 "$ul_proto" h3_addr_add_del del $h3 bridge vlan add dev $swp3 vid 555 h3_addr_add_del add $h3.555 sleep 5 - quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_gre_dir $tundev + quick_test_span_vlan_dir $h3 555 "$ul_proto" mirror_uninstall $swp1 ingress - log_test "$what: vlan untagged at a bridge egress ($tcflags)" + log_test "$what: vlan untagged at a bridge egress" } test_gretap_untagged_egress() @@ -267,19 +267,19 @@ test_span_gre_fdb_roaming() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev while ((RET == 0)); do bridge fdb del dev $swp3 $h3mac vlan 555 master 2>/dev/null bridge fdb add dev $swp2 $h3mac vlan 555 master static sleep 1 - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev if ! bridge fdb sh dev $swp2 vlan 555 master \ | grep -q $h3mac; then printf "TEST: %-60s [RETRY]\n" \ - "$what: MAC roaming ($tcflags)" + "$what: MAC roaming" # ARP or ND probably reprimed the FDB while the test # was running. We would get a spurious failure. RET=0 @@ -292,11 +292,11 @@ test_span_gre_fdb_roaming() # Re-prime FDB $ARPING -I br1.555 192.0.2.130 -fqc 1 sleep 1 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: MAC roaming ($tcflags)" + log_test "$what: MAC roaming" } test_gretap_fdb_roaming() @@ -319,30 +319,11 @@ test_ip6gretap_stp() full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index 3e8ebeff3019..6bf9d5ae933c 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -44,14 +44,17 @@ mirror_test() local type="icmp echoreq" fi + if [[ -z ${expect//[[:digit:]]/} ]]; then + expect="== $expect" + fi + local t0=$(tc_rule_stats_get $dev $pref) $MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ -c 10 -d 100msec -t $type sleep 0.5 local t1=$(tc_rule_stats_get $dev $pref) local delta=$((t1 - t0)) - # Tolerate a couple stray extra packets. - ((expect <= delta && delta <= expect + 2)) + ((delta $expect)) check_err $? "Expected to capture $expect packets, got $delta." } @@ -59,36 +62,42 @@ do_test_span_dir_ips() { local expect=$1; shift local dev=$1; shift - local direction=$1; shift local ip1=$1; shift local ip2=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift - icmp_capture_install $dev + icmp_capture_install $dev "type $forward_type" mirror_test v$h1 $ip1 $ip2 $dev 100 $expect + icmp_capture_uninstall $dev + + icmp_capture_install $dev "type $backward_type" mirror_test v$h2 $ip2 $ip1 $dev 100 $expect icmp_capture_uninstall $dev } quick_test_span_dir_ips() { - do_test_span_dir_ips 10 "$@" -} + local dev=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift -fail_test_span_dir_ips() -{ - do_test_span_dir_ips 0 "$@" + do_test_span_dir_ips 10 "$dev" "$ip1" "$ip2" \ + "$forward_type" "$backward_type" } test_span_dir_ips() { local dev=$1; shift - local direction=$1; shift local forward_type=$1; shift local backward_type=$1; shift local ip1=$1; shift local ip2=$1; shift - quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2" + quick_test_span_dir_ips "$dev" "$ip1" "$ip2" \ + "$forward_type" "$backward_type" icmp_capture_install $dev "type $forward_type" mirror_test v$h1 $ip1 $ip2 $dev 100 10 @@ -99,14 +108,14 @@ test_span_dir_ips() icmp_capture_uninstall $dev } -fail_test_span_dir() -{ - fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 -} - test_span_dir() { - test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 + local dev=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + test_span_dir_ips "$dev" "$forward_type" "$backward_type" \ + 192.0.2.1 192.0.2.2 } do_test_span_vlan_dir_ips() @@ -114,7 +123,6 @@ do_test_span_vlan_dir_ips() local expect=$1; shift local dev=$1; shift local vid=$1; shift - local direction=$1; shift local ul_proto=$1; shift local ip1=$1; shift local ip2=$1; shift @@ -123,27 +131,50 @@ do_test_span_vlan_dir_ips() # The traffic is meant for local box anyway, so will be trapped to # kernel. vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype $ul_proto" - mirror_test v$h1 $ip1 $ip2 $dev 100 $expect - mirror_test v$h2 $ip2 $ip1 $dev 100 $expect + mirror_test v$h1 $ip1 $ip2 $dev 100 "$expect" + mirror_test v$h2 $ip2 $ip1 $dev 100 "$expect" vlan_capture_uninstall $dev } quick_test_span_vlan_dir_ips() { - do_test_span_vlan_dir_ips 10 "$@" + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + do_test_span_vlan_dir_ips '>= 10' "$dev" "$vid" "$ul_proto" \ + "$ip1" "$ip2" } fail_test_span_vlan_dir_ips() { - do_test_span_vlan_dir_ips 0 "$@" + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + do_test_span_vlan_dir_ips 0 "$dev" "$vid" "$ul_proto" "$ip1" "$ip2" } quick_test_span_vlan_dir() { - quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + + quick_test_span_vlan_dir_ips "$dev" "$vid" "$ul_proto" \ + 192.0.2.1 192.0.2.2 } fail_test_span_vlan_dir() { - fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + + fail_test_span_vlan_dir_ips "$dev" "$vid" "$ul_proto" \ + 192.0.2.1 192.0.2.2 } diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh index 0b44e148235e..2f150a414d38 100755 --- a/tools/testing/selftests/net/forwarding/mirror_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh @@ -40,12 +40,16 @@ setup_prepare() vlan_create $h2 111 v$h2 192.0.2.18/28 bridge vlan add dev $swp2 vid 111 + + trap_install $h3 ingress } cleanup() { pre_cleanup + trap_uninstall $h3 ingress + vlan_destroy $h2 111 vlan_destroy $h1 111 vlan_destroy $h3 555 @@ -63,11 +67,11 @@ test_vlan_dir() RET=0 - mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" - test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type" + mirror_install $swp1 $direction $swp3.555 "matchall" + test_span_dir "$h3.555" "$forward_type" "$backward_type" mirror_uninstall $swp1 $direction - log_test "$direction mirror to vlan ($tcflags)" + log_test "$direction mirror to vlan" } test_vlan() @@ -84,14 +88,12 @@ test_tagged_vlan_dir() RET=0 - mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" - do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" ip \ - 192.0.2.17 192.0.2.18 - do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" ip \ - 192.0.2.17 192.0.2.18 + mirror_install $swp1 $direction $swp3.555 "matchall" + do_test_span_vlan_dir_ips '>= 10' "$h3.555" 111 ip 192.0.2.17 192.0.2.18 + do_test_span_vlan_dir_ips 0 "$h3.555" 555 ip 192.0.2.17 192.0.2.18 mirror_uninstall $swp1 $direction - log_test "$direction mirror tagged to vlan ($tcflags)" + log_test "$direction mirror tagged to vlan" } test_tagged_vlan() @@ -100,32 +102,11 @@ test_tagged_vlan() test_tagged_vlan_dir egress 0 8 } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - trap_install $h3 ingress - - tests_run - - trap_uninstall $h3 ingress - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh index af3b398d13f0..694ece9ba3a7 100755 --- a/tools/testing/selftests/net/forwarding/no_forwarding.sh +++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh @@ -202,7 +202,7 @@ one_bridge_two_pvids() ip link set $swp2 master br0 bridge vlan add dev $swp1 vid 1 pvid untagged - bridge vlan add dev $swp1 vid 2 pvid untagged + bridge vlan add dev $swp2 vid 2 pvid untagged run_test "Switch ports in VLAN-aware bridge with different PVIDs" @@ -233,6 +233,9 @@ cleanup() { pre_cleanup + ip link set dev $swp2 down + ip link set dev $swp1 down + h2_destroy h1_destroy diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh index e064b946e821..16583a470ec3 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh @@ -109,6 +109,7 @@ ALL_TESTS=" ping_ipv4 ping_ipv6 " +REQUIRE_TEAMD="yes" NUM_NETIFS=8 source lib.sh diff --git a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh index f05ffe213c46..2a4cd1af1b85 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh @@ -76,6 +76,7 @@ ping_ipv4 ping_ipv6 "} +REQUIRE_TEAMD="yes" NUM_NETIFS=8 : ${lib_dir:=.} source $lib_dir/lib.sh diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index 3f0f5dc95542..a7d8399c8d4f 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -1,10 +1,46 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.2/24 | | +# | 2001:db8:1::2/64 | | +# +-------------------|-----+ +# | +# +-------------------|----------------------+ +# | | R1 | +# | $rp11 + | +# | 192.0.2.1/24 | +# | 2001:db8:1::1/64 | +# | | +# | + $rp12 + $rp13 | +# | | 169.254.2.12/24 | 169.254.3.13/24 | +# | | fe80:2::12/64 | fe80:3::13/64 | +# +--|--------------------|------------------+ +# | | +# +--|--------------------|------------------+ +# | + $rp22 + $rp23 | +# | 169.254.2.22/24 169.254.3.23/24 | +# | fe80:2::22/64 fe80:3::23/64 | +# | | +# | $rp21 + | +# | 198.51.100.1/24 | | +# | 2001:db8:2::1/64 | R2 | +# +-------------------|----------------------+ +# | +# +-------------------|-----+ +# | | | +# | $h2 + | +# | 198.51.100.2/24 | +# | 2001:db8:2::2/64 H2 | +# +-------------------------+ + ALL_TESTS=" ping_ipv4 ping_ipv6 multipath_test + multipath16_test ping_ipv4_blackhole ping_ipv6_blackhole nh_stats_test_v4 @@ -191,9 +227,11 @@ routing_nh_obj() multipath4_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -207,7 +245,8 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -223,9 +262,11 @@ multipath4_test() multipath6_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -240,7 +281,8 @@ multipath6_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -278,6 +320,23 @@ multipath_test() multipath6_test "Weighted MP 11:45" 11 45 } +multipath16_test() +{ + check_nhgw16 104 || return + + log_info "Running 16-bit IPv4 multipath tests" + multipath4_test "65535:65535" 65535 65535 + multipath4_test "128:512" 128 512 + omit_on_slow \ + multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 + + log_info "Running 16-bit IPv6 multipath tests" + multipath6_test "65535:65535" 65535 65535 + multipath6_test "128:512" 128 512 + omit_on_slow \ + multipath6_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 +} + ping_ipv4_blackhole() { RET=0 diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh index 7e7d62161c34..507b2852dabe 100644 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh @@ -56,21 +56,12 @@ nh_stats_test_dispatch_swhw() local group_id=$1; shift local mz="$@" - local used - nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \ nh_stats_get "${mz[@]}" - used=$(ip -s -j -d nexthop show id $group_id | - jq '.[].hw_stats.used') - kind=$(ip -j -d link show dev $rp11 | - jq -r '.[].linkinfo.info_kind') - if [[ $used == true ]]; then + xfail_on_veth $rp11 \ nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \ nh_stats_get_hw "${mz[@]}" - elif [[ $kind == veth ]]; then - log_test_skip "HW stats not offloaded on veth topology" - fi } nh_stats_test_dispatch() @@ -83,7 +74,6 @@ nh_stats_test_dispatch() local mz="$@" local enabled - local kind if ! ip nexthop help 2>&1 | grep -q hw_stats; then log_test_skip "NH stats test: ip doesn't support HW stats" @@ -127,3 +117,16 @@ __nh_stats_test_v6() $MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2 sysctl_restore net.ipv6.fib_multipath_hash_policy } + +check_nhgw16() +{ + local nhid=$1; shift + + ip nexthop replace id 9999 group "$nhid,65535" &>/dev/null + if (( $? )); then + log_test_skip "16-bit multipath tests" \ + "iproute2 or the kernel do not support 16-bit next hop weights" + return 1 + fi + ip nexthop del id 9999 ||: +} diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh index 4b483d24ad00..88ddae05b39d 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh @@ -1,10 +1,46 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.2/24 | | +# | 2001:db8:1::2/64 | | +# +-------------------|-----+ +# | +# +-------------------|----------------------+ +# | | R1 | +# | $rp11 + | +# | 192.0.2.1/24 | +# | 2001:db8:1::1/64 | +# | | +# | + $rp12 + $rp13 | +# | | 169.254.2.12/24 | 169.254.3.13/24 | +# | | fe80:2::12/64 | fe80:3::13/64 | +# +--|--------------------|------------------+ +# | | +# +--|--------------------|------------------+ +# | + $rp22 + $rp23 | +# | 169.254.2.22/24 169.254.3.23/24 | +# | fe80:2::22/64 fe80:3::23/64 | +# | | +# | $rp21 + | +# | 198.51.100.1/24 | | +# | 2001:db8:2::1/64 | R2 | +# +-------------------|----------------------+ +# | +# +-------------------|-----+ +# | | | +# | $h2 + | +# | 198.51.100.2/24 | +# | 2001:db8:2::2/64 H2 | +# +-------------------------+ + ALL_TESTS=" ping_ipv4 ping_ipv6 multipath_test + multipath16_test nh_stats_test_v4 nh_stats_test_v6 " @@ -193,9 +229,11 @@ routing_nh_obj() multipath4_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -208,7 +246,8 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -223,9 +262,11 @@ multipath4_test() multipath6_l4_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -238,7 +279,8 @@ multipath6_l4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -336,6 +378,41 @@ multipath_test() ip nexthop replace id 106 group 104,1/105,1 type resilient } +multipath16_test() +{ + check_nhgw16 104 || return + + log_info "Running 16-bit IPv4 multipath tests" + ip nexthop replace id 103 group 101/102 type resilient idle_timer 0 + + ip nexthop replace id 103 group 101,65535/102,65535 type resilient + multipath4_test "65535:65535" 65535 65535 + + ip nexthop replace id 103 group 101,128/102,512 type resilient + multipath4_test "128:512" 128 512 + + ip nexthop replace id 103 group 101,255/102,65535 type resilient + omit_on_slow \ + multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 + + ip nexthop replace id 103 group 101,1/102,1 type resilient + + log_info "Running 16-bit IPv6 L4 hash multipath tests" + ip nexthop replace id 106 group 104/105 type resilient idle_timer 0 + + ip nexthop replace id 106 group 104,65535/105,65535 type resilient + multipath6_l4_test "65535:65535" 65535 65535 + + ip nexthop replace id 106 group 104,128/105,512 type resilient + multipath6_l4_test "128:512" 128 512 + + ip nexthop replace id 106 group 104,255/105,65535 type resilient + omit_on_slow \ + multipath6_l4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 + + ip nexthop replace id 106 group 104,1/105,1 type resilient +} + nh_stats_test_v4() { __nh_stats_test_v4 resilient diff --git a/tools/testing/selftests/net/forwarding/router_mpath_seed.sh b/tools/testing/selftests/net/forwarding/router_mpath_seed.sh new file mode 100755 index 000000000000..314cb906c1eb --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_mpath_seed.sh @@ -0,0 +1,333 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-------------------------+ +-------------------------+ +# | H1 | | H2 | +# | $h1 + | | + $h2 | +# | 192.0.2.1/28 | | | | 192.0.2.34/28 | +# | 2001:db8:1::1/64 | | | | 2001:db8:3::2/64 | +# +-------------------|-----+ +-|-----------------------+ +# | | +# +-------------------|-----+ +-|-----------------------+ +# | R1 | | | | R2 | +# | $rp11 + | | + $rp21 | +# | 192.0.2.2/28 | | 192.0.2.33/28 | +# | 2001:db8:1::2/64 | | 2001:db8:3::1/64 | +# | | | | +# | $rp12 + | | + $rp22 | +# | 192.0.2.17/28 | | | | 192.0.2.18..27/28 | +# | 2001:db8:2::17/64 | | | | 2001:db8:2::18..27/64 | +# +-------------------|-----+ +-|-----------------------+ +# | | +# `----------' + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + test_mpath_seed_stability_ipv4 + test_mpath_seed_stability_ipv6 + test_mpath_seed_get + test_mpath_seed_ipv4 + test_mpath_seed_ipv6 +" +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + ip -4 route add 192.0.2.32/28 vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del 192.0.2.32/28 vrf v$h1 nexthop via 192.0.2.2 + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.34/28 2001:db8:3::2/64 + ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.33 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:3::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:3::1 + ip -4 route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.33 + simple_if_fini $h2 192.0.2.34/28 2001:db8:3::2/64 +} + +router1_create() +{ + simple_if_init $rp11 192.0.2.2/28 2001:db8:1::2/64 + __simple_if_init $rp12 v$rp11 192.0.2.17/28 2001:db8:2::17/64 +} + +router1_destroy() +{ + __simple_if_fini $rp12 192.0.2.17/28 2001:db8:2::17/64 + simple_if_fini $rp11 192.0.2.2/28 2001:db8:1::2/64 +} + +router2_create() +{ + simple_if_init $rp21 192.0.2.33/28 2001:db8:3::1/64 + __simple_if_init $rp22 v$rp21 192.0.2.18/28 2001:db8:2::18/64 + ip -4 route add 192.0.2.0/28 vrf v$rp21 nexthop via 192.0.2.17 + ip -6 route add 2001:db8:1::/64 vrf v$rp21 nexthop via 2001:db8:2::17 +} + +router2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$rp21 nexthop via 2001:db8:2::17 + ip -4 route del 192.0.2.0/28 vrf v$rp21 nexthop via 192.0.2.17 + __simple_if_fini $rp22 192.0.2.18/28 2001:db8:2::18/64 + simple_if_fini $rp21 192.0.2.33/28 2001:db8:3::1/64 +} + +nexthops_create() +{ + local i + for i in $(seq 10); do + ip nexthop add id $((1000 + i)) via 192.0.2.18 dev $rp12 + ip nexthop add id $((2000 + i)) via 2001:db8:2::18 dev $rp12 + done + + ip nexthop add id 1000 group $(seq -s / 1001 1010) hw_stats on + ip nexthop add id 2000 group $(seq -s / 2001 2010) hw_stats on + ip -4 route add 192.0.2.32/28 vrf v$rp11 nhid 1000 + ip -6 route add 2001:db8:3::/64 vrf v$rp11 nhid 2000 +} + +nexthops_destroy() +{ + local i + + ip -6 route del 2001:db8:3::/64 vrf v$rp11 nhid 2000 + ip -4 route del 192.0.2.32/28 vrf v$rp11 nhid 1000 + ip nexthop del id 2000 + ip nexthop del id 1000 + + for i in $(seq 10 -1 1); do + ip nexthop del id $((2000 + i)) + ip nexthop del id $((1000 + i)) + done +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp11=${NETIFS[p2]} + + rp12=${NETIFS[p3]} + rp22=${NETIFS[p4]} + + rp21=${NETIFS[p5]} + h2=${NETIFS[p6]} + + sysctl_save net.ipv4.fib_multipath_hash_seed + + vrf_prepare + + h1_create + h2_create + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + nexthops_destroy + router2_destroy + router1_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + sysctl_restore net.ipv4.fib_multipath_hash_seed +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:3::2 +} + +test_mpath_seed_get() +{ + RET=0 + + local i + for ((i = 0; i < 100; i++)); do + local seed_w=$((999331 * i)) + sysctl -qw net.ipv4.fib_multipath_hash_seed=$seed_w + local seed_r=$(sysctl -n net.ipv4.fib_multipath_hash_seed) + ((seed_r == seed_w)) + check_err $? "mpath seed written as $seed_w, but read as $seed_r" + done + + log_test "mpath seed set/get" +} + +nh_stats_snapshot() +{ + local group_id=$1; shift + + ip -j -s -s nexthop show id $group_id | + jq -c '[.[].group_stats | sort_by(.id) | .[].packets]' +} + +get_active_nh() +{ + local s0=$1; shift + local s1=$1; shift + + jq -n --argjson s0 "$s0" --argjson s1 "$s1" -f /dev/stdin <<-"EOF" + [range($s0 | length)] | + map($s1[.] - $s0[.]) | + map(if . > 8 then 1 else 0 end) | + index(1) + EOF +} + +probe_nh() +{ + local group_id=$1; shift + local -a mz=("$@") + + local s0=$(nh_stats_snapshot $group_id) + "${mz[@]}" + local s1=$(nh_stats_snapshot $group_id) + + get_active_nh "$s0" "$s1" +} + +probe_seed() +{ + local group_id=$1; shift + local seed=$1; shift + local -a mz=("$@") + + sysctl -qw net.ipv4.fib_multipath_hash_seed=$seed + probe_nh "$group_id" "${mz[@]}" +} + +test_mpath_seed() +{ + local group_id=$1; shift + local what=$1; shift + local -a mz=("$@") + local ii + + RET=0 + + local -a tally=(0 0 0 0 0 0 0 0 0 0) + for ((ii = 0; ii < 100; ii++)); do + local act=$(probe_seed $group_id $((999331 * ii)) "${mz[@]}") + ((tally[act]++)) + done + + local tally_str="${tally[@]}" + for ((ii = 0; ii < ${#tally[@]}; ii++)); do + ((tally[ii] > 0)) + check_err $? "NH #$ii not hit, tally='$tally_str'" + done + + log_test "mpath seed $what" + sysctl -qw net.ipv4.fib_multipath_hash_seed=0 +} + +test_mpath_seed_ipv4() +{ + test_mpath_seed 1000 IPv4 \ + $MZ $h1 -A 192.0.2.1 -B 192.0.2.34 -q \ + -p 64 -d 0 -c 10 -t udp +} + +test_mpath_seed_ipv6() +{ + test_mpath_seed 2000 IPv6 \ + $MZ -6 $h1 -A 2001:db8:1::1 -B 2001:db8:3::2 -q \ + -p 64 -d 0 -c 10 -t udp +} + +check_mpath_seed_stability() +{ + local seed=$1; shift + local act_0=$1; shift + local act_1=$1; shift + + ((act_0 == act_1)) + check_err $? "seed $seed: active NH moved from $act_0 to $act_1 after seed change" +} + +test_mpath_seed_stability() +{ + local group_id=$1; shift + local what=$1; shift + local -a mz=("$@") + + RET=0 + + local seed_0=0 + local seed_1=3221338814 + local seed_2=3735928559 + + # Initial active NH before touching the seed at all. + local act_ini=$(probe_nh $group_id "${mz[@]}") + + local act_0_0=$(probe_seed $group_id $seed_0 "${mz[@]}") + local act_1_0=$(probe_seed $group_id $seed_1 "${mz[@]}") + local act_2_0=$(probe_seed $group_id $seed_2 "${mz[@]}") + + local act_0_1=$(probe_seed $group_id $seed_0 "${mz[@]}") + local act_1_1=$(probe_seed $group_id $seed_1 "${mz[@]}") + local act_2_1=$(probe_seed $group_id $seed_2 "${mz[@]}") + + check_mpath_seed_stability initial $act_ini $act_0_0 + check_mpath_seed_stability $seed_0 $act_0_0 $act_0_1 + check_mpath_seed_stability $seed_1 $act_1_0 $act_1_1 + check_mpath_seed_stability $seed_2 $act_2_0 $act_2_1 + + log_test "mpath seed stability $what" + sysctl -qw net.ipv4.fib_multipath_hash_seed=0 +} + +test_mpath_seed_stability_ipv4() +{ + test_mpath_seed_stability 1000 IPv4 \ + $MZ $h1 -A 192.0.2.1 -B 192.0.2.34 -q \ + -p 64 -d 0 -c 10 -t udp +} + +test_mpath_seed_stability_ipv6() +{ + test_mpath_seed_stability 2000 IPv6 \ + $MZ -6 $h1 -A 2001:db8:1::1 -B 2001:db8:3::2 -q \ + -p 64 -d 0 -c 10 -t udp +} + +trap cleanup EXIT + +setup_prepare +setup_wait +nexthops_create + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index e2be354167a1..46f365b557b7 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -180,6 +180,7 @@ multipath4_test() ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -217,6 +218,7 @@ multipath6_test() $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) diff --git a/tools/testing/selftests/net/forwarding/router_nh.sh b/tools/testing/selftests/net/forwarding/router_nh.sh index f3a53738bdcc..92904b01eae9 100755 --- a/tools/testing/selftests/net/forwarding/router_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_nh.sh @@ -1,6 +1,20 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# +-------------------------+ +-------------------------+ +# | H1 | | H2 | +# | $h1 + | | $h2 + | +# | 192.0.2.2/24 | | | 198.51.100.2/24 | | +# | 2001:db8:1::2/64 | | | 2001:db8:2::2/64 | | +# +-------------------|-----+ +-------------------|-----+ +# | | +# +-------------------|----------------------------|-----+ +# | R1 | | | +# | $rp1 + $rp2 + | +# | 192.0.2.1/24 198.51.100.1/24 | +# | 2001:db8:1::1/64 2001:db8:2::1/64 | +# +------------------------------------------------------+ + ALL_TESTS=" ping_ipv4 ping_ipv6 diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh index e60c8b4818cc..1f6f53e284b5 100755 --- a/tools/testing/selftests/net/forwarding/sch_ets.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -24,15 +24,10 @@ switch_create() # Create a bottleneck so that the DWRR process can kick in. tc qdisc add dev $swp2 root handle 1: tbf \ rate 1Gbit burst 1Mbit latency 100ms + defer tc qdisc del dev $swp2 root PARENT="parent 1:" } -switch_destroy() -{ - ets_switch_destroy - tc qdisc del dev $swp2 root -} - # Callback from sch_ets_tests.sh collect_stats() { diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh index f906fcc66572..8f9922c695b0 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh @@ -166,44 +166,32 @@ h1_create() local i; simple_if_init $h1 + defer simple_if_fini $h1 + mtu_set $h1 9900 + defer mtu_restore $h1 + for i in {0..2}; do vlan_create $h1 1$i v$h1 $(sip $i)/28 + defer vlan_destroy $h1 1$i ip link set dev $h1.1$i type vlan egress 0:$i done } -h1_destroy() -{ - local i - - for i in {0..2}; do - vlan_destroy $h1 1$i - done - mtu_restore $h1 - simple_if_fini $h1 -} - h2_create() { local i simple_if_init $h2 - mtu_set $h2 9900 - for i in {0..2}; do - vlan_create $h2 1$i v$h2 $(dip $i)/28 - done -} + defer simple_if_fini $h2 -h2_destroy() -{ - local i + mtu_set $h2 9900 + defer mtu_restore $h2 for i in {0..2}; do - vlan_destroy $h2 1$i + vlan_create $h2 1$i v$h2 $(dip $i)/28 + defer vlan_destroy $h2 1$i done - mtu_restore $h2 - simple_if_fini $h2 } ets_switch_create() @@ -211,44 +199,45 @@ ets_switch_create() local i ip link set dev $swp1 up + defer ip link set dev $swp1 down + mtu_set $swp1 9900 + defer mtu_restore $swp1 ip link set dev $swp2 up + defer ip link set dev $swp2 down + mtu_set $swp2 9900 + defer mtu_restore $swp2 for i in {0..2}; do vlan_create $swp1 1$i + defer vlan_destroy $swp1 1$i ip link set dev $swp1.1$i type vlan ingress 0:0 1:1 2:2 vlan_create $swp2 1$i + defer vlan_destroy $swp2 1$i ip link add dev br1$i type bridge + defer ip link del dev br1$i + ip link set dev $swp1.1$i master br1$i + defer ip link set dev $swp1.1$i nomaster + ip link set dev $swp2.1$i master br1$i + defer ip link set dev $swp2.1$i nomaster ip link set dev br1$i up - ip link set dev $swp1.1$i up - ip link set dev $swp2.1$i up - done -} + defer ip link set dev br1$i down -ets_switch_destroy() -{ - local i - - ets_delete_qdisc + ip link set dev $swp1.1$i up + defer ip link set dev $swp1.1$i down - for i in {0..2}; do - ip link del dev br1$i - vlan_destroy $swp2 1$i - vlan_destroy $swp1 1$i + ip link set dev $swp2.1$i up + defer ip link set dev $swp2.1$i down done - mtu_restore $swp2 - ip link set dev $swp2 down - - mtu_restore $swp1 - ip link set dev $swp1 down + defer ets_delete_qdisc } setup_prepare() @@ -263,23 +252,13 @@ setup_prepare() hut=$h2 vrf_prepare + defer vrf_cleanup h1_create h2_create switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1.10 $(dip 0) " vlan 10" diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index cdf689e99458..08240d3e3c87 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -90,6 +90,7 @@ __ets_dwrr_test() for stream in ${streams[@]}; do ets_start_traffic $stream + defer stop_traffic $! done sleep 10 @@ -120,25 +121,24 @@ __ets_dwrr_test() ${d[0]} ${d[$i]} fi done - - for stream in ${streams[@]}; do - stop_traffic - done } ets_dwrr_test_012() { - __ets_dwrr_test 0 1 2 + in_defer_scope \ + __ets_dwrr_test 0 1 2 } ets_dwrr_test_01() { - __ets_dwrr_test 0 1 + in_defer_scope \ + __ets_dwrr_test 0 1 } ets_dwrr_test_12() { - __ets_dwrr_test 1 2 + in_defer_scope \ + __ets_dwrr_test 1 2 } ets_qdisc_setup() @@ -199,25 +199,28 @@ ets_set_dwrr_two_bands() ets_test_strict() { ets_set_strict - ets_dwrr_test_01 - ets_dwrr_test_12 + xfail_on_slow ets_dwrr_test_01 + xfail_on_slow ets_dwrr_test_12 } ets_test_mixed() { ets_set_mixed - ets_dwrr_test_01 - ets_dwrr_test_12 + xfail_on_slow ets_dwrr_test_01 + xfail_on_slow ets_dwrr_test_12 } ets_test_dwrr() { ets_set_dwrr_uniform - ets_dwrr_test_012 + xfail_on_slow ets_dwrr_test_012 + ets_set_dwrr_varying - ets_dwrr_test_012 + xfail_on_slow ets_dwrr_test_012 + ets_change_quantum - ets_dwrr_test_012 + xfail_on_slow ets_dwrr_test_012 + ets_set_dwrr_two_bands - ets_dwrr_test_01 + xfail_on_slow ets_dwrr_test_01 } diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh index 81f31179ac88..af166662b78a 100755 --- a/tools/testing/selftests/net/forwarding/sch_red.sh +++ b/tools/testing/selftests/net/forwarding/sch_red.sh @@ -53,71 +53,63 @@ PKTSZ=1400 h1_create() { simple_if_init $h1 192.0.2.1/28 + defer simple_if_fini $h1 192.0.2.1/28 + mtu_set $h1 10000 + defer mtu_restore $h1 + tc qdisc replace dev $h1 root handle 1: tbf \ rate 10Mbit burst 10K limit 1M -} - -h1_destroy() -{ - tc qdisc del dev $h1 root - mtu_restore $h1 - simple_if_fini $h1 192.0.2.1/28 + defer tc qdisc del dev $h1 root } h2_create() { simple_if_init $h2 192.0.2.2/28 - mtu_set $h2 10000 -} + defer simple_if_fini $h2 192.0.2.2/28 -h2_destroy() -{ - mtu_restore $h2 - simple_if_fini $h2 192.0.2.2/28 + mtu_set $h2 10000 + defer mtu_restore $h2 } h3_create() { simple_if_init $h3 192.0.2.3/28 - mtu_set $h3 10000 -} + defer simple_if_fini $h3 192.0.2.3/28 -h3_destroy() -{ - mtu_restore $h3 - simple_if_fini $h3 192.0.2.3/28 + mtu_set $h3 10000 + defer mtu_restore $h3 } switch_create() { ip link add dev br up type bridge + defer ip link del dev br + ip link set dev $swp1 up master br + defer ip link set dev $swp1 down nomaster + ip link set dev $swp2 up master br + defer ip link set dev $swp2 down nomaster + ip link set dev $swp3 up master br + defer ip link set dev $swp3 down nomaster mtu_set $swp1 10000 + defer mtu_restore $h1 + mtu_set $swp2 10000 + defer mtu_restore $h2 + mtu_set $swp3 10000 + defer mtu_restore $h3 tc qdisc replace dev $swp3 root handle 1: tbf \ rate 10Mbit burst 10K limit 1M - ip link add name _drop_test up type dummy -} + defer tc qdisc del dev $swp3 root -switch_destroy() -{ - ip link del dev _drop_test - tc qdisc del dev $swp3 root - - mtu_restore $h3 - mtu_restore $h2 - mtu_restore $h1 - - ip link set dev $swp3 down nomaster - ip link set dev $swp2 down nomaster - ip link set dev $swp1 down nomaster - ip link del dev br + ip link add name _drop_test up type dummy + defer ip link del dev _drop_test } setup_prepare() @@ -134,6 +126,7 @@ setup_prepare() h3_mac=$(mac_get $h3) vrf_prepare + defer vrf_cleanup h1_create h2_create @@ -141,18 +134,6 @@ setup_prepare() switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h3_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1 192.0.2.3 " from host 1" @@ -287,6 +268,7 @@ do_ecn_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! sleep 1 ecn_test_common "$name" $limit @@ -298,9 +280,6 @@ do_ecn_test() build_backlog $((2 * limit)) udp >/dev/null check_fail $? "UDP traffic went into backlog instead of being early-dropped" log_test "$name backlog > limit: UDP early-dropped" - - stop_traffic - sleep 1 } do_ecn_nodrop_test() @@ -310,6 +289,7 @@ do_ecn_nodrop_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! sleep 1 ecn_test_common "$name" $limit @@ -321,9 +301,6 @@ do_ecn_nodrop_test() build_backlog $((2 * limit)) udp >/dev/null check_err $? "UDP traffic was early-dropped instead of getting into backlog" log_test "$name backlog > limit: UDP not dropped" - - stop_traffic - sleep 1 } do_red_test() @@ -336,6 +313,7 @@ do_red_test() # is above limit. $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! # Pushing below the queue limit should work. RET=0 @@ -352,9 +330,6 @@ do_red_test() pct=$(check_marking "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." log_test "RED backlog > limit" - - stop_traffic - sleep 1 } do_red_qevent_test() @@ -369,6 +344,7 @@ do_red_qevent_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t udp -q & + defer stop_traffic $! sleep 1 tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ @@ -396,9 +372,6 @@ do_red_qevent_test() check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen" log_test "RED early_dropped packets mirrored" - - stop_traffic - sleep 1 } do_ecn_qevent_test() @@ -410,6 +383,7 @@ do_ecn_qevent_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! sleep 1 tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ @@ -428,9 +402,6 @@ do_ecn_qevent_test() tc filter del block 10 pref 1234 handle 102 matchall log_test "ECN marked packets mirrored" - - stop_traffic - sleep 1 } install_qdisc() @@ -451,36 +422,36 @@ uninstall_qdisc() ecn_test() { install_qdisc ecn - do_ecn_test $BACKLOG - uninstall_qdisc + defer uninstall_qdisc + xfail_on_slow do_ecn_test $BACKLOG } ecn_nodrop_test() { install_qdisc ecn nodrop - do_ecn_nodrop_test $BACKLOG - uninstall_qdisc + defer uninstall_qdisc + xfail_on_slow do_ecn_nodrop_test $BACKLOG } red_test() { install_qdisc - do_red_test $BACKLOG - uninstall_qdisc + defer uninstall_qdisc + xfail_on_slow do_red_test $BACKLOG } red_qevent_test() { install_qdisc qevent early_drop block 10 - do_red_qevent_test $BACKLOG - uninstall_qdisc + defer uninstall_qdisc + xfail_on_slow do_red_qevent_test $BACKLOG } ecn_qevent_test() { install_qdisc ecn qevent mark block 10 - do_ecn_qevent_test $BACKLOG - uninstall_qdisc + defer uninstall_qdisc + xfail_on_slow do_ecn_qevent_test $BACKLOG } trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh index d1f26cb7cd73..ec309a5086bc 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh @@ -60,68 +60,65 @@ host_create() local host=$1; shift simple_if_init $dev + defer simple_if_fini $dev + mtu_set $dev 10000 + defer mtu_restore $dev vlan_create $dev 10 v$dev $(ipaddr $host 10)/28 + defer vlan_destroy $dev 10 ip link set dev $dev.10 type vlan egress 0:0 vlan_create $dev 11 v$dev $(ipaddr $host 11)/28 + defer vlan_destroy $dev 11 ip link set dev $dev.11 type vlan egress 0:1 } -host_destroy() -{ - local dev=$1; shift - - vlan_destroy $dev 11 - vlan_destroy $dev 10 - mtu_restore $dev - simple_if_fini $dev -} - h1_create() { host_create $h1 1 } -h1_destroy() -{ - host_destroy $h1 -} - h2_create() { host_create $h2 2 tc qdisc add dev $h2 clsact + defer tc qdisc del dev $h2 clsact + tc filter add dev $h2 ingress pref 1010 prot 802.1q \ flower $TCFLAGS vlan_id 10 action pass tc filter add dev $h2 ingress pref 1011 prot 802.1q \ flower $TCFLAGS vlan_id 11 action pass } -h2_destroy() -{ - tc qdisc del dev $h2 clsact - host_destroy $h2 -} - switch_create() { local intf local vlan ip link add dev br10 type bridge + defer ip link del dev br10 + ip link add dev br11 type bridge + defer ip link del dev br11 for intf in $swp1 $swp2; do ip link set dev $intf up + defer ip link set dev $intf down + mtu_set $intf 10000 + defer mtu_restore $intf for vlan in 10 11; do vlan_create $intf $vlan + defer vlan_destroy $intf $vlan + ip link set dev $intf.$vlan master br$vlan + defer ip link set dev $intf.$vlan nomaster + ip link set dev $intf.$vlan up + defer ip link set dev $intf.$vlan down done done @@ -130,34 +127,10 @@ switch_create() done ip link set dev br10 up - ip link set dev br11 up -} - -switch_destroy() -{ - local intf - local vlan - - # A test may have been interrupted mid-run, with Qdisc installed. Delete - # it here. - tc qdisc del dev $swp2 root 2>/dev/null - - ip link set dev br11 down - ip link set dev br10 down + defer ip link set dev br10 down - for intf in $swp2 $swp1; do - for vlan in 11 10; do - ip link set dev $intf.$vlan down - ip link set dev $intf.$vlan nomaster - vlan_destroy $intf $vlan - done - - mtu_restore $intf - ip link set dev $intf down - done - - ip link del dev br11 - ip link del dev br10 + ip link set dev br11 up + defer ip link set dev br11 down } setup_prepare() @@ -177,23 +150,13 @@ setup_prepare() h2_mac=$(mac_get $h2) vrf_prepare + defer vrf_cleanup h1_create h2_create switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1.10 $(ipaddr 2 10) " vlan 10" @@ -207,18 +170,18 @@ tbf_get_counter() tc_rule_stats_get $h2 10$vlan ingress .bytes } -do_tbf_test() +__tbf_test() { local vlan=$1; shift local mbit=$1; shift start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 2 $vlan) $h2_mac + defer stop_traffic $! sleep 5 # Wait for the burst to dwindle local t2=$(busywait_for_counter 1000 +1 tbf_get_counter $vlan) sleep 10 local t3=$(tbf_get_counter $vlan) - stop_traffic RET=0 @@ -227,7 +190,13 @@ do_tbf_test() local nr=$(rate $t2 $t3 10) local nr_pct=$((100 * (nr - er) / er)) ((-5 <= nr_pct && nr_pct <= 5)) - check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%." + xfail_on_slow check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%." log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit" } + +do_tbf_test() +{ + in_defer_scope \ + __tbf_test "$@" +} diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh index df9bcd6a811a..c182a04282bc 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh @@ -30,8 +30,9 @@ tbf_test() # This test is used for both ETS and PRIO. Even though we only need two # bands, PRIO demands a minimum of three. tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0 + defer tc qdisc del dev $swp2 root + tbf_test_one 128K - tc qdisc del dev $swp2 root } tbf_root_test() @@ -42,6 +43,8 @@ tbf_root_test() tc qdisc replace dev $swp2 root handle 1: \ tbf rate 400Mbit burst $bs limit 1M + defer tc qdisc del dev $swp2 root + tc qdisc replace dev $swp2 parent 1:1 handle 10: \ $QDISC 3 priomap 2 1 0 tc qdisc replace dev $swp2 parent 10:3 handle 103: \ @@ -53,8 +56,6 @@ tbf_root_test() do_tbf_test 10 400 $bs do_tbf_test 11 400 $bs - - tc qdisc del dev $swp2 root } if type -t sch_tbf_pre_hook >/dev/null; then diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh index 96c997be0d03..9f20320f8d84 100755 --- a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh @@ -14,13 +14,14 @@ tbf_test_one() tc qdisc replace dev $swp2 root handle 108: tbf \ rate 400Mbit burst $bs limit 1M + defer tc qdisc del dev $swp2 root + do_tbf_test 10 400 $bs } tbf_test() { tbf_test_one 128K - tc qdisc del dev $swp2 root } if type -t sch_tbf_pre_hook >/dev/null; then diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index 589629636502..ea89e558672d 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -4,7 +4,8 @@ ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ mirred_egress_mirror_test matchall_mirred_egress_mirror_test \ gact_trap_test mirred_egress_to_ingress_test \ - mirred_egress_to_ingress_tcp_test" + mirred_egress_to_ingress_tcp_test \ + ingress_2nd_vlan_push egress_2nd_vlan_push" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -244,6 +245,49 @@ mirred_egress_to_ingress_tcp_test() log_test "mirred_egress_to_ingress_tcp ($tcflags)" } +ingress_2nd_vlan_push() +{ + tc filter add dev $swp1 ingress pref 20 chain 0 handle 20 flower \ + $tcflags num_of_vlans 1 \ + action vlan push id 100 protocol 0x8100 action goto chain 5 + tc filter add dev $swp1 ingress pref 30 chain 5 handle 30 flower \ + $tcflags num_of_vlans 2 \ + cvlan_ethtype 0x800 action pass + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -Q 10 -q + + tc_check_packets "dev $swp1 ingress" 30 1 + check_err $? "No double-vlan packets received" + + tc filter del dev $swp1 ingress pref 20 chain 0 handle 20 flower + tc filter del dev $swp1 ingress pref 30 chain 5 handle 30 flower + + log_test "ingress_2nd_vlan_push ($tcflags)" +} + +egress_2nd_vlan_push() +{ + tc filter add dev $h1 egress pref 20 chain 0 handle 20 flower \ + $tcflags num_of_vlans 0 \ + action vlan push id 10 protocol 0x8100 \ + pipe action vlan push id 100 protocol 0x8100 action goto chain 5 + tc filter add dev $h1 egress pref 30 chain 5 handle 30 flower \ + $tcflags num_of_vlans 2 \ + cvlan_ethtype 0x800 action pass + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h1 egress" 30 1 + check_err $? "No double-vlan packets received" + + tc filter del dev $h1 egress pref 20 chain 0 handle 20 flower + tc filter del dev $h1 egress pref 30 chain 5 handle 30 flower + + log_test "egress_2nd_vlan_push ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh index bce8bb8d2b6f..2e3326edfa9a 100644 --- a/tools/testing/selftests/net/forwarding/tc_common.sh +++ b/tools/testing/selftests/net/forwarding/tc_common.sh @@ -4,7 +4,7 @@ CHECK_TC="yes" # Can be overridden by the configuration file. See lib.sh -TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms +: "${TC_HIT_TIMEOUT:=1000}" # ms tc_check_packets() { diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh index 3885a2a91f7d..baed5e380dae 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh @@ -20,6 +20,7 @@ ALL_TESTS=" test_port_range_ipv4_tcp test_port_range_ipv6_udp test_port_range_ipv6_tcp + test_port_range_ipv4_udp_drop " NUM_NETIFS=4 @@ -194,6 +195,51 @@ test_port_range_ipv6_tcp() __test_port_range $proto $ip_proto $sip $dip $mode "$name" } +test_port_range_ipv4_udp_drop() +{ + local proto=ipv4 + local ip_proto=udp + local sip=192.0.2.1 + local dip=192.0.2.2 + local mode="-4" + local name="IPv4 UDP Drop" + local dmac=$(mac_get $h2) + local smac=$(mac_get $h1) + local sport_min=2000 + local sport_max=3000 + local sport_mid=$((sport_min + (sport_max - sport_min) / 2)) + local dport=5000 + + RET=0 + + tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \ + flower src_ip $sip dst_ip $dip ip_proto $ip_proto \ + src_port $sport_min-$sport_max \ + dst_port $dport \ + action drop + + # Test ports outside range - should pass + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_min - 1)),dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_max + 1)),dp=$dport" + + # Test ports inside range - should be dropped + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_min,dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_mid,dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_max,dp=$dport" + + tc_check_packets "dev $swp1 ingress" 101 3 + check_err $? "Filter did not drop the expected number of packets" + + tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower + + log_test "Port range matching - $name" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh index 5103f64a71d6..509fdedfcfa1 100755 --- a/tools/testing/selftests/net/forwarding/tc_police.sh +++ b/tools/testing/selftests/net/forwarding/tc_police.sh @@ -148,7 +148,7 @@ police_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower } @@ -198,7 +198,7 @@ police_shared_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% } police_shared_test() @@ -278,7 +278,7 @@ police_mirror_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% tc filter del dev $pol_if $dir protocol ip pref 1 handle 101 flower tc filter del dev $h3 ingress protocol ip pref 1 handle 101 flower tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower @@ -320,7 +320,7 @@ police_pps_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower } diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh new file mode 100755 index 000000000000..8992aeabfe0b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_taprio.sh @@ -0,0 +1,421 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" \ + test_clock_jump_backward \ + test_taprio_after_ptp \ + test_max_sdu \ + test_clock_jump_backward_forward \ +" +NUM_NETIFS=4 +source tc_common.sh +source lib.sh +source tsn_lib.sh + +require_command python3 + +# The test assumes the usual topology from the README, where h1 is connected to +# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge. +# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2. +# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to +# swp1 (and both to CLOCK_REALTIME). +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +UDS_ADDRESS_H1="/var/run/ptp4l_h1" +UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1" + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +# Tunables +NUM_PKTS=100 +STREAM_VID=10 +STREAM_PRIO_1=6 +STREAM_PRIO_2=5 +STREAM_PRIO_3=4 +# PTP uses TC 0 +ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2)) +# Use a conservative cycle of 10 ms to allow the test to still pass when the +# kernel has some extra overhead like lockdep etc +CYCLE_TIME_NS=10000000 +# Create two Gate Control List entries, one OPEN and one CLOSE, of equal +# durations +GATE_DURATION_NS=$((CYCLE_TIME_NS / 2)) +# Give 2/3 of the cycle time to user space and 1/3 to the kernel +FUDGE_FACTOR=$((CYCLE_TIME_NS / 3)) +# Shift the isochron base time by half the gate time, so that packets are +# always received by swp1 close to the middle of the time slot, to minimize +# inaccuracies due to network sync +SHIFT_TIME_NS=$((GATE_DURATION_NS / 2)) + +path_delay= + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +switch_create() +{ + local h2_mac_addr=$(mac_get $h2) + + ip link set $swp1 up + ip link set $swp2 up + + ip link add br0 type bridge vlan_filtering 1 + ip link set $swp1 master br0 + ip link set $swp2 master br0 + ip link set br0 up + + bridge vlan add dev $swp2 vid $STREAM_VID + bridge vlan add dev $swp1 vid $STREAM_VID + bridge fdb add dev $swp2 \ + $h2_mac_addr vlan $STREAM_VID static master +} + +switch_destroy() +{ + ip link del br0 +} + +ptp_setup() +{ + # Set up swp1 as a master PHC for h1, synchronized to the local + # CLOCK_REALTIME. + phc2sys_start $UDS_ADDRESS_SWP1 + ptp4l_start $h1 true $UDS_ADDRESS_H1 + ptp4l_start $swp1 false $UDS_ADDRESS_SWP1 +} + +ptp_cleanup() +{ + ptp4l_stop $swp1 + ptp4l_stop $h1 + phc2sys_stop +} + +txtime_setup() +{ + local if_name=$1 + + tc qdisc add dev $if_name clsact + # Classify PTP on TC 7 and isochron on TC 6 + tc filter add dev $if_name egress protocol 0x88f7 \ + flower action skbedit priority 7 + tc filter add dev $if_name egress protocol 802.1Q \ + flower vlan_ethtype 0xdead action skbedit priority 6 + tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + hw 1 + # Set up TC 5, 6, 7 for SO_TXTIME. tc-mqprio queues count from 1. + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR +} + +txtime_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name clsact + tc qdisc del dev $if_name root +} + +taprio_replace() +{ + local if_name="$1"; shift + local extra_args="$1"; shift + + # STREAM_PRIO_1 always has an open gate. + # STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time) + # STREAM_PRIO_3 always has a closed gate. + tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \ + sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \ + base-time 0 flags 0x2 $extra_args + taprio_wait_for_admin $if_name +} + +taprio_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name root +} + +probe_path_delay() +{ + local isochron_dat="$(mktemp)" + local received + + log_info "Probing path delay" + + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \ + "$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \ + "$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + if [ "$received" != "$NUM_PKTS" ]; then + echo "Cannot establish basic data path between $h1 and $h2" + exit $ksft_fail + fi + + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(np.max(w))) + EOF + path_delay=$(python3 ./isochron_postprocess.py) + + log_info "Path delay from $h1 to $h2 estimated at $path_delay ns" + + if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then + echo "Path delay larger than gate duration, aborting" + exit $ksft_fail + fi + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create + + txtime_setup $h1 + + # Temporarily set up PTP just to probe the end-to-end path delay. + ptp_setup + probe_path_delay + ptp_cleanup +} + +cleanup() +{ + pre_cleanup + + isochron_recv_stop + txtime_cleanup $h1 + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +run_test() +{ + local base_time=$1; shift + local stream_prio=$1; shift + local expected_delay=$1; shift + local should_fail=$1; shift + local test_name=$1; shift + local isochron_dat="$(mktemp)" + local received + local median_delay + + RET=0 + + # Set the shift time equal to the cycle time, which effectively + # cancels the default advance time. Packets won't be sent early in + # software, which ensures that they won't prematurely enter through + # the open gate in __test_out_of_band(). Also, the gate is open for + # long enough that this won't cause a problem in __test_in_band(). + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \ + "$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \ + "$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + [ "$received" = "$NUM_PKTS" ] + check_err_fail $should_fail $? "Reception of $NUM_PKTS packets" + + if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(int(np.median(w)))) + EOF + median_delay=$(python3 ./isochron_postprocess.py) + + # If the condition below is true, packets were delayed by a closed gate + [ "$median_delay" -gt $((path_delay + expected_delay)) ] + check_fail $? "Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay" + + # If the condition below is true, packets were sent expecting them to + # hit a closed gate in the switch, but were not delayed + [ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ] + check_fail $? "Median delay $median_delay is less than expected delay $expected_delay" + fi + + log_test "$test_name" + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +__test_always_open() +{ + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open" +} + +__test_always_closed() +{ + run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed" +} + +__test_in_band() +{ + # Send packets in-band with the OPEN gate entry + run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate" +} + +__test_out_of_band() +{ + # Send packets in-band with the CLOSE gate entry + run_test 0.005000000 $STREAM_PRIO_2 \ + $((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \ + "Out of band with gate" +} + +run_subtests() +{ + __test_always_open + __test_always_closed + __test_in_band + __test_out_of_band +} + +test_taprio_after_ptp() +{ + log_info "Setting up taprio after PTP" + ptp_setup + taprio_replace $swp2 + run_subtests + taprio_cleanup $swp2 + ptp_cleanup +} + +__test_under_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0" + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU" +} + +__test_over_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0" + run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU" +} + +test_max_sdu() +{ + ptp_setup + __test_under_max_sdu + __test_over_max_sdu + taprio_cleanup $swp2 + ptp_cleanup +} + +# Perform a clock jump in the past without synchronization running, so that the +# time base remains where it was set by phc_ctl. +test_clock_jump_backward() +{ + # This is a more complex schedule specifically crafted in a way that + # has been problematic on NXP LS1028A. Not much to test with it other + # than the fact that it passes traffic. + tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \ + base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \ + sched-entry S 20 300000 sched-entry S 48 200000 \ + sched-entry S 20 300000 sched-entry S 83 200000 \ + sched-entry S 40 300000 sched-entry S 00 200000 flags 2 + + log_info "Forcing a backward clock jump" + phc_ctl $swp1 set 0 + + ping_test $h1 192.0.2.2 + taprio_cleanup $swp2 +} + +# Test that taprio tolerates clock jumps. +# Since ptp4l and phc2sys are running, it is expected for the time to +# eventually recover (through yet another clock jump). Isochron waits +# until that is the case. +test_clock_jump_backward_forward() +{ + log_info "Forcing a backward and a forward clock jump" + taprio_replace $swp2 + phc_ctl $swp1 set 0 + ptp_setup + ping_test $h1 192.0.2.2 + run_subtests + ptp_cleanup + taprio_cleanup $swp2 +} + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_test_skip "Could not test offloaded functionality" + exit $EXIT_STATUS +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh index 5a5dd9034819..79775b10b99f 100755 --- a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh +++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh @@ -1,7 +1,5 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 ALL_TESTS="tunnel_key_nofrag_test" diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index b91bcd8008a9..08c044ff6689 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright 2021-2022 NXP +tc_testing_scripts_dir=$(dirname $0)/../../tc-testing/scripts + REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes} REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes} @@ -18,6 +20,7 @@ fi if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then require_command phc2sys require_command ptp4l + require_command phc_ctl fi phc2sys_start() @@ -182,6 +185,7 @@ isochron_do() local base_time=$1; shift local cycle_time=$1; shift local shift_time=$1; shift + local window_size=$1; shift local num_pkts=$1; shift local vid=$1; shift local priority=$1; shift @@ -212,6 +216,10 @@ isochron_do() extra_args="${extra_args} --shift-time=${shift_time}" fi + if ! [ -z "${window_size}" ]; then + extra_args="${extra_args} --window-size=${window_size}" + fi + if [ "${use_l2}" = "true" ]; then extra_args="${extra_args} --l2 --etype=0xdead ${vid}" receiver_extra_args="--l2 --etype=0xdead" @@ -247,3 +255,21 @@ isochron_do() cpufreq_restore ${ISOCHRON_CPU} } + +isochron_report_num_received() +{ + local isochron_dat=$1; shift + + # Count all received packets by looking at the non-zero RX timestamps + isochron report \ + --input-file "${isochron_dat}" \ + --printf-format "%u\n" --printf-args "R" | \ + grep -w -v '0' | wc -l +} + +taprio_wait_for_admin() +{ + local if_name="$1"; shift + + "$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name" +} diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 6f0a2e452ba1..b43816dd998c 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -428,6 +428,14 @@ __test_flood() test_flood() { __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood" + + # Add an entry with arbitrary destination IP. Verify that packets are + # not duplicated (this can happen if hardware floods the packets, and + # then traps them due to misconfiguration, so software data path repeats + # flooding and resends packets). + bridge fdb append dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self + __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood, unresolved FDB entry" + bridge fdb del dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self } vxlan_fdb_add_del() @@ -680,9 +688,9 @@ test_learning() local mac=de:ad:be:ef:13:37 local dst=192.0.2.100 - # Enable learning on the VxLAN device and set ageing time to 10 seconds - ip link set dev br1 type bridge ageing_time 1000 - ip link set dev vx1 type vxlan ageing 10 + # Enable learning on the VxLAN device and set ageing time to 30 seconds + ip link set dev br1 type bridge ageing_time 3000 + ip link set dev vx1 type vxlan ageing 30 ip link set dev vx1 type vxlan learning reapply_config @@ -740,7 +748,9 @@ test_learning() vxlan_flood_test $mac $dst 0 10 0 - sleep 20 + # The entry should age out when it only forwards traffic + $MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q & + sleep 60 bridge fdb show brport vx1 | grep $mac | grep -q self check_fail $? diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh index fb9a34cb50c6..afc65647f673 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -539,6 +539,21 @@ test_flood() 10 10 0 10 0 __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \ 10 0 10 0 10 + + # Add entries with arbitrary destination IP. Verify that packets are + # not duplicated (this can happen if hardware floods the packets, and + # then traps them due to misconfiguration, so software data path repeats + # flooding and resends packets). + bridge fdb append dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self + + __test_flood de:ad:be:ef:13:37 192.0.2.100 10 \ + "flood vlan 10, unresolved FDB entry" 10 10 0 10 0 + __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 \ + "flood vlan 20, unresolved FDB entry" 10 0 10 0 10 + + bridge fdb del dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self } vxlan_fdb_add_del() diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh new file mode 100755 index 000000000000..46c31794b91b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh @@ -0,0 +1,352 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|---------------+ +# | +# +----|--------------------------------+ +# | SW | | +# | +--|------------------------------+ | +# | | + $swp1 BR1 (802.1d) | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 192.0.2.17 | | +# | | id 1000 dstport $VXPORT | | +# | +---------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +--|----------------------------------+ +# | +# +--|----------------------------------+ +# | | | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | +# | VRP2 (vrf) | +# +-------------------------------------+ + +: ${VXPORT:=4789} +: ${ALL_TESTS:=" + default_test + plain_test + reserved_0_test + reserved_10_test + reserved_31_test + reserved_56_test + reserved_63_test + "} + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + defer simple_if_fini $h1 192.0.2.1/28 + + tc qdisc add dev $h1 clsact + defer tc qdisc del dev $h1 clsact + + tc filter add dev $h1 ingress pref 77 \ + prot ip flower skip_hw ip_proto icmp action drop + defer tc filter del dev $h1 ingress pref 77 +} + +switch_create() +{ + ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip_link_set_addr br1 $(mac_get $swp1) + ip_link_set_up br1 + + ip_link_set_up $rp1 + ip_addr_add $rp1 192.0.2.17/28 + ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18 + + ip_link_set_master $swp1 br1 + ip_link_set_up $swp1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + defer simple_if_fini $rp2 192.0.2.18/28 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + defer vrf_cleanup + + forwarding_enable + defer forwarding_restore + + h1_create + switch_create + + vrp2_create +} + +vxlan_header_bytes() +{ + local vni=$1; shift + local -a extra_bits=("$@") + local -a bits + local i + + for ((i=0; i < 64; i++)); do + bits[i]=0 + done + + # Bit 4 is the I flag and is always on. + bits[4]=1 + + for i in ${extra_bits[@]}; do + bits[i]=1 + done + + # Bits 32..55 carry the VNI + local mask=0x800000 + for ((i=0; i < 24; i++)); do + bits[$((i + 32))]=$(((vni & mask) != 0)) + ((mask >>= 1)) + done + + local bytes + for ((i=0; i < 8; i++)); do + local byte=0 + local j + for ((j=0; j < 8; j++)); do + local bit=${bits[8 * i + j]} + ((byte += bit << (7 - j))) + done + bytes+=$(printf %02x $byte): + done + + echo ${bytes%:} +} + +neg_bytes() +{ + local bytes=$1; shift + + local -A neg=([0]=f [1]=e [2]=d [3]=c [4]=b [5]=a [6]=9 [7]=8 + [8]=7 [9]=6 [a]=5 [b]=4 [c]=3 [d]=2 [e]=1 [f]=0 [:]=:) + local out + local i + + for ((i=0; i < ${#bytes}; i++)); do + local c=${bytes:$i:1} + out+=${neg[$c]} + done + echo $out +} + +vxlan_ping_do() +{ + local count=$1; shift + local dev=$1; shift + local next_hop_mac=$1; shift + local dest_ip=$1; shift + local dest_mac=$1; shift + local vni=$1; shift + local reserved_bits=$1; shift + + local vxlan_header=$(vxlan_header_bytes $vni $reserved_bits) + + $MZ $dev -c $count -d 100msec -q \ + -b $next_hop_mac -B $dest_ip \ + -t udp sp=23456,dp=$VXPORT,p=$(: + )"$vxlan_header:"$( : VXLAN + )"$dest_mac:"$( : ETH daddr + )"00:11:22:33:44:55:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:54:"$( : IP total length + )"99:83:"$( : IP identification + )"40:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"01:"$( : IP proto + )"00:00:"$( : IP header csum + )"$(ipv4_to_bytes 192.0.2.3):"$( : IP saddr + )"$(ipv4_to_bytes 192.0.2.1):"$( : IP daddr + )"08:"$( : ICMP type + )"00:"$( : ICMP code + )"8b:f2:"$( : ICMP csum + )"1f:6a:"$( : ICMP request identifier + )"00:01:"$( : ICMP request seq. number + )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload + )"6d:74:0b:00:00:00:00:00:"$( : + )"10:11:12:13:14:15:16:17:"$( : + )"18:19:1a:1b:1c:1d:1e:1f:"$( : + )"20:21:22:23:24:25:26:27:"$( : + )"28:29:2a:2b:2c:2d:2e:2f:"$( : + )"30:31:32:33:34:35:36:37" +} + +vxlan_device_add() +{ + ip_link_add vx1 up type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 "$@" + ip_link_set_master vx1 br1 +} + +vxlan_all_reserved_bits() +{ + local i + + for ((i=0; i < 64; i++)); do + if ((i == 4 || i >= 32 && i < 56)); then + continue + fi + echo $i + done +} + +vxlan_ping_vanilla() +{ + vxlan_ping_do 10 $rp2 $(mac_get $rp1) 192.0.2.17 $(mac_get $h1) 1000 +} + +vxlan_ping_reserved() +{ + for bit in $(vxlan_all_reserved_bits); do + vxlan_ping_do 1 $rp2 $(mac_get $rp1) \ + 192.0.2.17 $(mac_get $h1) 1000 "$bit" + ((n++)) + done +} + +vxlan_ping_test() +{ + local what=$1; shift + local get_stat=$1; shift + local expect=$1; shift + + RET=0 + + local t0=$($get_stat) + + "$@" + check_err $? "Failure when running $@" + + local t1=$($get_stat) + local delta=$((t1 - t0)) + + ((expect == delta)) + check_err $? "Expected to capture $expect packets, got $delta." + + log_test "$what" +} + +__default_test_do() +{ + local n_allowed_bits=$1; shift + local what=$1; shift + + vxlan_ping_test "$what: clean packets" \ + "tc_rule_stats_get $h1 77 ingress" \ + 10 vxlan_ping_vanilla + + local t0=$(link_stats_get vx1 rx errors) + vxlan_ping_test "$what: mangled packets" \ + "tc_rule_stats_get $h1 77 ingress" \ + $n_allowed_bits vxlan_ping_reserved + local t1=$(link_stats_get vx1 rx errors) + + RET=0 + local expect=$((39 - n_allowed_bits)) + local delta=$((t1 - t0)) + ((expect == delta)) + check_err $? "Expected $expect error packets, got $delta." + log_test "$what: drops reported" +} + +default_test_do() +{ + vxlan_device_add + __default_test_do 0 "Default" +} + +default_test() +{ + in_defer_scope \ + default_test_do +} + +plain_test_do() +{ + vxlan_device_add reserved_bits 0xf7ffffff000000ff + __default_test_do 0 "reserved_bits 0xf7ffffff000000ff" +} + +plain_test() +{ + in_defer_scope \ + plain_test_do +} + +reserved_test() +{ + local bit=$1; shift + + local allowed_bytes=$(vxlan_header_bytes 0xffffff $bit) + local reserved_bytes=$(neg_bytes $allowed_bytes) + local reserved_bits=${reserved_bytes//:/} + + vxlan_device_add reserved_bits 0x$reserved_bits + __default_test_do 1 "reserved_bits 0x$reserved_bits" +} + +reserved_0_test() +{ + in_defer_scope \ + reserved_test 0 +} + +reserved_10_test() +{ + in_defer_scope \ + reserved_test 10 +} + +reserved_31_test() +{ + in_defer_scope \ + reserved_test 31 +} + +reserved_56_test() +{ + in_defer_scope \ + reserved_test 56 +} + +reserved_63_test() +{ + in_defer_scope \ + reserved_test 63 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh new file mode 100755 index 000000000000..5b34f6e1f831 --- /dev/null +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./lib.sh + +PAUSE_ON_FAIL="no" + +# The trap function handler +# +exit_cleanup_all() +{ + cleanup_all_ns + + exit "${EXIT_STATUS}" +} + +# Add fake IPv4 and IPv6 networks on the loopback device, to be used as +# underlay by future GRE devices. +# +setup_basenet() +{ + ip -netns "${NS0}" link set dev lo up + ip -netns "${NS0}" address add dev lo 192.0.2.10/24 + ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad +} + +# Check if network device has an IPv6 link-local address assigned. +# +# Parameters: +# +# * $1: The network device to test +# * $2: An extra regular expression that should be matched (to verify the +# presence of extra attributes) +# * $3: The expected return code from grep (to allow checking the absence of +# a link-local address) +# * $4: The user visible name for the scenario being tested +# +check_ipv6_ll_addr() +{ + local DEV="$1" + local EXTRA_MATCH="$2" + local XRET="$3" + local MSG="$4" + + RET=0 + set +e + ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" + check_err_fail "${XRET}" $? "" + log_test "${MSG}" + set -e +} + +# Create a GRE device and verify that it gets an IPv6 link-local address as +# expected. +# +# Parameters: +# +# * $1: The device type (gre, ip6gre, gretap or ip6gretap) +# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") +# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") +# * $4: The IPv6 interface identifier generation mode to use for the GRE +# device (eui64, none, stable-privacy or random). +# +test_gre_device() +{ + local GRE_TYPE="$1" + local LOCAL_IP="$2" + local REMOTE_IP="$3" + local MODE="$4" + local ADDR_GEN_MODE + local MATCH_REGEXP + local MSG + + ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}" + + case "${MODE}" in + "eui64") + ADDR_GEN_MODE=0 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + "none") + ADDR_GEN_MODE=1 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=1 # No link-local address should be generated + ;; + "stable-privacy") + ADDR_GEN_MODE=2 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + # Initialise stable_secret (required for stable-privacy mode) + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd" + ;; + "random") + ADDR_GEN_MODE=3 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + esac + + # Check that IPv6 link-local address is generated when device goes up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + ip -netns "${NS0}" link set dev gretest up + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + + # Now disable link-local address generation + ip -netns "${NS0}" link set dev gretest down + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 + ip -netns "${NS0}" link set dev gretest up + + # Check that link-local address generation works when re-enabled while + # the device is already up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + + ip -netns "${NS0}" link del dev gretest +} + +test_gre4() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "gre" "gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" + done + done +} + +test_gre6() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "ip6gre" "ip6gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" + done + done +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +setup_ns NS0 + +set -e +trap exit_cleanup_all EXIT + +setup_basenet + +test_gre4 +test_gre6 diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 353e1e867fbb..d5824eadea10 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -93,6 +93,7 @@ static bool tx_socket = true; static int tcp_offset = -1; static int total_hdr_len = -1; static int ethhdr_proto = -1; +static const int num_flush_id_cases = 6; static void vlog(const char *fmt, ...) { @@ -119,6 +120,9 @@ static void setup_sock_filter(int fd) next_off = offsetof(struct ipv6hdr, nexthdr); ipproto_off = ETH_HLEN + next_off; + /* Overridden later if exthdrs are used: */ + opt_ipproto_off = ipproto_off; + if (strcmp(testname, "ip") == 0) { if (proto == PF_INET) optlen = sizeof(struct ip_timestamp); @@ -617,6 +621,113 @@ static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE); } +static void fix_ip4_checksum(struct iphdr *iph) +{ + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); +} + +static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) +{ + static char buf1[MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf2[MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf3[MAX_HDR_LEN + PAYLOAD_LEN]; + bool send_three = false; + struct iphdr *iph1; + struct iphdr *iph2; + struct iphdr *iph3; + + iph1 = (struct iphdr *)(buf1 + ETH_HLEN); + iph2 = (struct iphdr *)(buf2 + ETH_HLEN); + iph3 = (struct iphdr *)(buf3 + ETH_HLEN); + + create_packet(buf1, 0, 0, PAYLOAD_LEN, 0); + create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + + switch (tcase) { + case 0: /* DF=1, Incrementing - should coalesce */ + iph1->frag_off |= htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off |= htons(IP_DF); + iph2->id = htons(9); + break; + + case 1: /* DF=1, Fixed - should coalesce */ + iph1->frag_off |= htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off |= htons(IP_DF); + iph2->id = htons(8); + break; + + case 2: /* DF=0, Incrementing - should coalesce */ + iph1->frag_off &= ~htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off &= ~htons(IP_DF); + iph2->id = htons(9); + break; + + case 3: /* DF=0, Fixed - should not coalesce */ + iph1->frag_off &= ~htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off &= ~htons(IP_DF); + iph2->id = htons(8); + break; + + case 4: /* DF=1, two packets incrementing, and one fixed - should + * coalesce only the first two packets + */ + iph1->frag_off |= htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off |= htons(IP_DF); + iph2->id = htons(9); + + iph3->frag_off |= htons(IP_DF); + iph3->id = htons(9); + send_three = true; + break; + + case 5: /* DF=1, two packets fixed, and one incrementing - should + * coalesce only the first two packets + */ + iph1->frag_off |= htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off |= htons(IP_DF); + iph2->id = htons(8); + + iph3->frag_off |= htons(IP_DF); + iph3->id = htons(9); + send_three = true; + break; + } + + fix_ip4_checksum(iph1); + fix_ip4_checksum(iph2); + write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr); + write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr); + + if (send_three) { + fix_ip4_checksum(iph3); + write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr); + } +} + +static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt) +{ + for (int i = 0; i < num_flush_id_cases; i++) { + sleep(1); + send_flush_id_case(fd, daddr, i); + sleep(1); + write_packet(fd, fin_pkt, total_hdr_len, daddr); + } +} + static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2) { static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; @@ -935,6 +1046,8 @@ static void gro_sender(void) send_fragment4(txfd, &daddr); sleep(1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + test_flush_id(txfd, &daddr, fin_pkt); } else if (proto == PF_INET6) { sleep(1); send_fragment6(txfd, &daddr); @@ -1061,6 +1174,34 @@ static void gro_receiver(void) printf("fragmented ip4 doesn't coalesce: "); check_recv_pkts(rxfd, correct_payload, 2); + + /* is_atomic checks */ + printf("DF=1, Incrementing - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("DF=1, Fixed - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("DF=0, Incrementing - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("DF=0, Fixed - should not coalesce: "); + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + + printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + + printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); } else if (proto == PF_INET6) { /* GRO doesn't check for ipv6 hop limit when flushing. * Hence no corresponding test to the ipv4 case. @@ -1177,11 +1318,13 @@ int main(int argc, char **argv) read_MAC(src_mac, smac); read_MAC(dst_mac, dmac); - if (tx_socket) + if (tx_socket) { gro_sender(); - else + } else { + /* Only the receiver exit status determines test success. */ gro_receiver(); + fprintf(stderr, "Gro::%s test passed.\n", testname); + } - fprintf(stderr, "Gro::%s test passed.\n", testname); return 0; } diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh index 02c21ff4ca81..9e3f186bc2a1 100755 --- a/tools/testing/selftests/net/gro.sh +++ b/tools/testing/selftests/net/gro.sh @@ -18,10 +18,10 @@ run_test() { "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" ) setup_ns - # Each test is run 3 times to deflake, because given the receive timing, + # Each test is run 6 times to deflake, because given the receive timing, # not all packets that should coalesce will be considered in the same flow # on every try. - for tries in {1..3}; do + for tries in {1..6}; do # Actual test starts here ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ 1>>log.txt & @@ -100,5 +100,6 @@ trap cleanup EXIT if [[ "${test}" == "all" ]]; then run_all_tests else - run_test "${proto}" "${test}" + exit_code=$(run_test "${proto}" "${test}") + exit $exit_code fi; diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile index 92c1d9d080cd..884cd2cc0681 100644 --- a/tools/testing/selftests/net/hsr/Makefile +++ b/tools/testing/selftests/net/hsr/Makefile @@ -2,6 +2,7 @@ top_srcdir = ../../../../.. -TEST_PROGS := hsr_ping.sh +TEST_PROGS := hsr_ping.sh hsr_redbox.sh +TEST_FILES += hsr_common.sh include ../../lib.mk diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config index 22061204fb69..555a868743f0 100644 --- a/tools/testing/selftests/net/hsr/config +++ b/tools/testing/selftests/net/hsr/config @@ -2,3 +2,5 @@ CONFIG_IPV6=y CONFIG_NET_SCH_NETEM=m CONFIG_HSR=y CONFIG_VETH=y +CONFIG_BRIDGE=y +CONFIG_VLAN_8021Q=m diff --git a/tools/testing/selftests/net/hsr/hsr_common.sh b/tools/testing/selftests/net/hsr/hsr_common.sh new file mode 100644 index 000000000000..1dc882ac1c74 --- /dev/null +++ b/tools/testing/selftests/net/hsr/hsr_common.sh @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: GPL-2.0 +# Common code for HSR testing scripts + +source ../lib.sh +ret=0 +ksft_skip=4 + +# $1: IP address +is_v6() +{ + [ -z "${1##*:*}" ] +} + +do_ping() +{ + local netns="$1" + local connect_addr="$2" + local ping_args="-q -c 2 -i 0.1" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null + if [ $? -ne 0 ] ; then + echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2 + ret=1 + return 1 + fi + + return 0 +} + +do_ping_long() +{ + local netns="$1" + local connect_addr="$2" + local ping_args="-q -c 10 -i 0.1" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)" + if [ $? -ne 0 ] ; then + echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2 + ret=1 + return 1 + fi + + VAL="$(echo $OUT | cut -d' ' -f1-8)" + SED_VAL="$(echo ${VAL} | sed -r -e 's/([0-9]{2}).*([0-9]{2}).*[[:space:]]([0-9]+%).*/\1 transmitted \2 received \3 loss/')" + if [ "${SED_VAL}" != "10 transmitted 10 received 0% loss" ] + then + echo "$netns -> $connect_addr ping TEST [ FAIL ]" + echo "Expect to send and receive 10 packets and no duplicates." + echo "Full message: ${OUT}." + ret=1 + return 1 + fi + + return 0 +} + +stop_if_error() +{ + local msg="$1" + + if [ ${ret} -ne 0 ]; then + echo "FAIL: ${msg}" 1>&2 + exit ${ret} + fi +} + +check_prerequisites() +{ + ip -Version > /dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip + fi +} diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index 1c6457e54625..5a65f4f836be 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -1,10 +1,10 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ret=0 -ksft_skip=4 ipv6=true +source ./hsr_common.sh + optstring="h4" usage() { echo "Usage: $0 [OPTION]" @@ -27,88 +27,6 @@ while getopts "$optstring" option;do esac done -sec=$(date +%s) -rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) -ns1="ns1-$rndh" -ns2="ns2-$rndh" -ns3="ns3-$rndh" - -cleanup() -{ - local netns - for netns in "$ns1" "$ns2" "$ns3" ;do - ip netns del $netns - done -} - -# $1: IP address -is_v6() -{ - [ -z "${1##*:*}" ] -} - -do_ping() -{ - local netns="$1" - local connect_addr="$2" - local ping_args="-q -c 2" - - if is_v6 "${connect_addr}"; then - $ipv6 || return 0 - ping_args="${ping_args} -6" - fi - - ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null - if [ $? -ne 0 ] ; then - echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2 - ret=1 - return 1 - fi - - return 0 -} - -do_ping_long() -{ - local netns="$1" - local connect_addr="$2" - local ping_args="-q -c 10" - - if is_v6 "${connect_addr}"; then - $ipv6 || return 0 - ping_args="${ping_args} -6" - fi - - OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)" - if [ $? -ne 0 ] ; then - echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2 - ret=1 - return 1 - fi - - VAL="$(echo $OUT | cut -d' ' -f1-8)" - if [ "$VAL" != "10 packets transmitted, 10 received, 0% packet loss," ] - then - echo "$netns -> $connect_addr ping TEST [ FAIL ]" - echo "Expect to send and receive 10 packets and no duplicates." - echo "Full message: ${OUT}." - ret=1 - return 1 - fi - - return 0 -} - -stop_if_error() -{ - local msg="$1" - - if [ ${ret} -ne 0 ]; then - echo "FAIL: ${msg}" 1>&2 - exit ${ret} - fi -} - do_complete_ping_test() { echo "INFO: Initial validation ping." @@ -234,6 +152,15 @@ setup_hsr_interfaces() ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1 + ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2 + + ip -net "$ns2" link set address 00:11:22:00:02:01 dev ns2eth1 + ip -net "$ns2" link set address 00:11:22:00:02:02 dev ns2eth2 + + ip -net "$ns3" link set address 00:11:22:00:03:01 dev ns3eth1 + ip -net "$ns3" link set address 00:11:22:00:03:02 dev ns3eth2 + # All Links up ip -net "$ns1" link set ns1eth1 up ip -net "$ns1" link set ns1eth2 up @@ -248,29 +175,115 @@ setup_hsr_interfaces() ip -net "$ns3" link set hsr3 up } -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi +setup_vlan_interfaces() { + ip -net "$ns1" link add link hsr1 name hsr1.2 type vlan id 2 + ip -net "$ns1" link add link hsr1 name hsr1.3 type vlan id 3 + ip -net "$ns1" link add link hsr1 name hsr1.4 type vlan id 4 + ip -net "$ns1" link add link hsr1 name hsr1.5 type vlan id 5 + + ip -net "$ns2" link add link hsr2 name hsr2.2 type vlan id 2 + ip -net "$ns2" link add link hsr2 name hsr2.3 type vlan id 3 + ip -net "$ns2" link add link hsr2 name hsr2.4 type vlan id 4 + ip -net "$ns2" link add link hsr2 name hsr2.5 type vlan id 5 + + ip -net "$ns3" link add link hsr3 name hsr3.2 type vlan id 2 + ip -net "$ns3" link add link hsr3 name hsr3.3 type vlan id 3 + ip -net "$ns3" link add link hsr3 name hsr3.4 type vlan id 4 + ip -net "$ns3" link add link hsr3 name hsr3.5 type vlan id 5 + + ip -net "$ns1" addr add 100.64.2.1/24 dev hsr1.2 + ip -net "$ns1" addr add 100.64.3.1/24 dev hsr1.3 + ip -net "$ns1" addr add 100.64.4.1/24 dev hsr1.4 + ip -net "$ns1" addr add 100.64.5.1/24 dev hsr1.5 + + ip -net "$ns2" addr add 100.64.2.2/24 dev hsr2.2 + ip -net "$ns2" addr add 100.64.3.2/24 dev hsr2.3 + ip -net "$ns2" addr add 100.64.4.2/24 dev hsr2.4 + ip -net "$ns2" addr add 100.64.5.2/24 dev hsr2.5 + + ip -net "$ns3" addr add 100.64.2.3/24 dev hsr3.2 + ip -net "$ns3" addr add 100.64.3.3/24 dev hsr3.3 + ip -net "$ns3" addr add 100.64.4.3/24 dev hsr3.4 + ip -net "$ns3" addr add 100.64.5.3/24 dev hsr3.5 + + ip -net "$ns1" link set dev hsr1.2 up + ip -net "$ns1" link set dev hsr1.3 up + ip -net "$ns1" link set dev hsr1.4 up + ip -net "$ns1" link set dev hsr1.5 up + + ip -net "$ns2" link set dev hsr2.2 up + ip -net "$ns2" link set dev hsr2.3 up + ip -net "$ns2" link set dev hsr2.4 up + ip -net "$ns2" link set dev hsr2.5 up + + ip -net "$ns3" link set dev hsr3.2 up + ip -net "$ns3" link set dev hsr3.3 up + ip -net "$ns3" link set dev hsr3.4 up + ip -net "$ns3" link set dev hsr3.5 up -trap cleanup EXIT +} -for i in "$ns1" "$ns2" "$ns3" ;do - ip netns add $i || exit $ksft_skip - ip -net $i link set lo up -done +hsr_vlan_ping() { + do_ping "$ns1" 100.64.2.2 + do_ping "$ns1" 100.64.3.2 + do_ping "$ns1" 100.64.4.2 + do_ping "$ns1" 100.64.5.2 + + do_ping "$ns1" 100.64.2.3 + do_ping "$ns1" 100.64.3.3 + do_ping "$ns1" 100.64.4.3 + do_ping "$ns1" 100.64.5.3 + + do_ping "$ns2" 100.64.2.1 + do_ping "$ns2" 100.64.3.1 + do_ping "$ns2" 100.64.4.1 + do_ping "$ns2" 100.64.5.1 + + do_ping "$ns2" 100.64.2.3 + do_ping "$ns2" 100.64.3.3 + do_ping "$ns2" 100.64.4.3 + do_ping "$ns2" 100.64.5.3 + + do_ping "$ns3" 100.64.2.1 + do_ping "$ns3" 100.64.3.1 + do_ping "$ns3" 100.64.4.1 + do_ping "$ns3" 100.64.5.1 + + do_ping "$ns3" 100.64.2.2 + do_ping "$ns3" 100.64.3.2 + do_ping "$ns3" 100.64.4.2 + do_ping "$ns3" 100.64.5.2 +} + +run_vlan_tests() { + vlan_challenged_hsr1=$(ip net exec "$ns1" ethtool -k hsr1 | grep "vlan-challenged" | awk '{print $2}') + vlan_challenged_hsr2=$(ip net exec "$ns2" ethtool -k hsr2 | grep "vlan-challenged" | awk '{print $2}') + vlan_challenged_hsr3=$(ip net exec "$ns3" ethtool -k hsr3 | grep "vlan-challenged" | awk '{print $2}') + + if [[ "$vlan_challenged_hsr1" = "off" || "$vlan_challenged_hsr2" = "off" || "$vlan_challenged_hsr3" = "off" ]]; then + echo "INFO: Running VLAN tests" + setup_vlan_interfaces + hsr_vlan_ping + else + echo "INFO: Not Running VLAN tests as the device does not support VLAN" + fi +} + +check_prerequisites +setup_ns ns1 ns2 ns3 + +trap cleanup_all_ns EXIT setup_hsr_interfaces 0 do_complete_ping_test -cleanup -for i in "$ns1" "$ns2" "$ns3" ;do - ip netns add $i || exit $ksft_skip - ip -net $i link set lo up -done +run_vlan_tests + +setup_ns ns1 ns2 ns3 setup_hsr_interfaces 1 do_complete_ping_test +run_vlan_tests + exit $ret diff --git a/tools/testing/selftests/net/hsr/hsr_redbox.sh b/tools/testing/selftests/net/hsr/hsr_redbox.sh new file mode 100755 index 000000000000..998103502d5d --- /dev/null +++ b/tools/testing/selftests/net/hsr/hsr_redbox.sh @@ -0,0 +1,136 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ipv6=false + +source ./hsr_common.sh + +do_complete_ping_test() +{ + echo "INFO: Initial validation ping (HSR-SAN/RedBox)." + # Each node has to be able to reach each one. + do_ping "${ns1}" 100.64.0.2 + do_ping "${ns2}" 100.64.0.1 + # Ping between SANs (test bridge) + do_ping "${ns4}" 100.64.0.51 + do_ping "${ns5}" 100.64.0.41 + # Ping from SANs to hsr1 (via hsr2) (and opposite) + do_ping "${ns3}" 100.64.0.1 + do_ping "${ns1}" 100.64.0.3 + do_ping "${ns1}" 100.64.0.41 + do_ping "${ns4}" 100.64.0.1 + do_ping "${ns1}" 100.64.0.51 + do_ping "${ns5}" 100.64.0.1 + stop_if_error "Initial validation failed." + + # Wait for MGNT HSR frames being received and nodes being + # merged. + sleep 5 + + echo "INFO: Longer ping test (HSR-SAN/RedBox)." + # Ping from SAN to hsr1 (via hsr2) + do_ping_long "${ns3}" 100.64.0.1 + # Ping from hsr1 (via hsr2) to SANs (and opposite) + do_ping_long "${ns1}" 100.64.0.3 + do_ping_long "${ns1}" 100.64.0.41 + do_ping_long "${ns4}" 100.64.0.1 + do_ping_long "${ns1}" 100.64.0.51 + do_ping_long "${ns5}" 100.64.0.1 + stop_if_error "Longer ping test failed." + + echo "INFO: All good." +} + +setup_hsr_interfaces() +{ + local HSRv="$1" + + echo "INFO: preparing interfaces for HSRv${HSRv} (HSR-SAN/RedBox)." +# +# IPv4 addresses (100.64.X.Y/24), and [X.Y] is presented on below diagram: +# +# +# |NS1 | |NS4 | +# | [0.1] | | | +# | /-- hsr1 --\ | | [0.41] | +# | ns1eth1 ns1eth2 | | ns4eth1 (SAN) | +# |------------------------| |-------------------| +# | | | +# | | | +# | | | +# |------------------------| |-------------------------------| +# | ns2eth1 ns2eth2 | | ns3eth2 | +# | \-- hsr2 --/ | | / | +# | [0.2] \ | | / | |------------| +# | ns2eth3 |---| ns3eth1 -- ns3br1 -- ns3eth3--|--| ns5eth1 | +# | (interlink)| | [0.3] [0.11] | | [0.51] | +# |NS2 (RedBOX) | |NS3 (BR) | | NS5 (SAN) | +# +# + # Check if iproute2 supports adding interlink port to hsrX device + ip link help hsr | grep -q INTERLINK + [ $? -ne 0 ] && { echo "iproute2: HSR interlink interface not supported!"; exit 0; } + + # Create interfaces for name spaces + ip link add ns1eth1 netns "${ns1}" type veth peer name ns2eth1 netns "${ns2}" + ip link add ns1eth2 netns "${ns1}" type veth peer name ns2eth2 netns "${ns2}" + ip link add ns2eth3 netns "${ns2}" type veth peer name ns3eth1 netns "${ns3}" + ip link add ns3eth2 netns "${ns3}" type veth peer name ns4eth1 netns "${ns4}" + ip link add ns3eth3 netns "${ns3}" type veth peer name ns5eth1 netns "${ns5}" + + sleep 1 + + ip -n "${ns1}" link set ns1eth1 up + ip -n "${ns1}" link set ns1eth2 up + + ip -n "${ns2}" link set ns2eth1 up + ip -n "${ns2}" link set ns2eth2 up + ip -n "${ns2}" link set ns2eth3 up + + ip -n "${ns3}" link add name ns3br1 type bridge + ip -n "${ns3}" link set ns3br1 up + ip -n "${ns3}" link set ns3eth1 master ns3br1 up + ip -n "${ns3}" link set ns3eth2 master ns3br1 up + ip -n "${ns3}" link set ns3eth3 master ns3br1 up + + ip -n "${ns4}" link set ns4eth1 up + ip -n "${ns5}" link set ns5eth1 up + + ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1 + ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2 + + ip -net "$ns2" link set address 00:11:22:00:02:01 dev ns2eth1 + ip -net "$ns2" link set address 00:11:22:00:02:02 dev ns2eth2 + ip -net "$ns2" link set address 00:11:22:00:02:03 dev ns2eth3 + + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth1 + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth2 + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth3 + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3br1 + + ip -net "$ns4" link set address 00:11:22:00:04:01 dev ns4eth1 + ip -net "$ns5" link set address 00:11:22:00:05:01 dev ns5eth1 + + ip -net "${ns1}" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version ${HSRv} proto 0 + ip -net "${ns2}" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 interlink ns2eth3 supervision 45 version ${HSRv} proto 0 + + ip -n "${ns1}" addr add 100.64.0.1/24 dev hsr1 + ip -n "${ns2}" addr add 100.64.0.2/24 dev hsr2 + ip -n "${ns3}" addr add 100.64.0.11/24 dev ns3br1 + ip -n "${ns3}" addr add 100.64.0.3/24 dev ns3eth1 + ip -n "${ns4}" addr add 100.64.0.41/24 dev ns4eth1 + ip -n "${ns5}" addr add 100.64.0.51/24 dev ns5eth1 + + ip -n "${ns1}" link set hsr1 up + ip -n "${ns2}" link set hsr2 up +} + +check_prerequisites +setup_ns ns1 ns2 ns3 ns4 ns5 + +trap cleanup_all_ns EXIT + +setup_hsr_interfaces 1 +do_complete_ping_test + +exit $ret diff --git a/tools/testing/selftests/net/hsr/settings b/tools/testing/selftests/net/hsr/settings new file mode 100644 index 000000000000..0fbc037f2aa8 --- /dev/null +++ b/tools/testing/selftests/net/hsr/settings @@ -0,0 +1 @@ +timeout=50 diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index d6f0e449c029..b13c89a99ecb 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -178,8 +178,6 @@ setup() else ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 - ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index 12491850ae98..845c26dd01a9 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -3,119 +3,106 @@ # # Author: Justin Iurman <justin.iurman@uliege.be> # -# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data -# consistency directly inside packets on the receiver side. Tests are divided -# into three categories: OUTPUT (evaluates the IOAM processing by the sender), -# INPUT (evaluates the IOAM processing by a receiver) and GLOBAL (evaluates -# wider use cases that do not fall into the other two categories). Both OUTPUT -# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL -# tests use the entire three-node topology (alpha, beta, gamma). Each test is -# documented inside its own handler in the code below. +# This script evaluates IOAM for IPv6 by checking local IOAM configurations and +# IOAM data inside packets. There are three categories of tests: LOCAL, OUTPUT, +# and INPUT. The former (LOCAL) checks all IOAM related configurations locally +# without sending packets. OUTPUT tests verify the processing of an IOAM +# encapsulating node, while INPUT tests verify the processing of an IOAM transit +# node. Both OUTPUT and INPUT tests send packets. Each test is documented inside +# its own handler. # -# An IOAM domain is configured from Alpha to Gamma but not on the reverse path. -# When either Beta or Gamma is the destination (depending on the test category), -# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop. +# The topology used for OUTPUT and INPUT tests is made of three nodes: +# - Alpha (the IOAM encapsulating node) +# - Beta (the IOAM transit node) +# - Gamma (the receiver) ** # +# An IOAM domain is configured from Alpha to Beta, but not on the reverse path. +# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop. # -# +-------------------+ +-------------------+ -# | | | | -# | Alpha netns | | Gamma netns | -# | | | | -# | +-------------+ | | +-------------+ | -# | | veth0 | | | | veth0 | | -# | | db01::2/64 | | | | db02::2/64 | | -# | +-------------+ | | +-------------+ | -# | . | | . | -# +-------------------+ +-------------------+ -# . . -# . . -# . . -# +----------------------------------------------------+ -# | . . | -# | +-------------+ +-------------+ | -# | | veth0 | | veth1 | | -# | | db01::1/64 | ................ | db02::1/64 | | -# | +-------------+ +-------------+ | -# | | -# | Beta netns | -# | | -# +----------------------------------------------------+ +# ** Gamma is required because ioam6_parser.c uses a packet socket and we need +# to see IOAM data inserted by the very last node (Beta), which would happen +# _after_ we get a copy of the packet on Beta. Note that using an +# IPv6 raw socket with IPV6_RECVHOPOPTS on Beta would not be enough: we also +# need to access the IPv6 header to check some fields (e.g., source and +# destination addresses), which is not possible in that case. As a +# consequence, we need Gamma as a receiver to run ioam6_parser.c which uses a +# packet socket. # # +# +-----------------------+ +-----------------------+ +# | | | | +# | Alpha netns | | Gamma netns | +# | | | | +# | +-------------------+ | | +-------------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:db8:1::2/64 | | | | 2001:db8:2::2/64 | | +# | +-------------------+ | | +-------------------+ | +# | . | | . | +# +-----------.-----------+ +-----------.-----------+ +# . . +# . . +# . . +# +-----------.----------------------------------.-----------+ +# | . . | +# | +-------------------+ +-------------------+ | +# | | veth0 | | veth1 | | +# | | 2001:db8:1::1/64 | ............ | 2001:db8:2::1/64 | | +# | +-------------------+ +-------------------+ | +# | | +# | Beta netns | +# | | +# +----------------------------------------------------------+ # -# ============================================================= -# | Alpha - IOAM configuration | -# +===========================================================+ -# | Node ID | 1 | -# +-----------------------------------------------------------+ -# | Node Wide ID | 11111111 | -# +-----------------------------------------------------------+ -# | Ingress ID | 0xffff (default value) | -# +-----------------------------------------------------------+ -# | Ingress Wide ID | 0xffffffff (default value) | -# +-----------------------------------------------------------+ -# | Egress ID | 101 | -# +-----------------------------------------------------------+ -# | Egress Wide ID | 101101 | -# +-----------------------------------------------------------+ -# | Namespace Data | 0xdeadbee0 | -# +-----------------------------------------------------------+ -# | Namespace Wide Data | 0xcafec0caf00dc0de | -# +-----------------------------------------------------------+ -# | Schema ID | 777 | -# +-----------------------------------------------------------+ -# | Schema Data | something that will be 4n-aligned | -# +-----------------------------------------------------------+ # # -# ============================================================= -# | Beta - IOAM configuration | -# +===========================================================+ -# | Node ID | 2 | -# +-----------------------------------------------------------+ -# | Node Wide ID | 22222222 | -# +-----------------------------------------------------------+ -# | Ingress ID | 201 | -# +-----------------------------------------------------------+ -# | Ingress Wide ID | 201201 | -# +-----------------------------------------------------------+ -# | Egress ID | 202 | -# +-----------------------------------------------------------+ -# | Egress Wide ID | 202202 | -# +-----------------------------------------------------------+ -# | Namespace Data | 0xdeadbee1 | -# +-----------------------------------------------------------+ -# | Namespace Wide Data | 0xcafec0caf11dc0de | -# +-----------------------------------------------------------+ -# | Schema ID | 666 | -# +-----------------------------------------------------------+ -# | Schema Data | Hello there -Obi | -# +-----------------------------------------------------------+ +# +==========================================================+ +# | Alpha - IOAM configuration | +# +=====================+====================================+ +# | Node ID | 1 | +# +---------------------+------------------------------------+ +# | Node Wide ID | 11111111 | +# +---------------------+------------------------------------+ +# | Ingress ID | 0xffff (default value) | +# +---------------------+------------------------------------+ +# | Ingress Wide ID | 0xffffffff (default value) | +# +---------------------+------------------------------------+ +# | Egress ID | 101 | +# +---------------------+------------------------------------+ +# | Egress Wide ID | 101101 | +# +---------------------+------------------------------------+ +# | Namespace Data | 0xdeadbeef | +# +---------------------+------------------------------------+ +# | Namespace Wide Data | 0xcafec0caf00dc0de | +# +---------------------+------------------------------------+ +# | Schema ID | 777 | +# +---------------------+------------------------------------+ +# | Schema Data | something that will be 4n-aligned | +# +---------------------+------------------------------------+ # # -# ============================================================= -# | Gamma - IOAM configuration | -# +===========================================================+ -# | Node ID | 3 | -# +-----------------------------------------------------------+ -# | Node Wide ID | 33333333 | -# +-----------------------------------------------------------+ -# | Ingress ID | 301 | -# +-----------------------------------------------------------+ -# | Ingress Wide ID | 301301 | -# +-----------------------------------------------------------+ -# | Egress ID | 0xffff (default value) | -# +-----------------------------------------------------------+ -# | Egress Wide ID | 0xffffffff (default value) | -# +-----------------------------------------------------------+ -# | Namespace Data | 0xdeadbee2 | -# +-----------------------------------------------------------+ -# | Namespace Wide Data | 0xcafec0caf22dc0de | -# +-----------------------------------------------------------+ -# | Schema ID | 0xffffff (= None) | -# +-----------------------------------------------------------+ -# | Schema Data | | -# +-----------------------------------------------------------+ +# +==========================================================+ +# | Beta - IOAM configuration | +# +=====================+====================================+ +# | Node ID | 2 | +# +---------------------+------------------------------------+ +# | Node Wide ID | 22222222 | +# +---------------------+------------------------------------+ +# | Ingress ID | 201 | +# +---------------------+------------------------------------+ +# | Ingress Wide ID | 201201 | +# +---------------------+------------------------------------+ +# | Egress ID | 202 | +# +---------------------+------------------------------------+ +# | Egress Wide ID | 202202 | +# +---------------------+------------------------------------+ +# | Namespace Data | 0xffffffff (default value) | +# +---------------------+------------------------------------+ +# | Namespace Wide Data | 0xffffffffffffffff (default value) | +# +---------------------+------------------------------------+ +# | Schema ID | 0xffffff (= None) | +# +---------------------+------------------------------------+ +# | Schema Data | | +# +---------------------+------------------------------------+ source lib.sh @@ -128,64 +115,69 @@ source lib.sh ################################################################################ ALPHA=( - 1 # ID - 11111111 # Wide ID - 0xffff # Ingress ID - 0xffffffff # Ingress Wide ID - 101 # Egress ID - 101101 # Egress Wide ID - 0xdeadbee0 # Namespace Data - 0xcafec0caf00dc0de # Namespace Wide Data - 777 # Schema ID (0xffffff = None) - "something that will be 4n-aligned" # Schema Data + 1 # ID + 11111111 # Wide ID + 0xffff # Ingress ID (default value) + 0xffffffff # Ingress Wide ID (default value) + 101 # Egress ID + 101101 # Egress Wide ID + 0xdeadbeef # Namespace Data + 0xcafec0caf00dc0de # Namespace Wide Data + 777 # Schema ID + "something that will be 4n-aligned" # Schema Data ) BETA=( - 2 - 22222222 - 201 - 201201 - 202 - 202202 - 0xdeadbee1 - 0xcafec0caf11dc0de - 666 - "Hello there -Obi" + 2 # ID + 22222222 # Wide ID + 201 # Ingress ID + 201201 # Ingress Wide ID + 202 # Egress ID + 202202 # Egress Wide ID + 0xffffffff # Namespace Data (empty value) + 0xffffffffffffffff # Namespace Wide Data (empty value) + 0xffffff # Schema ID (empty value) + "" # Schema Data (empty value) ) -GAMMA=( - 3 - 33333333 - 301 - 301301 - 0xffff - 0xffffffff - 0xdeadbee2 - 0xcafec0caf22dc0de - 0xffffff - "" -) +TESTS_LOCAL=" + local_sysctl_ioam_id + local_sysctl_ioam_id_wide + local_sysctl_ioam_intf_id + local_sysctl_ioam_intf_id_wide + local_sysctl_ioam_intf_enabled + local_ioam_namespace + local_ioam_schema + local_ioam_schema_namespace + local_route_ns + local_route_tunsrc + local_route_tundst + local_route_trace_type + local_route_trace_size + local_route_trace_type_bits + local_route_trace_size_values +" TESTS_OUTPUT=" - out_undef_ns - out_no_room - out_bits - out_full_supp_trace + output_undef_ns + output_no_room + output_no_room_oss + output_bits + output_sizes + output_full_supp_trace " TESTS_INPUT=" - in_undef_ns - in_no_room - in_oflag - in_bits - in_full_supp_trace + input_undef_ns + input_no_room + input_no_room_oss + input_disabled + input_oflag + input_bits + input_sizes + input_full_supp_trace " -TESTS_GLOBAL=" - fwd_full_supp_trace -" - - ################################################################################ # # # LIBRARY # @@ -194,66 +186,64 @@ TESTS_GLOBAL=" check_kernel_compatibility() { - setup_ns ioam_tmp_node - ip link add name veth0 netns $ioam_tmp_node type veth \ - peer name veth1 netns $ioam_tmp_node + setup_ns ioam_tmp_node &>/dev/null + local ret=$? - ip -netns $ioam_tmp_node link set veth0 up - ip -netns $ioam_tmp_node link set veth1 up + ip link add name veth0 netns $ioam_tmp_node type veth \ + peer name veth1 netns $ioam_tmp_node &>/dev/null + ret=$((ret + $?)) - ip -netns $ioam_tmp_node ioam namespace add 0 - ns_ad=$? + ip -netns $ioam_tmp_node link set veth0 up &>/dev/null + ret=$((ret + $?)) - ip -netns $ioam_tmp_node ioam namespace show | grep -q "namespace 0" - ns_sh=$? + ip -netns $ioam_tmp_node link set veth1 up &>/dev/null + ret=$((ret + $?)) - if [[ $ns_ad != 0 || $ns_sh != 0 ]] + if [ $ret != 0 ] then - echo "SKIP: kernel version probably too old, missing ioam support" - ip link del veth0 2>/dev/null || true - cleanup_ns $ioam_tmp_node || true + echo "SKIP: Setup failed." + cleanup_ns $ioam_tmp_node exit $ksft_skip fi - ip -netns $ioam_tmp_node route add db02::/64 encap ioam6 mode inline \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 - tr_ad=$? + ip -netns $ioam_tmp_node route add 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 0 size 4 dev veth0 &>/dev/null + ret=$? - ip -netns $ioam_tmp_node -6 route | grep -q "encap ioam6" - tr_sh=$? + ip -netns $ioam_tmp_node -6 route 2>/dev/null | grep -q "encap ioam6" + ret=$((ret + $?)) - if [[ $tr_ad != 0 || $tr_sh != 0 ]] + if [ $ret != 0 ] then - echo "SKIP: cannot attach an ioam trace to a route, did you compile" \ - "without CONFIG_IPV6_IOAM6_LWTUNNEL?" - ip link del veth0 2>/dev/null || true - cleanup_ns $ioam_tmp_node || true + echo "SKIP: Cannot attach an IOAM trace to a route. Was your kernel" \ + "compiled without CONFIG_IPV6_IOAM6_LWTUNNEL? Are you running an" \ + "old kernel? Are you using an old version of iproute2?" + cleanup_ns $ioam_tmp_node exit $ksft_skip fi - ip link del veth0 2>/dev/null || true - cleanup_ns $ioam_tmp_node || true + cleanup_ns $ioam_tmp_node - lsmod | grep -q "ip6_tunnel" + lsmod 2>/dev/null | grep -q "ip6_tunnel" ip6tnl_loaded=$? - if [ $ip6tnl_loaded = 0 ] + if [ $ip6tnl_loaded == 0 ] then encap_tests=0 else modprobe ip6_tunnel &>/dev/null - lsmod | grep -q "ip6_tunnel" + lsmod 2>/dev/null | grep -q "ip6_tunnel" encap_tests=$? if [ $encap_tests != 0 ] then - ip a | grep -q "ip6tnl0" + ip a 2>/dev/null | grep -q "ip6tnl0" encap_tests=$? if [ $encap_tests != 0 ] then echo "Note: ip6_tunnel not found neither as a module nor inside the" \ - "kernel, tests that require it (encap mode) will be omitted" + "kernel. Any tests that require it will be skipped." fi fi fi @@ -261,477 +251,1400 @@ check_kernel_compatibility() cleanup() { - ip link del ioam-veth-alpha 2>/dev/null || true - ip link del ioam-veth-gamma 2>/dev/null || true - - cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma || true + cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma if [ $ip6tnl_loaded != 0 ] then - modprobe -r ip6_tunnel 2>/dev/null || true + modprobe -r ip6_tunnel &>/dev/null fi } setup() { - setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma + setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma &>/dev/null ip link add name ioam-veth-alpha netns $ioam_node_alpha type veth \ - peer name ioam-veth-betaL netns $ioam_node_beta + peer name ioam-veth-betaL netns $ioam_node_beta &>/dev/null ip link add name ioam-veth-betaR netns $ioam_node_beta type veth \ - peer name ioam-veth-gamma netns $ioam_node_gamma - - ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0 - ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0 - ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 - ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 - - ip -netns $ioam_node_alpha addr add db01::2/64 dev veth0 - ip -netns $ioam_node_alpha link set veth0 up - ip -netns $ioam_node_alpha link set lo up - ip -netns $ioam_node_alpha route add db02::/64 via db01::1 dev veth0 - ip -netns $ioam_node_alpha route del db01::/64 - ip -netns $ioam_node_alpha route add db01::/64 dev veth0 - - ip -netns $ioam_node_beta addr add db01::1/64 dev veth0 - ip -netns $ioam_node_beta addr add db02::1/64 dev veth1 - ip -netns $ioam_node_beta link set veth0 up - ip -netns $ioam_node_beta link set veth1 up - ip -netns $ioam_node_beta link set lo up - - ip -netns $ioam_node_gamma addr add db02::2/64 dev veth0 - ip -netns $ioam_node_gamma link set veth0 up - ip -netns $ioam_node_gamma link set lo up - ip -netns $ioam_node_gamma route add db01::/64 via db02::1 dev veth0 - - # - IOAM config - - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} - ip -netns $ioam_node_alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} - ip -netns $ioam_node_alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" - ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} - - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.all.forwarding=1 - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} - ip -netns $ioam_node_beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]} - ip -netns $ioam_node_beta ioam schema add ${BETA[8]} "${BETA[9]}" - ip -netns $ioam_node_beta ioam namespace set 123 schema ${BETA[8]} - - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} - ip -netns $ioam_node_gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]} + peer name ioam-veth-gamma netns $ioam_node_gamma &>/dev/null + + ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0 &>/dev/null + ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0 &>/dev/null + ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 &>/dev/null + ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 &>/dev/null + + ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null + ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null + ip -netns $ioam_node_alpha link set veth0 up &>/dev/null + ip -netns $ioam_node_alpha link set lo up &>/dev/null + ip -netns $ioam_node_alpha route add 2001:db8:2::/64 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + ip -netns $ioam_node_beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null + ip -netns $ioam_node_beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null + ip -netns $ioam_node_beta link set veth0 up &>/dev/null + ip -netns $ioam_node_beta link set veth1 up &>/dev/null + ip -netns $ioam_node_beta link set lo up &>/dev/null + + ip -netns $ioam_node_gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null + ip -netns $ioam_node_gamma link set veth0 up &>/dev/null + ip -netns $ioam_node_gamma link set lo up &>/dev/null + ip -netns $ioam_node_gamma route add 2001:db8:1::/64 \ + via 2001:db8:2::1 dev veth0 &>/dev/null + + # - Alpha: IOAM config - + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} &>/dev/null + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} &>/dev/null + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} &>/dev/null + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} &>/dev/null + ip -netns $ioam_node_alpha \ + ioam schema add ${ALPHA[8]} "${ALPHA[9]}" &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace set 123 schema ${ALPHA[8]} &>/dev/null + + # - Beta: IOAM config - + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.ioam6_id=${BETA[0]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} &>/dev/null + ip -netns $ioam_node_beta ioam namespace add 123 &>/dev/null sleep 1 - ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 db02::2 &>/dev/null + ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null if [ $? != 0 ] then - echo "Setup FAILED" - cleanup &>/dev/null - exit 0 + echo "SKIP: Setup failed." + cleanup + exit $ksft_skip fi } log_test_passed() { - local desc=$1 - printf "TEST: %-60s [ OK ]\n" "${desc}" + printf " - TEST: %-57s [ OK ]\n" "$1" + npassed=$((npassed+1)) } -log_test_failed() +log_test_skipped() { - local desc=$1 - printf "TEST: %-60s [FAIL]\n" "${desc}" + printf " - TEST: %-57s [SKIP]\n" "$1" + nskipped=$((nskipped+1)) } -log_results() +log_test_failed() { - echo "- Tests passed: ${npassed}" - echo "- Tests failed: ${nfailed}" + printf " - TEST: %-57s [FAIL]\n" "$1" + nfailed=$((nfailed+1)) } run_test() { local name=$1 local desc=$2 - local node_src=$3 - local node_dst=$4 - local ip6_dst=$5 - local trace_type=$6 - local ioam_ns=$7 - local type=$8 - - ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type & + local ip6_src=$3 + local trace_type=$4 + local trace_size=$5 + local ioam_ns=$6 + local type=$7 + + ip netns exec $ioam_node_gamma \ + ./ioam6_parser veth0 $name $ip6_src 2001:db8:2::2 \ + $trace_type $trace_size $ioam_ns $type & local spid=$! sleep 0.1 - ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null + ip netns exec $ioam_node_alpha ping6 -t 64 -c 1 -W 1 2001:db8:2::2 &>/dev/null if [ $? != 0 ] then - nfailed=$((nfailed+1)) log_test_failed "${desc}" kill -2 $spid &>/dev/null else wait $spid - if [ $? = 0 ] - then - npassed=$((npassed+1)) - log_test_passed "${desc}" - else - nfailed=$((nfailed+1)) - log_test_failed "${desc}" - fi + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" fi } run() { + local test + + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" + echo + printf "| %-28s LOCAL tests %-29s |" echo - printf "%0.s-" {1..74} + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - echo "OUTPUT tests" - printf "%0.s-" {1..74} + + echo + echo "Global config" + for test in $TESTS_LOCAL + do + $test + done + + echo + echo "Inline mode" + for test in $TESTS_LOCAL + do + $test "inline" + done + + echo + echo "Encap mode" + for test in $TESTS_LOCAL + do + $test "encap" + done + + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" + echo + printf "| %-28s OUTPUT tests %-28s |" + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" echo # set OUTPUT settings - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 &>/dev/null - for t in $TESTS_OUTPUT + echo + echo "Inline mode" + for test in $TESTS_OUTPUT do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" + $test "inline" done - # clean OUTPUT settings - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip -netns $ioam_node_alpha route change db01::/64 dev veth0 + echo + echo "Encap mode" + for test in $TESTS_OUTPUT + do + $test "encap" + done + echo + echo "Encap mode (with tunsrc)" + for test in $TESTS_OUTPUT + do + $test "encap" "tunsrc" + done + + # clean OUTPUT settings + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null echo - printf "%0.s-" {1..74} + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - echo "INPUT tests" - printf "%0.s-" {1..74} + printf "| %-28s INPUT tests %-29s |" + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" echo # set INPUT settings - ip -netns $ioam_node_alpha ioam namespace del 123 + ip -netns $ioam_node_alpha ioam namespace del 123 &>/dev/null - for t in $TESTS_INPUT + echo + echo "Inline mode" + for test in $TESTS_INPUT do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" + $test "inline" + done + + echo + echo "Encap mode" + for test in $TESTS_INPUT + do + $test "encap" done # clean INPUT settings - ip -netns $ioam_node_alpha ioam namespace add 123 \ - data ${ALPHA[6]} wide ${ALPHA[7]} - ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} - ip -netns $ioam_node_alpha route change db01::/64 dev veth0 + ip -netns $ioam_node_alpha \ + ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace set 123 schema ${ALPHA[8]} &>/dev/null echo - printf "%0.s-" {1..74} + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - echo "GLOBAL tests" - printf "%0.s-" {1..74} + printf "| %-30s Results %-31s |" + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - for t in $TESTS_GLOBAL - do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" - done - echo - log_results + echo "- Passed: ${npassed}" + echo "- Skipped: ${nskipped}" + echo "- Failed: ${nfailed}" + echo } bit2type=( 0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000 0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100 - 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002 + 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002 0x000001 ) -bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 ) +bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 0 ) ################################################################################ # # -# OUTPUT tests # +# LOCAL tests # # # -# Two nodes (sender/receiver), IOAM disabled on ingress for the receiver. # ################################################################################ -out_undef_ns() +local_sysctl_ioam_id() +{ + ############################################################################## + # Make sure the sysctl "net.ipv6.ioam6_id" works as expected. # + ############################################################################## + local desc="Sysctl net.ipv6.ioam6_id" + + [ ! -z $1 ] && return + + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.ioam6_id 2>/dev/null | grep -wq ${ALPHA[0]} + + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_sysctl_ioam_id_wide() { ############################################################################## - # Make sure that the encap node won't fill the trace if the chosen IOAM # - # namespace is not configured locally. # + # Make sure the sysctl "net.ipv6.ioam6_id_wide" works as expected. # ############################################################################## - local desc="Unknown IOAM namespace" + local desc="Sysctl net.ipv6.ioam6_id_wide" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + [ ! -z $1 ] && return - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.ioam6_id_wide 2>/dev/null | grep -wq ${ALPHA[1]} - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0x800000 0 $1 + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down +local_sysctl_ioam_intf_id() +{ + ############################################################################## + # Make sure the sysctl "net.ipv6.conf.XX.ioam6_id" works as expected. # + ############################################################################## + local desc="Sysctl net.ipv6.conf.XX.ioam6_id" + + [ ! -z $1 ] && return + + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.conf.veth0.ioam6_id 2>/dev/null | grep -wq ${ALPHA[4]} + + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" } -out_no_room() +local_sysctl_ioam_intf_id_wide() { ############################################################################## - # Make sure that the encap node won't fill the trace and will set the # - # Overflow flag since there is no room enough for its data. # + # Make sure the sysctl "net.ipv6.conf.XX.ioam6_id_wide" works as expected. # ############################################################################## - local desc="Missing trace room" + local desc="Sysctl net.ipv6.conf.XX.ioam6_id_wide" + + [ ! -z $1 ] && return + + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.conf.veth0.ioam6_id_wide 2>/dev/null | grep -wq ${ALPHA[5]} + + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up +local_sysctl_ioam_intf_enabled() +{ + ############################################################################## + # Make sure the sysctl "net.ipv6.conf.XX.ioam6_enabled" works as expected. # + ############################################################################## + local desc="Sysctl net.ipv6.conf.XX.ioam6_enabled" - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + [ ! -z $1 ] && return - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xc00000 123 $1 + ip netns exec $ioam_node_beta \ + sysctl net.ipv6.conf.veth0.ioam6_enabled 2>/dev/null | grep -wq 1 - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" } -out_bits() +local_ioam_namespace() { ############################################################################## - # Make sure that, for each trace type bit, the encap node will either: # - # (i) fill the trace with its data when it is a supported bit # - # (ii) not fill the trace with its data when it is an unsupported bit # + # Make sure the creation of an IOAM Namespace works as expected. # ############################################################################## - local desc="Trace type with bit <n> only" + local desc="Create an IOAM Namespace" - local tmp=${bit2size[22]} - bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + [ ! -z $1 ] && return - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq 123 + local ret=$? - for i in {0..22} - do - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ - dev veth0 &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq ${ALPHA[6]} + ret=$((ret + $?)) - local cmd_res=$? - local descr="${desc/<n>/$i}" + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq ${ALPHA[7]} + ret=$((ret + $?)) + + [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_ioam_schema() +{ + ############################################################################## + # Make sure the creation of an IOAM Schema works as expected. # + ############################################################################## + local desc="Create an IOAM Schema" + + [ ! -z $1 ] && return + + ip -netns $ioam_node_alpha \ + ioam schema show 2>/dev/null | grep -wq ${ALPHA[8]} + local ret=$? + + local sc_data=$( + for i in `seq 0 $((${#ALPHA[9]}-1))` + do + chr=${ALPHA[9]:i:1} + printf "%x " "'${chr}" + done + ) + + ip -netns $ioam_node_alpha \ + ioam schema show 2>/dev/null | grep -q "$sc_data" + ret=$((ret + $?)) + + [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_ioam_schema_namespace() +{ + ############################################################################## + # Make sure the binding of a Schema to a Namespace works as expected. # + ############################################################################## + local desc="Bind an IOAM Schema to an IOAM Namespace" + + [ ! -z $1 ] && return + + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq ${ALPHA[8]} + local ret=$? + + ip -netns $ioam_node_alpha \ + ioam schema show 2>/dev/null | grep -wq 123 + ret=$((ret + $?)) + + [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_route_ns() +{ + ############################################################################## + # Make sure the Namespace-ID is always provided, whatever the mode. # + ############################################################################## + local desc="Mandatory Namespace-ID" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_tunsrc() +{ + ############################################################################## + # Make sure the Tunnel Source is only (and possibly) used with encap mode. # + ############################################################################## + local desc + local mode + local mode_tunsrc - if [[ $i -ge 12 && $i -le 21 ]] + [ -z $1 ] && return + + if [ "$1" == "encap" ] + then + desc="Optional Tunnel Source" + mode="$1 tundst 2001:db8:2::2" + mode_tunsrc="$1 tunsrc 2001:db8:1::50 tundst 2001:db8:2::2" + else + desc="Unneeded Tunnel Source" + mode="$1" + mode_tunsrc="$1 tunsrc 2001:db8:1::50" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode_tunsrc trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + if [ "$1" == "encap" ] + then + [[ $ret1 != 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + else + [[ $ret1 != 0 || $ret2 == 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_tundst() +{ + ############################################################################## + # Make sure the Tunnel Destination is only (and always) used with encap mode.# + ############################################################################## + local desc + + [ -z $1 ] && return + + [ "$1" == "encap" ] && desc="Mandatory Tunnel Destination" \ + || desc="Unneeded Tunnel Destination" + + local mode="$1" + local mode_tundst="$1 tundst 2001:db8:2::2" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode_tundst trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + if [ "$1" == "encap" ] + then + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + else + [[ $ret1 != 0 || $ret2 == 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_trace_type() +{ + ############################################################################## + # Make sure the Trace Type is always provided, whatever the mode. # + ############################################################################## + local desc="Mandatory Trace Type" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_trace_size() +{ + ############################################################################## + # Make sure the Trace Size is always provided, whatever the mode. # + ############################################################################## + local desc="Mandatory Trace Size" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_trace_type_bits() +{ + ############################################################################## + # Make sure only allowed bits (0-11 and 22) are accepted. # + ############################################################################## + local desc="Trace Type bits" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + local i + for i in {0..23} + do + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type ${bit2type[$i]} ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [[ ($? == 0 && (($i -ge 12 && $i -le 21) || $i == 23)) || + ($? != 0 && (($i -ge 0 && $i -le 11) || $i == 22)) ]] then - if [ $cmd_res != 0 ] - then - npassed=$((npassed+1)) - log_test_passed "$descr ($1 mode)" - else - nfailed=$((nfailed+1)) - log_test_failed "$descr ($1 mode)" - fi - else - run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 + local err=1 + break fi done - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + [ -z $err ] && log_test_passed "${desc}" || log_test_failed "${desc}" - bit2size[22]=$tmp + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null } -out_full_supp_trace() +local_route_trace_size_values() { ############################################################################## - # Make sure that the encap node will correctly fill a full trace. Be careful,# - # "full trace" here does NOT mean all bits (only supported ones). # + # Make sure only allowed sizes (multiples of four in [4,244]) are accepted. # ############################################################################## - local desc="Full supported trace" + local desc="Trace Size values" + local mode - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + [ -z $1 ] && return - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 100 dev veth0 + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xfff002 123 $1 + # we also try the next multiple of four after the MAX to check it's refused + local i + for i in {0..248} + do + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size $i \ + via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + if [[ ($? == 0 && ($i == 0 || $i == 248 || $(( $i % 4 )) != 0)) || + ($? != 0 && $i != 0 && $i != 248 && $(( $i % 4 )) == 0) ]] + then + local err=1 + break + fi + done + + [ -z $err ] && log_test_passed "${desc}" || log_test_failed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null } ################################################################################ # # -# INPUT tests # +# OUTPUT tests # # # -# Two nodes (sender/receiver), the sender MUST NOT fill the trace upon # -# insertion -> the IOAM namespace configured on the sender is removed # -# and is used in the inserted trace to force the sender not to fill it. # ################################################################################ -in_undef_ns() +output_undef_ns() { ############################################################################## - # Make sure that the receiving node won't fill the trace if the related IOAM # - # namespace is not configured locally. # + # Make sure an IOAM encapsulating node does NOT fill the trace when the # + # corresponding IOAM Namespace-ID is not configured locally. # ############################################################################## - local desc="Unknown IOAM namespace" + local desc="Unknown IOAM Namespace-ID" + local ns=0 + local tr_type=0x800000 + local tr_size=4 + local mode="$1" + local saddr="2001:db8:1::2" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0x800000 0 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } -in_no_room() +output_no_room() { ############################################################################## - # Make sure that the receiving node won't fill the trace and will set the # - # Overflow flag if there is no room enough for its data. # + # Make sure an IOAM encapsulating node does NOT fill the trace AND sets the # + # Overflow flag when there is not enough room for its data. # ############################################################################## - local desc="Missing trace room" + local desc="Missing room for data" + local ns=123 + local tr_type=0xc00000 + local tr_size=4 + local mode="$1" + local saddr="2001:db8:1::2" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xc00000 123 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } -in_bits() +output_no_room_oss() { ############################################################################## - # Make sure that, for each trace type bit, the receiving node will either: # - # (i) fill the trace with its data when it is a supported bit # - # (ii) not fill the trace with its data when it is an unsupported bit # + # Make sure an IOAM encapsulating node does NOT fill the trace AND sets the # + # Overflow flag when there is not enough room for the Opaque State Snapshot. # ############################################################################## - local desc="Trace type with bit <n> only" + local desc="Missing room for Opaque State Snapshot" + local ns=123 + local tr_type=0x000002 + local tr_size=4 + local mode="$1" + local saddr="2001:db8:1::2" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi - local tmp=${bit2size[22]} - bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +output_bits() +{ + ############################################################################## + # Make sure an IOAM encapsulating node implements all supported bits by # + # checking it correctly fills the trace with its data. # + ############################################################################## + local desc="Trace Type with supported bit <n> only" + local ns=123 + local mode="$1" + local saddr="2001:db8:1::2" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + if [ "$1" == "encap" ] + then + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + local tmp=${bit2size[22]} + bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + local i for i in {0..11} {22..22} do - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ - dev veth0 + local descr="${desc/<n>/$i}" + + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi - run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc \ + type ${bit2type[$i]} ns $ns size ${bit2size[$i]} \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test "output_bit$i" "${descr}" $saddr \ + ${bit2type[$i]} ${bit2size[$i]} $ns $1 + else + log_test_failed "${descr}" + fi done - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null bit2size[22]=$tmp } -in_oflag() +output_sizes() { ############################################################################## - # Make sure that the receiving node won't fill the trace since the Overflow # - # flag is set. # + # Make sure an IOAM encapsulating node allocates supported sizes correctly. # ############################################################################## - local desc="Overflow flag is set" + local desc="Trace Size of <n> bytes" + local ns=0 + local tr_type=0x800000 + local mode="$1" + local saddr="2001:db8:1::2" - # Exception: - # Here, we need the sender to set the Overflow flag. For that, we will add - # back the IOAM namespace that was previously configured on the sender. - ip -netns $ioam_node_alpha ioam namespace add 123 + if [ "$1" == "encap" ] + then + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + local i + for i in $(seq 4 4 244) + do + local descr="${desc/<n>/$i}" - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xc00000 123 $1 + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $i \ + via 2001:db8:1::1 dev veth0 &>/dev/null - # And we clean the exception for this test to get things back to normal for - # other INPUT tests - ip -netns $ioam_node_alpha ioam namespace del 123 + if [ $? == 0 ] + then + run_test "output_size$i" "${descr}" $saddr $tr_type $i $ns $1 + else + log_test_failed "${descr}" + fi + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } -in_full_supp_trace() +output_full_supp_trace() { ############################################################################## - # Make sure that the receiving node will correctly fill a full trace. Be # - # careful, "full trace" here does NOT mean all bits (only supported ones). # + # Make sure an IOAM encapsulating node correctly fills a trace when all # + # supported bits are set. # ############################################################################## local desc="Full supported trace" + local ns=123 + local tr_type=0xfff002 + local tr_size + local mode="$1" + local saddr="2001:db8:1::2" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 80 dev veth0 + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xfff002 123 $1 + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + local i + tr_size=$(( ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + for i in {0..11} {22..22} + do + tr_size=$((tr_size + bit2size[$i])) + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } ################################################################################ # # -# GLOBAL tests # +# INPUT tests # # # -# Three nodes (sender/router/receiver), IOAM fully enabled on every node. # ################################################################################ -fwd_full_supp_trace() +input_undef_ns() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace when the corresponding IOAM # + # Namespace-ID is not configured locally. # + ############################################################################## + local desc="Unknown IOAM Namespace-ID" + local ns=0 + local tr_type=0x800000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_no_room() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace AND sets the Overflow flag # + # when there is not enough room for its data. # + ############################################################################## + local desc="Missing room for data" + local ns=123 + local tr_type=0xc00000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_no_room_oss() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace AND sets the Overflow flag # + # when there is not enough room for the Opaque State Snapshot. # + ############################################################################## + local desc="Missing room for Opaque State Snapshot" + local ns=123 + local tr_type=0x000002 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_disabled() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace when IOAM is not enabled on # + # the corresponding (ingress) interface. # + ############################################################################## + local desc="IOAM disabled on ingress interface" + local ns=123 + local tr_type=0x800000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + # Exception: disable IOAM on ingress interface + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 &>/dev/null + local ret=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + ret=$((ret + $?)) + + if [ $ret == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + # Clean Exception + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_oflag() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace when the Overflow flag is # + # set. # + ############################################################################## + local desc="Overflow flag is set" + local ns=123 + local tr_type=0xc00000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + # Exception: + # Here, we need the sender to set the Overflow flag. For that, we will add + # back the IOAM namespace that was previously configured on the sender. + ip -netns $ioam_node_alpha ioam namespace add 123 &>/dev/null + local ret=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + ret=$((ret + $?)) + + if [ $ret == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + # Clean Exception + ip -netns $ioam_node_alpha ioam namespace del 123 &>/dev/null + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_bits() +{ + ############################################################################## + # Make sure an IOAM node implements all supported bits by checking it # + # correctly fills the trace with its data. # + ############################################################################## + local desc="Trace Type with supported bit <n> only" + local ns=123 + local mode="$1" + + if [ "$1" == "encap" ] + then + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + local tmp=${bit2size[22]} + bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + + local i + for i in {0..11} {22..22} + do + local descr="${desc/<n>/$i}" + + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc \ + type ${bit2type[$i]} ns $ns size ${bit2size[$i]} \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test "input_bit$i" "${descr}" 2001:db8:1::2 \ + ${bit2type[$i]} ${bit2size[$i]} $ns $1 + else + log_test_failed "${descr}" + fi + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null + + bit2size[22]=$tmp +} + +input_sizes() { ############################################################################## - # Make sure that all three nodes correctly filled the full supported trace # - # by checking that the trace data is consistent with the predefined config. # + # Make sure an IOAM node handles all supported sizes correctly. # ############################################################################## - local desc="Forward - Full supported trace" + local desc="Trace Size of <n> bytes" + local ns=123 + local tr_type=0x800000 + local mode="$1" + + if [ "$1" == "encap" ] + then + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 up + local i + for i in $(seq 4 4 244) + do + local descr="${desc/<n>/$i}" - ip -netns $ioam_node_alpha route change db02::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \ - db02::2 0xfff002 123 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $i \ + via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down + if [ $? == 0 ] + then + run_test "input_size$i" "${descr}" 2001:db8:1::2 $tr_type $i $ns $1 + else + log_test_failed "${descr}" + fi + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_full_supp_trace() +{ + ############################################################################## + # Make sure an IOAM node correctly fills a trace when all supported bits are # + # set. # + ############################################################################## + local desc="Full supported trace" + local ns=123 + local tr_type=0xfff002 + local tr_size + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + local i + tr_size=$(( ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + for i in {0..11} {22..22} + do + tr_size=$((tr_size + bit2size[$i])) + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } @@ -742,30 +1655,29 @@ fwd_full_supp_trace() ################################################################################ npassed=0 +nskipped=0 nfailed=0 if [ "$(id -u)" -ne 0 ] then - echo "SKIP: Need root privileges" + echo "SKIP: Need root privileges." exit $ksft_skip fi if [ ! -x "$(command -v ip)" ] then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -ip ioam &>/dev/null -if [ $? = 1 ] -then - echo "SKIP: iproute2 too old, missing ioam command" + echo "SKIP: Could not run test without ip tool." exit $ksft_skip fi check_kernel_compatibility - -cleanup &>/dev/null setup run -cleanup &>/dev/null +cleanup + +if [ $nfailed != 0 ] +then + exit $ksft_fail +fi + +exit $ksft_pass diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index 895e5bb5044b..de4b5c9e8a74 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -8,8 +8,10 @@ #include <errno.h> #include <limits.h> #include <linux/const.h> +#include <linux/if_ether.h> #include <linux/ioam6.h> #include <linux/ipv6.h> +#include <stdbool.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -40,7 +42,7 @@ static struct ioam_config node1 = { .egr_id = 101, .ingr_wide = 0xffffffff, /* default value */ .egr_wide = 101101, - .ns_data = 0xdeadbee0, + .ns_data = 0xdeadbeef, .ns_wide = 0xcafec0caf00dc0de, .sc_id = 777, .sc_data = "something that will be 4n-aligned", @@ -54,33 +56,22 @@ static struct ioam_config node2 = { .egr_id = 202, .ingr_wide = 201201, .egr_wide = 202202, - .ns_data = 0xdeadbee1, - .ns_wide = 0xcafec0caf11dc0de, - .sc_id = 666, - .sc_data = "Hello there -Obi", - .hlim = 63, -}; - -static struct ioam_config node3 = { - .id = 3, - .wide = 33333333, - .ingr_id = 301, - .egr_id = 0xffff, /* default value */ - .ingr_wide = 301301, - .egr_wide = 0xffffffff, /* default value */ - .ns_data = 0xdeadbee2, - .ns_wide = 0xcafec0caf22dc0de, + .ns_data = 0xffffffff, /* default value */ + .ns_wide = 0xffffffffffffffff, /* default value */ .sc_id = 0xffffff, /* default value */ .sc_data = NULL, - .hlim = 62, + .hlim = 63, }; enum { /********** * OUTPUT * **********/ + __TEST_OUT_MIN, + TEST_OUT_UNDEF_NS, TEST_OUT_NO_ROOM, + TEST_OUT_NO_ROOM_OSS, TEST_OUT_BIT0, TEST_OUT_BIT1, TEST_OUT_BIT2, @@ -94,13 +85,80 @@ enum { TEST_OUT_BIT10, TEST_OUT_BIT11, TEST_OUT_BIT22, + TEST_OUT_SIZE4, + TEST_OUT_SIZE8, + TEST_OUT_SIZE12, + TEST_OUT_SIZE16, + TEST_OUT_SIZE20, + TEST_OUT_SIZE24, + TEST_OUT_SIZE28, + TEST_OUT_SIZE32, + TEST_OUT_SIZE36, + TEST_OUT_SIZE40, + TEST_OUT_SIZE44, + TEST_OUT_SIZE48, + TEST_OUT_SIZE52, + TEST_OUT_SIZE56, + TEST_OUT_SIZE60, + TEST_OUT_SIZE64, + TEST_OUT_SIZE68, + TEST_OUT_SIZE72, + TEST_OUT_SIZE76, + TEST_OUT_SIZE80, + TEST_OUT_SIZE84, + TEST_OUT_SIZE88, + TEST_OUT_SIZE92, + TEST_OUT_SIZE96, + TEST_OUT_SIZE100, + TEST_OUT_SIZE104, + TEST_OUT_SIZE108, + TEST_OUT_SIZE112, + TEST_OUT_SIZE116, + TEST_OUT_SIZE120, + TEST_OUT_SIZE124, + TEST_OUT_SIZE128, + TEST_OUT_SIZE132, + TEST_OUT_SIZE136, + TEST_OUT_SIZE140, + TEST_OUT_SIZE144, + TEST_OUT_SIZE148, + TEST_OUT_SIZE152, + TEST_OUT_SIZE156, + TEST_OUT_SIZE160, + TEST_OUT_SIZE164, + TEST_OUT_SIZE168, + TEST_OUT_SIZE172, + TEST_OUT_SIZE176, + TEST_OUT_SIZE180, + TEST_OUT_SIZE184, + TEST_OUT_SIZE188, + TEST_OUT_SIZE192, + TEST_OUT_SIZE196, + TEST_OUT_SIZE200, + TEST_OUT_SIZE204, + TEST_OUT_SIZE208, + TEST_OUT_SIZE212, + TEST_OUT_SIZE216, + TEST_OUT_SIZE220, + TEST_OUT_SIZE224, + TEST_OUT_SIZE228, + TEST_OUT_SIZE232, + TEST_OUT_SIZE236, + TEST_OUT_SIZE240, + TEST_OUT_SIZE244, TEST_OUT_FULL_SUPP_TRACE, + __TEST_OUT_MAX, + /********* * INPUT * *********/ + __TEST_IN_MIN, + TEST_IN_UNDEF_NS, TEST_IN_NO_ROOM, + TEST_IN_NO_ROOM_OSS, + TEST_IN_DISABLED, TEST_IN_OFLAG, TEST_IN_BIT0, TEST_IN_BIT1, @@ -115,36 +173,107 @@ enum { TEST_IN_BIT10, TEST_IN_BIT11, TEST_IN_BIT22, + TEST_IN_SIZE4, + TEST_IN_SIZE8, + TEST_IN_SIZE12, + TEST_IN_SIZE16, + TEST_IN_SIZE20, + TEST_IN_SIZE24, + TEST_IN_SIZE28, + TEST_IN_SIZE32, + TEST_IN_SIZE36, + TEST_IN_SIZE40, + TEST_IN_SIZE44, + TEST_IN_SIZE48, + TEST_IN_SIZE52, + TEST_IN_SIZE56, + TEST_IN_SIZE60, + TEST_IN_SIZE64, + TEST_IN_SIZE68, + TEST_IN_SIZE72, + TEST_IN_SIZE76, + TEST_IN_SIZE80, + TEST_IN_SIZE84, + TEST_IN_SIZE88, + TEST_IN_SIZE92, + TEST_IN_SIZE96, + TEST_IN_SIZE100, + TEST_IN_SIZE104, + TEST_IN_SIZE108, + TEST_IN_SIZE112, + TEST_IN_SIZE116, + TEST_IN_SIZE120, + TEST_IN_SIZE124, + TEST_IN_SIZE128, + TEST_IN_SIZE132, + TEST_IN_SIZE136, + TEST_IN_SIZE140, + TEST_IN_SIZE144, + TEST_IN_SIZE148, + TEST_IN_SIZE152, + TEST_IN_SIZE156, + TEST_IN_SIZE160, + TEST_IN_SIZE164, + TEST_IN_SIZE168, + TEST_IN_SIZE172, + TEST_IN_SIZE176, + TEST_IN_SIZE180, + TEST_IN_SIZE184, + TEST_IN_SIZE188, + TEST_IN_SIZE192, + TEST_IN_SIZE196, + TEST_IN_SIZE200, + TEST_IN_SIZE204, + TEST_IN_SIZE208, + TEST_IN_SIZE212, + TEST_IN_SIZE216, + TEST_IN_SIZE220, + TEST_IN_SIZE224, + TEST_IN_SIZE228, + TEST_IN_SIZE232, + TEST_IN_SIZE236, + TEST_IN_SIZE240, + TEST_IN_SIZE244, TEST_IN_FULL_SUPP_TRACE, - /********** - * GLOBAL * - **********/ - TEST_FWD_FULL_SUPP_TRACE, + __TEST_IN_MAX, __TEST_MAX, }; -static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, - __u32 trace_type, __u16 ioam_ns) +static int check_header(int tid, struct ioam6_trace_hdr *trace, + __u32 trace_type, __u8 trace_size, __u16 ioam_ns) { - if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns || - __be32_to_cpu(ioam6h->type_be32) != (trace_type << 8)) + if (__be16_to_cpu(trace->namespace_id) != ioam_ns || + __be32_to_cpu(trace->type_be32) != (trace_type << 8)) return 1; switch (tid) { case TEST_OUT_UNDEF_NS: case TEST_IN_UNDEF_NS: - return ioam6h->overflow || - ioam6h->nodelen != 1 || - ioam6h->remlen != 1; + case TEST_IN_DISABLED: + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != 1; case TEST_OUT_NO_ROOM: case TEST_IN_NO_ROOM: case TEST_IN_OFLAG: - return !ioam6h->overflow || - ioam6h->nodelen != 2 || - ioam6h->remlen != 1; + return trace->overflow == 0 || + trace->nodelen != 2 || + trace->remlen != 1; + + case TEST_OUT_NO_ROOM_OSS: + return trace->overflow == 0 || + trace->nodelen != 0 || + trace->remlen != 1; + + case TEST_IN_NO_ROOM_OSS: + case TEST_OUT_BIT22: + case TEST_IN_BIT22: + return trace->overflow == 1 || + trace->nodelen != 0 || + trace->remlen != 0; case TEST_OUT_BIT0: case TEST_IN_BIT0: @@ -164,9 +293,9 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, case TEST_IN_BIT7: case TEST_OUT_BIT11: case TEST_IN_BIT11: - return ioam6h->overflow || - ioam6h->nodelen != 1 || - ioam6h->remlen; + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != 0; case TEST_OUT_BIT8: case TEST_IN_BIT8: @@ -174,22 +303,145 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, case TEST_IN_BIT9: case TEST_OUT_BIT10: case TEST_IN_BIT10: - return ioam6h->overflow || - ioam6h->nodelen != 2 || - ioam6h->remlen; - - case TEST_OUT_BIT22: - case TEST_IN_BIT22: - return ioam6h->overflow || - ioam6h->nodelen || - ioam6h->remlen; + return trace->overflow == 1 || + trace->nodelen != 2 || + trace->remlen != 0; + + case TEST_OUT_SIZE4: + case TEST_OUT_SIZE8: + case TEST_OUT_SIZE12: + case TEST_OUT_SIZE16: + case TEST_OUT_SIZE20: + case TEST_OUT_SIZE24: + case TEST_OUT_SIZE28: + case TEST_OUT_SIZE32: + case TEST_OUT_SIZE36: + case TEST_OUT_SIZE40: + case TEST_OUT_SIZE44: + case TEST_OUT_SIZE48: + case TEST_OUT_SIZE52: + case TEST_OUT_SIZE56: + case TEST_OUT_SIZE60: + case TEST_OUT_SIZE64: + case TEST_OUT_SIZE68: + case TEST_OUT_SIZE72: + case TEST_OUT_SIZE76: + case TEST_OUT_SIZE80: + case TEST_OUT_SIZE84: + case TEST_OUT_SIZE88: + case TEST_OUT_SIZE92: + case TEST_OUT_SIZE96: + case TEST_OUT_SIZE100: + case TEST_OUT_SIZE104: + case TEST_OUT_SIZE108: + case TEST_OUT_SIZE112: + case TEST_OUT_SIZE116: + case TEST_OUT_SIZE120: + case TEST_OUT_SIZE124: + case TEST_OUT_SIZE128: + case TEST_OUT_SIZE132: + case TEST_OUT_SIZE136: + case TEST_OUT_SIZE140: + case TEST_OUT_SIZE144: + case TEST_OUT_SIZE148: + case TEST_OUT_SIZE152: + case TEST_OUT_SIZE156: + case TEST_OUT_SIZE160: + case TEST_OUT_SIZE164: + case TEST_OUT_SIZE168: + case TEST_OUT_SIZE172: + case TEST_OUT_SIZE176: + case TEST_OUT_SIZE180: + case TEST_OUT_SIZE184: + case TEST_OUT_SIZE188: + case TEST_OUT_SIZE192: + case TEST_OUT_SIZE196: + case TEST_OUT_SIZE200: + case TEST_OUT_SIZE204: + case TEST_OUT_SIZE208: + case TEST_OUT_SIZE212: + case TEST_OUT_SIZE216: + case TEST_OUT_SIZE220: + case TEST_OUT_SIZE224: + case TEST_OUT_SIZE228: + case TEST_OUT_SIZE232: + case TEST_OUT_SIZE236: + case TEST_OUT_SIZE240: + case TEST_OUT_SIZE244: + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != trace_size / 4; + + case TEST_IN_SIZE4: + case TEST_IN_SIZE8: + case TEST_IN_SIZE12: + case TEST_IN_SIZE16: + case TEST_IN_SIZE20: + case TEST_IN_SIZE24: + case TEST_IN_SIZE28: + case TEST_IN_SIZE32: + case TEST_IN_SIZE36: + case TEST_IN_SIZE40: + case TEST_IN_SIZE44: + case TEST_IN_SIZE48: + case TEST_IN_SIZE52: + case TEST_IN_SIZE56: + case TEST_IN_SIZE60: + case TEST_IN_SIZE64: + case TEST_IN_SIZE68: + case TEST_IN_SIZE72: + case TEST_IN_SIZE76: + case TEST_IN_SIZE80: + case TEST_IN_SIZE84: + case TEST_IN_SIZE88: + case TEST_IN_SIZE92: + case TEST_IN_SIZE96: + case TEST_IN_SIZE100: + case TEST_IN_SIZE104: + case TEST_IN_SIZE108: + case TEST_IN_SIZE112: + case TEST_IN_SIZE116: + case TEST_IN_SIZE120: + case TEST_IN_SIZE124: + case TEST_IN_SIZE128: + case TEST_IN_SIZE132: + case TEST_IN_SIZE136: + case TEST_IN_SIZE140: + case TEST_IN_SIZE144: + case TEST_IN_SIZE148: + case TEST_IN_SIZE152: + case TEST_IN_SIZE156: + case TEST_IN_SIZE160: + case TEST_IN_SIZE164: + case TEST_IN_SIZE168: + case TEST_IN_SIZE172: + case TEST_IN_SIZE176: + case TEST_IN_SIZE180: + case TEST_IN_SIZE184: + case TEST_IN_SIZE188: + case TEST_IN_SIZE192: + case TEST_IN_SIZE196: + case TEST_IN_SIZE200: + case TEST_IN_SIZE204: + case TEST_IN_SIZE208: + case TEST_IN_SIZE212: + case TEST_IN_SIZE216: + case TEST_IN_SIZE220: + case TEST_IN_SIZE224: + case TEST_IN_SIZE228: + case TEST_IN_SIZE232: + case TEST_IN_SIZE236: + case TEST_IN_SIZE240: + case TEST_IN_SIZE244: + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != (trace_size / 4) - trace->nodelen; case TEST_OUT_FULL_SUPP_TRACE: case TEST_IN_FULL_SUPP_TRACE: - case TEST_FWD_FULL_SUPP_TRACE: - return ioam6h->overflow || - ioam6h->nodelen != 15 || - ioam6h->remlen; + return trace->overflow == 1 || + trace->nodelen != 15 || + trace->remlen != 0; default: break; @@ -198,167 +450,137 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, return 1; } -static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, - const struct ioam_config cnf) +static int check_data(struct ioam6_trace_hdr *trace, __u8 trace_size, + const struct ioam_config cnf, bool is_output) { - unsigned int len; + unsigned int len, i; __u8 aligned; __u64 raw64; __u32 raw32; + __u8 *p; - if (ioam6h->type.bit0) { - raw32 = __be32_to_cpu(*((__u32 *)*p)); - if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff)) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit1) { - raw32 = __be32_to_cpu(*((__u32 *)*p)); - if (cnf.ingr_id != (raw32 >> 16) || - cnf.egr_id != (raw32 & 0xffff)) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit2) - *p += sizeof(__u32); - - if (ioam6h->type.bit3) - *p += sizeof(__u32); - - if (ioam6h->type.bit4) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit5) { - if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit6) - *p += sizeof(__u32); + if (trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | + trace->type.bit15 | trace->type.bit16 | trace->type.bit17 | + trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | + trace->type.bit21 | trace->type.bit23) + return 1; - if (ioam6h->type.bit7) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + for (i = 0; i < trace->remlen * 4; i++) { + if (trace->data[i] != 0) return 1; - *p += sizeof(__u32); } - if (ioam6h->type.bit8) { - raw64 = __be64_to_cpu(*((__u64 *)*p)); - if (cnf.hlim != (raw64 >> 56) || - cnf.wide != (raw64 & 0xffffffffffffff)) - return 1; - *p += sizeof(__u64); - } + if (trace->remlen * 4 == trace_size) + return 0; - if (ioam6h->type.bit9) { - if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide) - return 1; - *p += sizeof(__u32); + p = trace->data + trace->remlen * 4; - if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide) + if (trace->type.bit0) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff)) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit10) { - if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide) + if (trace->type.bit1) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if (cnf.ingr_id != (raw32 >> 16) || + cnf.egr_id != (raw32 & 0xffff)) return 1; - *p += sizeof(__u64); + p += sizeof(__u32); } - if (ioam6h->type.bit11) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit2) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if ((is_output && raw32 != 0xffffffff) || + (!is_output && (raw32 == 0 || raw32 == 0xffffffff))) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit12) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit3) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if ((is_output && raw32 != 0xffffffff) || + (!is_output && (raw32 == 0 || raw32 == 0xffffffff))) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit13) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit4) { + if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit14) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit5) { + if (__be32_to_cpu(*((__u32 *)p)) != cnf.ns_data) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit15) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit6) { + if (__be32_to_cpu(*((__u32 *)p)) == 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit16) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit7) { + if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit17) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit8) { + raw64 = __be64_to_cpu(*((__u64 *)p)); + if (cnf.hlim != (raw64 >> 56) || + cnf.wide != (raw64 & 0xffffffffffffff)) return 1; - *p += sizeof(__u32); + p += sizeof(__u64); } - if (ioam6h->type.bit18) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit9) { + if (__be32_to_cpu(*((__u32 *)p)) != cnf.ingr_wide) return 1; - *p += sizeof(__u32); - } + p += sizeof(__u32); - if (ioam6h->type.bit19) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (__be32_to_cpu(*((__u32 *)p)) != cnf.egr_wide) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit20) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit10) { + if (__be64_to_cpu(*((__u64 *)p)) != cnf.ns_wide) return 1; - *p += sizeof(__u32); + p += sizeof(__u64); } - if (ioam6h->type.bit21) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit11) { + if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit22) { + if (trace->type.bit22) { len = cnf.sc_data ? strlen(cnf.sc_data) : 0; aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0; - raw32 = __be32_to_cpu(*((__u32 *)*p)); + raw32 = __be32_to_cpu(*((__u32 *)p)); if (aligned != (raw32 >> 24) * 4 || cnf.sc_id != (raw32 & 0xffffff)) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); if (cnf.sc_data) { - if (strncmp((char *)*p, cnf.sc_data, len)) + if (strncmp((char *)p, cnf.sc_data, len)) return 1; - *p += len; + p += len; aligned -= len; while (aligned--) { - if (**p != '\0') + if (*p != '\0') return 1; - *p += sizeof(__u8); + p += sizeof(__u8); } } } @@ -366,151 +588,351 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, return 0; } -static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h, - __u32 trace_type, __u16 ioam_ns) +static int check_ioam_trace(int tid, struct ioam6_trace_hdr *trace, + __u32 trace_type, __u8 trace_size, __u16 ioam_ns) { - __u8 *p; - - if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns)) + if (check_header(tid, trace, trace_type, trace_size, ioam_ns)) return 1; - p = ioam6h->data + ioam6h->remlen * 4; - - switch (tid) { - case TEST_OUT_BIT0: - case TEST_OUT_BIT1: - case TEST_OUT_BIT2: - case TEST_OUT_BIT3: - case TEST_OUT_BIT4: - case TEST_OUT_BIT5: - case TEST_OUT_BIT6: - case TEST_OUT_BIT7: - case TEST_OUT_BIT8: - case TEST_OUT_BIT9: - case TEST_OUT_BIT10: - case TEST_OUT_BIT11: - case TEST_OUT_BIT22: - case TEST_OUT_FULL_SUPP_TRACE: - return check_ioam6_data(&p, ioam6h, node1); - - case TEST_IN_BIT0: - case TEST_IN_BIT1: - case TEST_IN_BIT2: - case TEST_IN_BIT3: - case TEST_IN_BIT4: - case TEST_IN_BIT5: - case TEST_IN_BIT6: - case TEST_IN_BIT7: - case TEST_IN_BIT8: - case TEST_IN_BIT9: - case TEST_IN_BIT10: - case TEST_IN_BIT11: - case TEST_IN_BIT22: - case TEST_IN_FULL_SUPP_TRACE: - { - __u32 tmp32 = node2.egr_wide; - __u16 tmp16 = node2.egr_id; - int res; - - node2.egr_id = 0xffff; - node2.egr_wide = 0xffffffff; + if (tid > __TEST_OUT_MIN && tid < __TEST_OUT_MAX) + return check_data(trace, trace_size, node1, true); - res = check_ioam6_data(&p, ioam6h, node2); - - node2.egr_id = tmp16; - node2.egr_wide = tmp32; - - return res; - } - - case TEST_FWD_FULL_SUPP_TRACE: - if (check_ioam6_data(&p, ioam6h, node3)) - return 1; - if (check_ioam6_data(&p, ioam6h, node2)) - return 1; - return check_ioam6_data(&p, ioam6h, node1); - - default: - break; - } + if (tid > __TEST_IN_MIN && tid < __TEST_IN_MAX) + return check_data(trace, trace_size, node2, false); return 1; } static int str2id(const char *tname) { - if (!strcmp("out_undef_ns", tname)) + if (!strcmp("output_undef_ns", tname)) return TEST_OUT_UNDEF_NS; - if (!strcmp("out_no_room", tname)) + if (!strcmp("output_no_room", tname)) return TEST_OUT_NO_ROOM; - if (!strcmp("out_bit0", tname)) + if (!strcmp("output_no_room_oss", tname)) + return TEST_OUT_NO_ROOM_OSS; + if (!strcmp("output_bit0", tname)) return TEST_OUT_BIT0; - if (!strcmp("out_bit1", tname)) + if (!strcmp("output_bit1", tname)) return TEST_OUT_BIT1; - if (!strcmp("out_bit2", tname)) + if (!strcmp("output_bit2", tname)) return TEST_OUT_BIT2; - if (!strcmp("out_bit3", tname)) + if (!strcmp("output_bit3", tname)) return TEST_OUT_BIT3; - if (!strcmp("out_bit4", tname)) + if (!strcmp("output_bit4", tname)) return TEST_OUT_BIT4; - if (!strcmp("out_bit5", tname)) + if (!strcmp("output_bit5", tname)) return TEST_OUT_BIT5; - if (!strcmp("out_bit6", tname)) + if (!strcmp("output_bit6", tname)) return TEST_OUT_BIT6; - if (!strcmp("out_bit7", tname)) + if (!strcmp("output_bit7", tname)) return TEST_OUT_BIT7; - if (!strcmp("out_bit8", tname)) + if (!strcmp("output_bit8", tname)) return TEST_OUT_BIT8; - if (!strcmp("out_bit9", tname)) + if (!strcmp("output_bit9", tname)) return TEST_OUT_BIT9; - if (!strcmp("out_bit10", tname)) + if (!strcmp("output_bit10", tname)) return TEST_OUT_BIT10; - if (!strcmp("out_bit11", tname)) + if (!strcmp("output_bit11", tname)) return TEST_OUT_BIT11; - if (!strcmp("out_bit22", tname)) + if (!strcmp("output_bit22", tname)) return TEST_OUT_BIT22; - if (!strcmp("out_full_supp_trace", tname)) + if (!strcmp("output_size4", tname)) + return TEST_OUT_SIZE4; + if (!strcmp("output_size8", tname)) + return TEST_OUT_SIZE8; + if (!strcmp("output_size12", tname)) + return TEST_OUT_SIZE12; + if (!strcmp("output_size16", tname)) + return TEST_OUT_SIZE16; + if (!strcmp("output_size20", tname)) + return TEST_OUT_SIZE20; + if (!strcmp("output_size24", tname)) + return TEST_OUT_SIZE24; + if (!strcmp("output_size28", tname)) + return TEST_OUT_SIZE28; + if (!strcmp("output_size32", tname)) + return TEST_OUT_SIZE32; + if (!strcmp("output_size36", tname)) + return TEST_OUT_SIZE36; + if (!strcmp("output_size40", tname)) + return TEST_OUT_SIZE40; + if (!strcmp("output_size44", tname)) + return TEST_OUT_SIZE44; + if (!strcmp("output_size48", tname)) + return TEST_OUT_SIZE48; + if (!strcmp("output_size52", tname)) + return TEST_OUT_SIZE52; + if (!strcmp("output_size56", tname)) + return TEST_OUT_SIZE56; + if (!strcmp("output_size60", tname)) + return TEST_OUT_SIZE60; + if (!strcmp("output_size64", tname)) + return TEST_OUT_SIZE64; + if (!strcmp("output_size68", tname)) + return TEST_OUT_SIZE68; + if (!strcmp("output_size72", tname)) + return TEST_OUT_SIZE72; + if (!strcmp("output_size76", tname)) + return TEST_OUT_SIZE76; + if (!strcmp("output_size80", tname)) + return TEST_OUT_SIZE80; + if (!strcmp("output_size84", tname)) + return TEST_OUT_SIZE84; + if (!strcmp("output_size88", tname)) + return TEST_OUT_SIZE88; + if (!strcmp("output_size92", tname)) + return TEST_OUT_SIZE92; + if (!strcmp("output_size96", tname)) + return TEST_OUT_SIZE96; + if (!strcmp("output_size100", tname)) + return TEST_OUT_SIZE100; + if (!strcmp("output_size104", tname)) + return TEST_OUT_SIZE104; + if (!strcmp("output_size108", tname)) + return TEST_OUT_SIZE108; + if (!strcmp("output_size112", tname)) + return TEST_OUT_SIZE112; + if (!strcmp("output_size116", tname)) + return TEST_OUT_SIZE116; + if (!strcmp("output_size120", tname)) + return TEST_OUT_SIZE120; + if (!strcmp("output_size124", tname)) + return TEST_OUT_SIZE124; + if (!strcmp("output_size128", tname)) + return TEST_OUT_SIZE128; + if (!strcmp("output_size132", tname)) + return TEST_OUT_SIZE132; + if (!strcmp("output_size136", tname)) + return TEST_OUT_SIZE136; + if (!strcmp("output_size140", tname)) + return TEST_OUT_SIZE140; + if (!strcmp("output_size144", tname)) + return TEST_OUT_SIZE144; + if (!strcmp("output_size148", tname)) + return TEST_OUT_SIZE148; + if (!strcmp("output_size152", tname)) + return TEST_OUT_SIZE152; + if (!strcmp("output_size156", tname)) + return TEST_OUT_SIZE156; + if (!strcmp("output_size160", tname)) + return TEST_OUT_SIZE160; + if (!strcmp("output_size164", tname)) + return TEST_OUT_SIZE164; + if (!strcmp("output_size168", tname)) + return TEST_OUT_SIZE168; + if (!strcmp("output_size172", tname)) + return TEST_OUT_SIZE172; + if (!strcmp("output_size176", tname)) + return TEST_OUT_SIZE176; + if (!strcmp("output_size180", tname)) + return TEST_OUT_SIZE180; + if (!strcmp("output_size184", tname)) + return TEST_OUT_SIZE184; + if (!strcmp("output_size188", tname)) + return TEST_OUT_SIZE188; + if (!strcmp("output_size192", tname)) + return TEST_OUT_SIZE192; + if (!strcmp("output_size196", tname)) + return TEST_OUT_SIZE196; + if (!strcmp("output_size200", tname)) + return TEST_OUT_SIZE200; + if (!strcmp("output_size204", tname)) + return TEST_OUT_SIZE204; + if (!strcmp("output_size208", tname)) + return TEST_OUT_SIZE208; + if (!strcmp("output_size212", tname)) + return TEST_OUT_SIZE212; + if (!strcmp("output_size216", tname)) + return TEST_OUT_SIZE216; + if (!strcmp("output_size220", tname)) + return TEST_OUT_SIZE220; + if (!strcmp("output_size224", tname)) + return TEST_OUT_SIZE224; + if (!strcmp("output_size228", tname)) + return TEST_OUT_SIZE228; + if (!strcmp("output_size232", tname)) + return TEST_OUT_SIZE232; + if (!strcmp("output_size236", tname)) + return TEST_OUT_SIZE236; + if (!strcmp("output_size240", tname)) + return TEST_OUT_SIZE240; + if (!strcmp("output_size244", tname)) + return TEST_OUT_SIZE244; + if (!strcmp("output_full_supp_trace", tname)) return TEST_OUT_FULL_SUPP_TRACE; - if (!strcmp("in_undef_ns", tname)) + if (!strcmp("input_undef_ns", tname)) return TEST_IN_UNDEF_NS; - if (!strcmp("in_no_room", tname)) + if (!strcmp("input_no_room", tname)) return TEST_IN_NO_ROOM; - if (!strcmp("in_oflag", tname)) + if (!strcmp("input_no_room_oss", tname)) + return TEST_IN_NO_ROOM_OSS; + if (!strcmp("input_disabled", tname)) + return TEST_IN_DISABLED; + if (!strcmp("input_oflag", tname)) return TEST_IN_OFLAG; - if (!strcmp("in_bit0", tname)) + if (!strcmp("input_bit0", tname)) return TEST_IN_BIT0; - if (!strcmp("in_bit1", tname)) + if (!strcmp("input_bit1", tname)) return TEST_IN_BIT1; - if (!strcmp("in_bit2", tname)) + if (!strcmp("input_bit2", tname)) return TEST_IN_BIT2; - if (!strcmp("in_bit3", tname)) + if (!strcmp("input_bit3", tname)) return TEST_IN_BIT3; - if (!strcmp("in_bit4", tname)) + if (!strcmp("input_bit4", tname)) return TEST_IN_BIT4; - if (!strcmp("in_bit5", tname)) + if (!strcmp("input_bit5", tname)) return TEST_IN_BIT5; - if (!strcmp("in_bit6", tname)) + if (!strcmp("input_bit6", tname)) return TEST_IN_BIT6; - if (!strcmp("in_bit7", tname)) + if (!strcmp("input_bit7", tname)) return TEST_IN_BIT7; - if (!strcmp("in_bit8", tname)) + if (!strcmp("input_bit8", tname)) return TEST_IN_BIT8; - if (!strcmp("in_bit9", tname)) + if (!strcmp("input_bit9", tname)) return TEST_IN_BIT9; - if (!strcmp("in_bit10", tname)) + if (!strcmp("input_bit10", tname)) return TEST_IN_BIT10; - if (!strcmp("in_bit11", tname)) + if (!strcmp("input_bit11", tname)) return TEST_IN_BIT11; - if (!strcmp("in_bit22", tname)) + if (!strcmp("input_bit22", tname)) return TEST_IN_BIT22; - if (!strcmp("in_full_supp_trace", tname)) + if (!strcmp("input_size4", tname)) + return TEST_IN_SIZE4; + if (!strcmp("input_size8", tname)) + return TEST_IN_SIZE8; + if (!strcmp("input_size12", tname)) + return TEST_IN_SIZE12; + if (!strcmp("input_size16", tname)) + return TEST_IN_SIZE16; + if (!strcmp("input_size20", tname)) + return TEST_IN_SIZE20; + if (!strcmp("input_size24", tname)) + return TEST_IN_SIZE24; + if (!strcmp("input_size28", tname)) + return TEST_IN_SIZE28; + if (!strcmp("input_size32", tname)) + return TEST_IN_SIZE32; + if (!strcmp("input_size36", tname)) + return TEST_IN_SIZE36; + if (!strcmp("input_size40", tname)) + return TEST_IN_SIZE40; + if (!strcmp("input_size44", tname)) + return TEST_IN_SIZE44; + if (!strcmp("input_size48", tname)) + return TEST_IN_SIZE48; + if (!strcmp("input_size52", tname)) + return TEST_IN_SIZE52; + if (!strcmp("input_size56", tname)) + return TEST_IN_SIZE56; + if (!strcmp("input_size60", tname)) + return TEST_IN_SIZE60; + if (!strcmp("input_size64", tname)) + return TEST_IN_SIZE64; + if (!strcmp("input_size68", tname)) + return TEST_IN_SIZE68; + if (!strcmp("input_size72", tname)) + return TEST_IN_SIZE72; + if (!strcmp("input_size76", tname)) + return TEST_IN_SIZE76; + if (!strcmp("input_size80", tname)) + return TEST_IN_SIZE80; + if (!strcmp("input_size84", tname)) + return TEST_IN_SIZE84; + if (!strcmp("input_size88", tname)) + return TEST_IN_SIZE88; + if (!strcmp("input_size92", tname)) + return TEST_IN_SIZE92; + if (!strcmp("input_size96", tname)) + return TEST_IN_SIZE96; + if (!strcmp("input_size100", tname)) + return TEST_IN_SIZE100; + if (!strcmp("input_size104", tname)) + return TEST_IN_SIZE104; + if (!strcmp("input_size108", tname)) + return TEST_IN_SIZE108; + if (!strcmp("input_size112", tname)) + return TEST_IN_SIZE112; + if (!strcmp("input_size116", tname)) + return TEST_IN_SIZE116; + if (!strcmp("input_size120", tname)) + return TEST_IN_SIZE120; + if (!strcmp("input_size124", tname)) + return TEST_IN_SIZE124; + if (!strcmp("input_size128", tname)) + return TEST_IN_SIZE128; + if (!strcmp("input_size132", tname)) + return TEST_IN_SIZE132; + if (!strcmp("input_size136", tname)) + return TEST_IN_SIZE136; + if (!strcmp("input_size140", tname)) + return TEST_IN_SIZE140; + if (!strcmp("input_size144", tname)) + return TEST_IN_SIZE144; + if (!strcmp("input_size148", tname)) + return TEST_IN_SIZE148; + if (!strcmp("input_size152", tname)) + return TEST_IN_SIZE152; + if (!strcmp("input_size156", tname)) + return TEST_IN_SIZE156; + if (!strcmp("input_size160", tname)) + return TEST_IN_SIZE160; + if (!strcmp("input_size164", tname)) + return TEST_IN_SIZE164; + if (!strcmp("input_size168", tname)) + return TEST_IN_SIZE168; + if (!strcmp("input_size172", tname)) + return TEST_IN_SIZE172; + if (!strcmp("input_size176", tname)) + return TEST_IN_SIZE176; + if (!strcmp("input_size180", tname)) + return TEST_IN_SIZE180; + if (!strcmp("input_size184", tname)) + return TEST_IN_SIZE184; + if (!strcmp("input_size188", tname)) + return TEST_IN_SIZE188; + if (!strcmp("input_size192", tname)) + return TEST_IN_SIZE192; + if (!strcmp("input_size196", tname)) + return TEST_IN_SIZE196; + if (!strcmp("input_size200", tname)) + return TEST_IN_SIZE200; + if (!strcmp("input_size204", tname)) + return TEST_IN_SIZE204; + if (!strcmp("input_size208", tname)) + return TEST_IN_SIZE208; + if (!strcmp("input_size212", tname)) + return TEST_IN_SIZE212; + if (!strcmp("input_size216", tname)) + return TEST_IN_SIZE216; + if (!strcmp("input_size220", tname)) + return TEST_IN_SIZE220; + if (!strcmp("input_size224", tname)) + return TEST_IN_SIZE224; + if (!strcmp("input_size228", tname)) + return TEST_IN_SIZE228; + if (!strcmp("input_size232", tname)) + return TEST_IN_SIZE232; + if (!strcmp("input_size236", tname)) + return TEST_IN_SIZE236; + if (!strcmp("input_size240", tname)) + return TEST_IN_SIZE240; + if (!strcmp("input_size244", tname)) + return TEST_IN_SIZE244; + if (!strcmp("input_full_supp_trace", tname)) return TEST_IN_FULL_SUPP_TRACE; - if (!strcmp("fwd_full_supp_trace", tname)) - return TEST_FWD_FULL_SUPP_TRACE; return -1; } +static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) +{ + return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | + (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | + (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | + (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; +} + static int get_u32(__u32 *val, const char *arg, int base) { unsigned long res; @@ -555,119 +977,124 @@ static int get_u16(__u16 *val, const char *arg, int base) return 0; } -static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { - [TEST_OUT_UNDEF_NS] = check_ioam_header, - [TEST_OUT_NO_ROOM] = check_ioam_header, - [TEST_OUT_BIT0] = check_ioam_header_and_data, - [TEST_OUT_BIT1] = check_ioam_header_and_data, - [TEST_OUT_BIT2] = check_ioam_header_and_data, - [TEST_OUT_BIT3] = check_ioam_header_and_data, - [TEST_OUT_BIT4] = check_ioam_header_and_data, - [TEST_OUT_BIT5] = check_ioam_header_and_data, - [TEST_OUT_BIT6] = check_ioam_header_and_data, - [TEST_OUT_BIT7] = check_ioam_header_and_data, - [TEST_OUT_BIT8] = check_ioam_header_and_data, - [TEST_OUT_BIT9] = check_ioam_header_and_data, - [TEST_OUT_BIT10] = check_ioam_header_and_data, - [TEST_OUT_BIT11] = check_ioam_header_and_data, - [TEST_OUT_BIT22] = check_ioam_header_and_data, - [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data, - [TEST_IN_UNDEF_NS] = check_ioam_header, - [TEST_IN_NO_ROOM] = check_ioam_header, - [TEST_IN_OFLAG] = check_ioam_header, - [TEST_IN_BIT0] = check_ioam_header_and_data, - [TEST_IN_BIT1] = check_ioam_header_and_data, - [TEST_IN_BIT2] = check_ioam_header_and_data, - [TEST_IN_BIT3] = check_ioam_header_and_data, - [TEST_IN_BIT4] = check_ioam_header_and_data, - [TEST_IN_BIT5] = check_ioam_header_and_data, - [TEST_IN_BIT6] = check_ioam_header_and_data, - [TEST_IN_BIT7] = check_ioam_header_and_data, - [TEST_IN_BIT8] = check_ioam_header_and_data, - [TEST_IN_BIT9] = check_ioam_header_and_data, - [TEST_IN_BIT10] = check_ioam_header_and_data, - [TEST_IN_BIT11] = check_ioam_header_and_data, - [TEST_IN_BIT22] = check_ioam_header_and_data, - [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data, - [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data, -}; +static int get_u8(__u8 *val, const char *arg, int base) +{ + unsigned long res; + char *ptr; + + if (!arg || !*arg) + return -1; + res = strtoul(arg, &ptr, base); + + if (!ptr || ptr == arg || *ptr) + return -1; + + if (res == ULONG_MAX && errno == ERANGE) + return -1; + + if (res > 0xFFUL) + return -1; + + *val = res; + return 0; +} int main(int argc, char **argv) { - int fd, size, hoplen, tid, ret = 1, on = 1; - struct ioam6_hdr *opt; - struct cmsghdr *cmsg; - struct msghdr msg; - struct iovec iov; - __u8 buffer[512]; + __u8 buffer[512], *ptr, nexthdr, tr_size; + struct ioam6_trace_hdr *trace; + unsigned int hoplen, ret = 1; + struct ipv6_hopopt_hdr *hbh; + int fd, size, testname_id; + struct in6_addr src, dst; + struct ioam6_hdr *ioam6; + struct timeval timeout; + struct ipv6hdr *ipv6; __u32 tr_type; __u16 ioam_ns; - __u8 *ptr; - if (argc != 5) + if (argc != 9) goto out; - tid = str2id(argv[1]); - if (tid < 0 || !func[tid]) - goto out; + testname_id = str2id(argv[2]); - if (get_u32(&tr_type, argv[2], 16) || - get_u16(&ioam_ns, argv[3], 0)) + if (testname_id < 0 || + inet_pton(AF_INET6, argv[3], &src) != 1 || + inet_pton(AF_INET6, argv[4], &dst) != 1 || + get_u32(&tr_type, argv[5], 16) || + get_u8(&tr_size, argv[6], 0) || + get_u16(&ioam_ns, argv[7], 0)) goto out; - fd = socket(PF_INET6, SOCK_RAW, - !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6); + nexthdr = (!strcmp(argv[8], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6); + + hoplen = sizeof(*hbh); + hoplen += 2; // 2-byte padding for alignment + hoplen += sizeof(*ioam6); // IOAM option header + hoplen += sizeof(*trace); // IOAM trace header + hoplen += tr_size; // IOAM trace size + hoplen += (tr_size % 8); // optional padding + + fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6)); if (fd < 0) goto out; - setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on)); + if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, + argv[1], strlen(argv[1]))) + goto close; - iov.iov_len = 1; - iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer))); - if (!iov.iov_base) + timeout.tv_sec = 1; + timeout.tv_usec = 0; + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, + (const char *)&timeout, sizeof(timeout))) goto close; recv: - memset(&msg, 0, sizeof(msg)); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = buffer; - msg.msg_controllen = CMSG_SPACE(sizeof(buffer)); - - size = recvmsg(fd, &msg, 0); + size = recv(fd, buffer, sizeof(buffer), 0); if (size <= 0) goto close; - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { - if (cmsg->cmsg_level != IPPROTO_IPV6 || - cmsg->cmsg_type != IPV6_HOPOPTS || - cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr)) - continue; + ipv6 = (struct ipv6hdr *)buffer; + + /* Skip packets that do not have the expected src/dst address or that + * do not have a Hop-by-hop. + */ + if (!ipv6_addr_equal(&ipv6->saddr, &src) || + !ipv6_addr_equal(&ipv6->daddr, &dst) || + ipv6->nexthdr != IPPROTO_HOPOPTS) + goto recv; + + /* Check Hbh's Next Header and Size. */ + hbh = (struct ipv6_hopopt_hdr *)(buffer + sizeof(*ipv6)); + if (hbh->nexthdr != nexthdr || hbh->hdrlen != (hoplen >> 3) - 1) + goto close; - ptr = (__u8 *)CMSG_DATA(cmsg); + /* Check we have a 2-byte padding for alignment. */ + ptr = (__u8 *)hbh + sizeof(*hbh); + if (ptr[0] != IPV6_TLV_PADN && ptr[1] != 0) + goto close; - hoplen = (ptr[1] + 1) << 3; - ptr += sizeof(struct ipv6_hopopt_hdr); + /* Check we now have the IOAM option. */ + ptr += 2; + if (ptr[0] != IPV6_TLV_IOAM) + goto close; - while (hoplen > 0) { - opt = (struct ioam6_hdr *)ptr; + /* Check its size and the IOAM option type. */ + ioam6 = (struct ioam6_hdr *)ptr; + if (ioam6->opt_len != sizeof(*ioam6) - 2 + sizeof(*trace) + tr_size || + ioam6->type != IOAM6_TYPE_PREALLOC) + goto close; - if (opt->opt_type == IPV6_TLV_IOAM && - opt->type == IOAM6_TYPE_PREALLOC) { - ptr += sizeof(*opt); - ret = func[tid](tid, - (struct ioam6_trace_hdr *)ptr, - tr_type, ioam_ns); - goto close; - } + trace = (struct ioam6_trace_hdr *)(ptr + sizeof(*ioam6)); - ptr += opt->opt_len + 2; - hoplen -= opt->opt_len + 2; - } - } + /* Check the trailing 4-byte padding (potentially). */ + ptr = (__u8 *)trace + sizeof(*trace) + tr_size; + if (tr_size % 8 && ptr[0] != IPV6_TLV_PADN && ptr[1] != 2 && + ptr[2] != 0 && ptr[3] != 0) + goto close; - goto recv; + /* Check the IOAM header and data. */ + ret = check_ioam_trace(testname_id, trace, tr_type, tr_size, ioam_ns); close: - free(iov.iov_base); close(fd); out: return ret; diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c index 193b82745fd8..29451d2244b7 100644 --- a/tools/testing/selftests/net/ip_local_port_range.c +++ b/tools/testing/selftests/net/ip_local_port_range.c @@ -359,7 +359,7 @@ TEST_F(ip_local_port_range, late_bind) struct sockaddr_in v4; struct sockaddr_in6 v6; } addr; - socklen_t addr_len; + socklen_t addr_len = 0; const int one = 1; int fd, err; __u32 range; diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh index 6c6ad346eaa0..4ff746db1256 100755 --- a/tools/testing/selftests/net/ip_local_port_range.sh +++ b/tools/testing/selftests/net/ip_local_port_range.sh @@ -2,4 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 ./in_netns.sh \ - sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range' + sh -c 'sysctl -q -w net.mptcp.enabled=1 && \ + sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && \ + ./ip_local_port_range' diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index be4a30a0d02a..9b44a091802c 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -227,7 +227,8 @@ static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, attr->rta_len = RTA_LENGTH(size); attr->rta_type = rta_type; - memcpy(RTA_DATA(attr), payload, size); + if (payload) + memcpy(RTA_DATA(attr), payload, size); return 0; } diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh new file mode 100755 index 000000000000..c6866e42f95c --- /dev/null +++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh @@ -0,0 +1,261 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing for potential kernel soft lockup during IPv6 routing table +# refresh under heavy outgoing IPv6 traffic. If a kernel soft lockup +# occurs, a kernel panic will be triggered to prevent associated issues. +# +# +# Test Environment Layout +# +# ┌----------------┐ ┌----------------┐ +# | SOURCE_NS | | SINK_NS | +# | NAMESPACE | | NAMESPACE | +# |(iperf3 clients)| |(iperf3 servers)| +# | | | | +# | | | | +# | ┌-----------| nexthops |---------┐ | +# | |veth_source|<--------------------------------------->|veth_sink|<┐ | +# | └-----------|2001:0DB8:1::0:1/96 2001:0DB8:1::1:1/96 |---------┘ | | +# | | ^ 2001:0DB8:1::1:2/96 | | | +# | | . . | fwd | | +# | ┌---------┐ | . . | | | +# | | IPv6 | | . . | V | +# | | routing | | . 2001:0DB8:1::1:80/96| ┌-----┐ | +# | | table | | . | | lo | | +# | | nexthop | | . └--------┴-----┴-┘ +# | | update | | ............................> 2001:0DB8:2::1:1/128 +# | └-------- ┘ | +# └----------------┘ +# +# The test script sets up two network namespaces, source_ns and sink_ns, +# connected via a veth link. Within source_ns, it continuously updates the +# IPv6 routing table by flushing and inserting IPV6_NEXTHOP_ADDR_COUNT nexthop +# IPs destined for SINK_LOOPBACK_IP_ADDR in sink_ns. This refresh occurs at a +# rate of 1/ROUTING_TABLE_REFRESH_PERIOD per second for TEST_DURATION seconds. +# +# Simultaneously, multiple iperf3 clients within source_ns generate heavy +# outgoing IPv6 traffic. Each client is assigned a unique port number starting +# at 5000 and incrementing sequentially. Each client targets a unique iperf3 +# server running in sink_ns, connected to the SINK_LOOPBACK_IFACE interface +# using the same port number. +# +# The number of iperf3 servers and clients is set to half of the total +# available cores on each machine. +# +# NOTE: We have tested this script on machines with various CPU specifications, +# ranging from lower to higher performance as listed below. The test script +# effectively triggered a kernel soft lockup on machines running an unpatched +# kernel in under a minute: +# +# - 1x Intel Xeon E-2278G 8-Core Processor @ 3.40GHz +# - 1x Intel Xeon E-2378G Processor 8-Core @ 2.80GHz +# - 1x AMD EPYC 7401P 24-Core Processor @ 2.00GHz +# - 1x AMD EPYC 7402P 24-Core Processor @ 2.80GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 1x Ampere Altra Q80-30 80-Core Processor @ 3.00GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 2x Intel Xeon Silver 4214 24-Core Processor @ 2.20GHz +# - 1x AMD EPYC 7502P 32-Core @ 2.50GHz +# - 1x Intel Xeon Gold 6314U 32-Core Processor @ 2.30GHz +# - 2x Intel Xeon Gold 6338 32-Core Processor @ 2.00GHz +# +# On less performant machines, you may need to increase the TEST_DURATION +# parameter to enhance the likelihood of encountering a race condition leading +# to a kernel soft lockup and avoid a false negative result. +# +# NOTE: The test may not produce the expected result in virtualized +# environments (e.g., qemu) due to differences in timing and CPU handling, +# which can affect the conditions needed to trigger a soft lockup. + +source lib.sh + +TEST_DURATION=300 +ROUTING_TABLE_REFRESH_PERIOD=0.01 + +IPERF3_BITRATE="300m" + + +IPV6_NEXTHOP_ADDR_COUNT="128" +IPV6_NEXTHOP_ADDR_MASK="96" +IPV6_NEXTHOP_PREFIX="2001:0DB8:1" + + +SOURCE_TEST_IFACE="veth_source" +SOURCE_TEST_IP_ADDR="2001:0DB8:1::0:1/96" + +SINK_TEST_IFACE="veth_sink" +# ${SINK_TEST_IFACE} is populated with the following range of IPv6 addresses: +# 2001:0DB8:1::1:1 to 2001:0DB8:1::1:${IPV6_NEXTHOP_ADDR_COUNT} +SINK_LOOPBACK_IFACE="lo" +SINK_LOOPBACK_IP_MASK="128" +SINK_LOOPBACK_IP_ADDR="2001:0DB8:2::1:1" + +nexthop_ip_list="" +termination_signal="" +kernel_softlokup_panic_prev_val="" + +terminate_ns_processes_by_pattern() { + local ns=$1 + local pattern=$2 + + for pid in $(ip netns pids ${ns}); do + [ -e /proc/$pid/cmdline ] && grep -qe "${pattern}" /proc/$pid/cmdline && kill -9 $pid + done +} + +cleanup() { + echo "info: cleaning up namespaces and terminating all processes within them..." + + + # Terminate iperf3 instances running in the source_ns. To avoid race + # conditions, first iterate over the PIDs and terminate those + # associated with the bash shells running the + # `while true; do iperf3 -c ...; done` loops. In a second iteration, + # terminate the individual `iperf3 -c ...` instances. + terminate_ns_processes_by_pattern ${source_ns} while + terminate_ns_processes_by_pattern ${source_ns} iperf3 + + # Repeat the same process for sink_ns + terminate_ns_processes_by_pattern ${sink_ns} while + terminate_ns_processes_by_pattern ${sink_ns} iperf3 + + # Check if any iperf3 instances are still running. This could happen + # if a core has entered an infinite loop and the timeout for detecting + # the soft lockup has not expired, but either the test interval has + # already elapsed or the test was terminated manually (e.g., with ^C) + for pid in $(ip netns pids ${source_ns}); do + if [ -e /proc/$pid/cmdline ] && grep -qe 'iperf3' /proc/$pid/cmdline; then + echo "FAIL: unable to terminate some iperf3 instances. Soft lockup is underway. A kernel panic is on the way!" + exit ${ksft_fail} + fi + done + + if [ "$termination_signal" == "SIGINT" ]; then + echo "SKIP: Termination due to ^C (SIGINT)" + elif [ "$termination_signal" == "SIGALRM" ]; then + echo "PASS: No kernel soft lockup occurred during this ${TEST_DURATION} second test" + fi + + cleanup_ns ${source_ns} ${sink_ns} + + sysctl -qw kernel.softlockup_panic=${kernel_softlokup_panic_prev_val} +} + +setup_prepare() { + setup_ns source_ns sink_ns + + ip -n ${source_ns} link add name ${SOURCE_TEST_IFACE} type veth peer name ${SINK_TEST_IFACE} netns ${sink_ns} + + # Setting up the Source namespace + ip -n ${source_ns} addr add ${SOURCE_TEST_IP_ADDR} dev ${SOURCE_TEST_IFACE} + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} qlen 10000 + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} up + ip netns exec ${source_ns} sysctl -qw net.ipv6.fib_multipath_hash_policy=1 + + # Setting up the Sink namespace + ip -n ${sink_ns} addr add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} dev ${SINK_LOOPBACK_IFACE} + ip -n ${sink_ns} link set dev ${SINK_LOOPBACK_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_LOOPBACK_IFACE}.forwarding=1 + + ip -n ${sink_ns} link set ${SINK_TEST_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_TEST_IFACE}.forwarding=1 + + + # Populate nexthop IPv6 addresses on the test interface in the sink_ns + echo "info: populating ${IPV6_NEXTHOP_ADDR_COUNT} IPv6 addresses on the ${SINK_TEST_IFACE} interface ..." + for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + ip -n ${sink_ns} addr add ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" "${IP}")/${IPV6_NEXTHOP_ADDR_MASK} dev ${SINK_TEST_IFACE}; + done + + # Preparing list of nexthops + for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + nexthop_ip_list=$nexthop_ip_list" nexthop via ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" $IP) dev ${SOURCE_TEST_IFACE} weight 1" + done +} + + +test_soft_lockup_during_routing_table_refresh() { + # Start num_of_iperf_servers iperf3 servers in the sink_ns namespace, + # each listening on ports starting at 5001 and incrementing + # sequentially. Since iperf3 instances may terminate unexpectedly, a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 servers in the sink_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 --bind ${SINK_LOOPBACK_IP_ADDR} -s -p $(printf '5%03d' ${i}) --rcv-timeout 200 &>/dev/null" + ip netns exec ${sink_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + # Wait for the iperf3 servers to be ready + for i in $(seq ${num_of_iperf_servers}); do + port=$(printf '5%03d' ${i}); + wait_local_port_listen ${sink_ns} ${port} tcp + done + + # Continuously refresh the routing table in the background within + # the source_ns namespace + ip netns exec ${source_ns} bash -c " + while \$(ip netns list | grep -q ${source_ns}); do + ip -6 route add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} ${nexthop_ip_list}; + sleep ${ROUTING_TABLE_REFRESH_PERIOD}; + ip -6 route delete ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK}; + done &" + + # Start num_of_iperf_servers iperf3 clients in the source_ns namespace, + # each sending TCP traffic on sequential ports starting at 5001. + # Since iperf3 instances may terminate unexpectedly (e.g., if the route + # to the server is deleted in the background during a route refresh), a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 clients in the source_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 -c ${SINK_LOOPBACK_IP_ADDR} -p $(printf '5%03d' ${i}) --length 64 --bitrate ${IPERF3_BITRATE} -t 0 --connect-timeout 150 &>/dev/null" + ip netns exec ${source_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + echo "info: IPv6 routing table is being updated at the rate of $(echo "1/${ROUTING_TABLE_REFRESH_PERIOD}" | bc)/s for ${TEST_DURATION} seconds ..." + echo "info: A kernel soft lockup, if detected, results in a kernel panic!" + + wait +} + +# Make sure 'iperf3' is installed, skip the test otherwise +if [ ! -x "$(command -v "iperf3")" ]; then + echo "SKIP: 'iperf3' is not installed. Skipping the test." + exit ${ksft_skip} +fi + +# Determine the number of cores on the machine +num_of_iperf_servers=$(( $(nproc)/2 )) + +# Check if we are running on a multi-core machine, skip the test otherwise +if [ "${num_of_iperf_servers}" -eq 0 ]; then + echo "SKIP: This test is not valid on a single core machine!" + exit ${ksft_skip} +fi + +# Since the kernel soft lockup we're testing causes at least one core to enter +# an infinite loop, destabilizing the host and likely affecting subsequent +# tests, we trigger a kernel panic instead of reporting a failure and +# continuing +kernel_softlokup_panic_prev_val=$(sysctl -n kernel.softlockup_panic) +sysctl -qw kernel.softlockup_panic=1 + +handle_sigint() { + termination_signal="SIGINT" + cleanup + exit ${ksft_skip} +} + +handle_sigalrm() { + termination_signal="SIGALRM" + cleanup + exit ${ksft_pass} +} + +trap handle_sigint SIGINT +trap handle_sigalrm SIGALRM + +(sleep ${TEST_DURATION} && kill -s SIGALRM $$)& + +setup_prepare +test_soft_lockup_during_routing_table_refresh diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index f9fe182dfbd4..006fdadcc4b9 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -1,96 +1,618 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +net_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") +source "$net_dir/lib/sh/defer.sh" + ############################################################################## # Defines -WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} +: "${WAIT_TIMEOUT:=20}" + +# Whether to pause on after a failure. +: "${PAUSE_ON_FAIL:=no}" + BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms -# Kselftest framework requirement - SKIP code is 4. +# Kselftest framework constants. +ksft_pass=0 +ksft_fail=1 +ksft_xfail=2 ksft_skip=4 + # namespace list created by setup_ns -NS_LIST="" +NS_LIST=() + +# Exit status to return at the end. Set in case one of the tests fails. +EXIT_STATUS=0 +# Per-test return value. Clear at the beginning of each test. +RET=0 ############################################################################## # Helpers -busywait() + +__ksft_status_merge() { - local timeout=$1; shift + local a=$1; shift + local b=$1; shift + local -A weights + local weight=0 + + local i + for i in "$@"; do + weights[$i]=$((weight++)) + done + + if [[ ${weights[$a]} > ${weights[$b]} ]]; then + echo "$a" + return 0 + else + echo "$b" + return 1 + fi +} + +ksft_status_merge() +{ + local a=$1; shift + local b=$1; shift + + __ksft_status_merge "$a" "$b" \ + $ksft_pass $ksft_xfail $ksft_skip $ksft_fail +} + +ksft_exit_status_merge() +{ + local a=$1; shift + local b=$1; shift + + __ksft_status_merge "$a" "$b" \ + $ksft_xfail $ksft_pass $ksft_skip $ksft_fail +} + +loopy_wait() +{ + local sleep_cmd=$1; shift + local timeout_ms=$1; shift local start_time="$(date -u +%s%3N)" while true do local out - out=$("$@") - local ret=$? - if ((!ret)); then + if out=$("$@"); then echo -n "$out" return 0 fi local current_time="$(date -u +%s%3N)" - if ((current_time - start_time > timeout)); then + if ((current_time - start_time > timeout_ms)); then echo -n "$out" return 1 fi + + $sleep_cmd + done +} + +busywait() +{ + local timeout_ms=$1; shift + + loopy_wait : "$timeout_ms" "$@" +} + +# timeout in seconds +slowwait() +{ + local timeout_sec=$1; shift + + loopy_wait "sleep 0.1" "$((timeout_sec * 1000))" "$@" +} + +until_counter_is() +{ + local expr=$1; shift + local current=$("$@") + + echo $((current)) + ((current $expr)) +} + +busywait_for_counter() +{ + local timeout=$1; shift + local delta=$1; shift + + local base=$("$@") + busywait "$timeout" until_counter_is ">= $((base + delta))" "$@" +} + +slowwait_for_counter() +{ + local timeout=$1; shift + local delta=$1; shift + + local base=$("$@") + slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@" +} + +# Check for existence of tools which are built as part of selftests +# but may also already exist in $PATH +check_gen_prog() +{ + local prog_name=$1; shift + + if ! which $prog_name >/dev/null 2>/dev/null; then + PATH=$PWD:$PATH + if ! which $prog_name >/dev/null; then + echo "'$prog_name' command not found; skipping tests" + exit $ksft_skip + fi + fi +} + +remove_ns_list() +{ + local item=$1 + local ns + local ns_list=("${NS_LIST[@]}") + NS_LIST=() + + for ns in "${ns_list[@]}"; do + if [ "${ns}" != "${item}" ]; then + NS_LIST+=("${ns}") + fi done } cleanup_ns() { local ns="" - local errexit=0 local ret=0 - # disable errexit temporary - if [[ $- =~ "e" ]]; then - errexit=1 - set +e - fi - for ns in "$@"; do - ip netns delete "${ns}" &> /dev/null + [ -z "${ns}" ] && continue + ip netns pids "${ns}" 2> /dev/null | xargs -r kill || true + ip netns delete "${ns}" &> /dev/null || true if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then echo "Warn: Failed to remove namespace $ns" ret=1 + else + remove_ns_list "${ns}" fi done - [ $errexit -eq 1 ] && set -e return $ret } cleanup_all_ns() { - cleanup_ns $NS_LIST + cleanup_ns "${NS_LIST[@]}" } # setup netns with given names as prefix. e.g # setup_ns local remote setup_ns() { - local ns="" local ns_name="" - local ns_list="" + local ns_list=() for ns_name in "$@"; do + # avoid conflicts with local var: internal error + if [ "${ns_name}" = "ns_name" ]; then + echo "Failed to setup namespace '${ns_name}': invalid name" + cleanup_ns "${ns_list[@]}" + exit $ksft_fail + fi + # Some test may setup/remove same netns multi times - if unset ${ns_name} 2> /dev/null; then - ns="${ns_name,,}-$(mktemp -u XXXXXX)" - eval readonly ${ns_name}="$ns" + if [ -z "${!ns_name}" ]; then + eval "${ns_name}=${ns_name,,}-$(mktemp -u XXXXXX)" else - eval ns='$'${ns_name} - cleanup_ns "$ns" - + cleanup_ns "${!ns_name}" fi - if ! ip netns add "$ns"; then + if ! ip netns add "${!ns_name}"; then echo "Failed to create namespace $ns_name" - cleanup_ns "$ns_list" + cleanup_ns "${ns_list[@]}" return $ksft_skip fi - ip -n "$ns" link set lo up - ns_list="$ns_list $ns" + ip -n "${!ns_name}" link set lo up + ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.default.rp_filter=0 + ns_list+=("${!ns_name}") + done + NS_LIST+=("${ns_list[@]}") +} + +# Create netdevsim with given id and net namespace. +create_netdevsim() { + local id="$1" + local ns="$2" + + modprobe netdevsim &> /dev/null + udevadm settle + + echo "$id 1" | ip netns exec $ns tee /sys/bus/netdevsim/new_device >/dev/null + local dev=$(ip netns exec $ns ls /sys/bus/netdevsim/devices/netdevsim$id/net) + ip -netns $ns link set dev $dev name nsim$id + ip -netns $ns link set dev nsim$id up + + echo nsim$id +} + +# Remove netdevsim with given id. +cleanup_netdevsim() { + local id="$1" + + if [ -d "/sys/bus/netdevsim/devices/netdevsim$id/net" ]; then + echo "$id" > /sys/bus/netdevsim/del_device + fi +} + +tc_rule_stats_get() +{ + local dev=$1; shift + local pref=$1; shift + local dir=${1:-ingress}; shift + local selector=${1:-.packets}; shift + + tc -j -s filter show dev $dev $dir pref $pref \ + | jq ".[1].options.actions[].stats$selector" +} + +tc_rule_handle_stats_get() +{ + local id=$1; shift + local handle=$1; shift + local selector=${1:-.packets}; shift + local netns=${1:-""}; shift + + tc $netns -j -s filter show $id \ + | jq ".[] | select(.options.handle == $handle) | \ + .options.actions[0].stats$selector" +} + +# attach a qdisc with two children match/no-match and a flower filter to match +tc_set_flower_counter() { + local -r ns=$1 + local -r ipver=$2 + local -r dev=$3 + local -r flower_expr=$4 + + tc -n $ns qdisc add dev $dev root handle 1: prio bands 2 \ + priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + + tc -n $ns qdisc add dev $dev parent 1:1 handle 11: pfifo + tc -n $ns qdisc add dev $dev parent 1:2 handle 12: pfifo + + tc -n $ns filter add dev $dev parent 1: protocol ipv$ipver \ + flower $flower_expr classid 1:2 +} + +tc_get_flower_counter() { + local -r ns=$1 + local -r dev=$2 + + tc -n $ns -j -s qdisc show dev $dev handle 12: | jq .[0].packets +} + +ret_set_ksft_status() +{ + local ksft_status=$1; shift + local msg=$1; shift + + RET=$(ksft_status_merge $RET $ksft_status) + if (( $? )); then + retmsg=$msg + fi +} + +log_test_result() +{ + local test_name=$1; shift + local opt_str=$1; shift + local result=$1; shift + local retmsg=$1; shift + + printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" + if [[ $retmsg ]]; then + printf "\t%s\n" "$retmsg" + fi +} + +pause_on_fail() +{ + if [[ $PAUSE_ON_FAIL == yes ]]; then + echo "Hit enter to continue, 'q' to quit" + read a + [[ $a == q ]] && exit 1 + fi +} + +handle_test_result_pass() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" " OK " +} + +handle_test_result_fail() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" FAIL "$retmsg" + pause_on_fail +} + +handle_test_result_xfail() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" XFAIL "$retmsg" + pause_on_fail +} + +handle_test_result_skip() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" SKIP "$retmsg" +} + +log_test() +{ + local test_name=$1 + local opt_str=$2 + + if [[ $# -eq 2 ]]; then + opt_str="($opt_str)" + fi + + if ((RET == ksft_pass)); then + handle_test_result_pass "$test_name" "$opt_str" + elif ((RET == ksft_xfail)); then + handle_test_result_xfail "$test_name" "$opt_str" + elif ((RET == ksft_skip)); then + handle_test_result_skip "$test_name" "$opt_str" + else + handle_test_result_fail "$test_name" "$opt_str" + fi + + EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET) + return $RET +} + +log_test_skip() +{ + RET=$ksft_skip retmsg= log_test "$@" +} + +log_test_xfail() +{ + RET=$ksft_xfail retmsg= log_test "$@" +} + +log_info() +{ + local msg=$1 + + echo "INFO: $msg" +} + +tests_run() +{ + local current_test + + for current_test in ${TESTS:-$ALL_TESTS}; do + in_defer_scope \ + $current_test + done +} + +# Whether FAILs should be interpreted as XFAILs. Internal. +FAIL_TO_XFAIL= + +check_err() +{ + local err=$1 + local msg=$2 + + if ((err)); then + if [[ $FAIL_TO_XFAIL = yes ]]; then + ret_set_ksft_status $ksft_xfail "$msg" + else + ret_set_ksft_status $ksft_fail "$msg" + fi + fi +} + +check_fail() +{ + local err=$1 + local msg=$2 + + check_err $((!err)) "$msg" +} + +check_err_fail() +{ + local should_fail=$1; shift + local err=$1; shift + local what=$1; shift + + if ((should_fail)); then + check_fail $err "$what succeeded, but should have failed" + else + check_err $err "$what failed" + fi +} + +xfail() +{ + FAIL_TO_XFAIL=yes "$@" +} + +xfail_on_slow() +{ + if [[ $KSFT_MACHINE_SLOW = yes ]]; then + FAIL_TO_XFAIL=yes "$@" + else + "$@" + fi +} + +omit_on_slow() +{ + if [[ $KSFT_MACHINE_SLOW != yes ]]; then + "$@" + fi +} + +xfail_on_veth() +{ + local dev=$1; shift + local kind + + kind=$(ip -j -d link show dev $dev | + jq -r '.[].linkinfo.info_kind') + if [[ $kind = veth ]]; then + FAIL_TO_XFAIL=yes "$@" + else + "$@" + fi +} + +mac_get() +{ + local if_name=$1 + + ip -j link show dev $if_name | jq -r '.[]["address"]' +} + +kill_process() +{ + local pid=$1; shift + + # Suppress noise from killing the process. + { kill $pid && wait $pid; } 2>/dev/null +} + +check_command() +{ + local cmd=$1; shift + + if [[ ! -x "$(command -v "$cmd")" ]]; then + log_test_skip "$cmd not installed" + return $EXIT_STATUS + fi +} + +require_command() +{ + local cmd=$1; shift + + if ! check_command "$cmd"; then + exit $EXIT_STATUS + fi +} + +ip_link_add() +{ + local name=$1; shift + + ip link add name "$name" "$@" + defer ip link del dev "$name" +} + +ip_link_set_master() +{ + local member=$1; shift + local master=$1; shift + + ip link set dev "$member" master "$master" + defer ip link set dev "$member" nomaster +} + +ip_link_set_addr() +{ + local name=$1; shift + local addr=$1; shift + + local old_addr=$(mac_get "$name") + ip link set dev "$name" address "$addr" + defer ip link set dev "$name" address "$old_addr" +} + +ip_link_is_up() +{ + local name=$1; shift + + local state=$(ip -j link show "$name" | + jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"') + [[ $state == "UP" ]] +} + +ip_link_set_up() +{ + local name=$1; shift + + if ! ip_link_is_up "$name"; then + ip link set dev "$name" up + defer ip link set dev "$name" down + fi +} + +ip_link_set_down() +{ + local name=$1; shift + + if ip_link_is_up "$name"; then + ip link set dev "$name" down + defer ip link set dev "$name" up + fi +} + +ip_addr_add() +{ + local name=$1; shift + + ip addr add dev "$name" "$@" + defer ip addr del dev "$name" "$@" +} + +ip_route_add() +{ + ip route add "$@" + defer ip route del "$@" +} + +bridge_vlan_add() +{ + bridge vlan add "$@" + defer bridge vlan del "$@" +} + +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + local protocol="${3}" + local pattern + local i + + pattern=":$(printf "%04X" "${port}") " + + # for tcp protocol additionally check the socket state + [ ${protocol} = "tcp" ] && pattern="${pattern}0A" + for i in $(seq 10); do + if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \ + /proc/net/"${protocol}"* | grep -q "${pattern}"; then + break + fi + sleep 0.1 done - NS_LIST="$NS_LIST $ns_list" } diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore new file mode 100644 index 000000000000..bbc97d6bf556 --- /dev/null +++ b/tools/testing/selftests/net/lib/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +csum +xdp_helper diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile new file mode 100644 index 000000000000..88c4bc461459 --- /dev/null +++ b/tools/testing/selftests/net/lib/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES) +# Additional include paths needed by kselftest.h +CFLAGS += -I../../ + +TEST_FILES := ../../../../../Documentation/netlink/specs +TEST_FILES += ../../../../net/ynl + +TEST_GEN_FILES += csum +TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) +TEST_GEN_FILES += xdp_helper + +TEST_INCLUDES := $(wildcard py/*.py sh/*.sh) + +include ../../lib.mk + +include ../bpf.mk diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/lib/csum.c index 90eb06fefa59..27437590eeb5 100644 --- a/tools/testing/selftests/net/csum.c +++ b/tools/testing/selftests/net/lib/csum.c @@ -654,10 +654,16 @@ static int recv_verify_packet_ipv4(void *nh, int len) { struct iphdr *iph = nh; uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + uint16_t ip_len; if (len < sizeof(*iph) || iph->protocol != proto) return -1; + ip_len = ntohs(iph->tot_len); + if (ip_len > len || ip_len < sizeof(*iph)) + return -1; + + len = ip_len; iph_addr_p = &iph->saddr; if (proto == IPPROTO_TCP) return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph)); @@ -669,20 +675,24 @@ static int recv_verify_packet_ipv6(void *nh, int len) { struct ipv6hdr *ip6h = nh; uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + uint16_t payload_len; if (len < sizeof(*ip6h) || ip6h->nexthdr != proto) return -1; - iph_addr_p = &ip6h->saddr; + payload_len = ntohs(ip6h->payload_len); + if (payload_len > len - sizeof(*ip6h)) + return -1; + iph_addr_p = &ip6h->saddr; if (proto == IPPROTO_TCP) - return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h)); + return recv_verify_packet_tcp(ip6h + 1, payload_len); else - return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h)); + return recv_verify_packet_udp(ip6h + 1, payload_len); } /* return whether auxdata includes TP_STATUS_CSUM_VALID */ -static bool recv_verify_packet_csum(struct msghdr *msg) +static uint32_t recv_get_packet_csum_status(struct msghdr *msg) { struct tpacket_auxdata *aux = NULL; struct cmsghdr *cm; @@ -706,7 +716,7 @@ static bool recv_verify_packet_csum(struct msghdr *msg) if (!aux) error(1, 0, "cmsg: no auxdata"); - return aux->tp_status & TP_STATUS_CSUM_VALID; + return aux->tp_status; } static int recv_packet(int fd) @@ -716,6 +726,7 @@ static int recv_packet(int fd) char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))]; struct pkt *buf = (void *)_buf; struct msghdr msg = {0}; + uint32_t tp_status; struct iovec iov; int len, ret; @@ -737,6 +748,17 @@ static int recv_packet(int fd) if (len == -1) error(1, errno, "recv p"); + tp_status = recv_get_packet_csum_status(&msg); + + /* GRO might coalesce randomized packets. Such GSO packets are + * then reinitialized for csum offload (CHECKSUM_PARTIAL), with + * a pseudo csum. Do not try to validate these checksums. + */ + if (tp_status & TP_STATUS_CSUMNOTREADY) { + fprintf(stderr, "cmsg: GSO packet has partial csum: skip\n"); + continue; + } + if (cfg_family == PF_INET6) ret = recv_verify_packet_ipv6(buf, len); else @@ -753,7 +775,7 @@ static int recv_packet(int fd) * Do not fail if kernel does not validate a good csum: * Absence of validation does not imply invalid. */ - if (recv_verify_packet_csum(&msg) && cfg_bad_csum) { + if (tp_status & TP_STATUS_CSUM_VALID && cfg_bad_csum) { fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n"); bad_validations++; } diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h new file mode 100644 index 000000000000..17dc34a612c6 --- /dev/null +++ b/tools/testing/selftests/net/lib/ksft.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(__NET_KSFT_H__) +#define __NET_KSFT_H__ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +static inline void ksft_ready(void) +{ + const char msg[7] = "ready\n"; + char *env_str; + int fd; + + env_str = getenv("KSFT_READY_FD"); + if (env_str) { + fd = atoi(env_str); + if (!fd) { + fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n", + env_str); + return; + } + } else { + fd = STDOUT_FILENO; + } + + write(fd, msg, sizeof(msg)); + if (fd != STDOUT_FILENO) + close(fd); +} + +static inline void ksft_wait(void) +{ + char *env_str; + char byte; + int fd; + + env_str = getenv("KSFT_WAIT_FD"); + if (env_str) { + fd = atoi(env_str); + if (!fd) { + fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n", + env_str); + return; + } + } else { + /* Not running in KSFT env, wait for input from STDIN instead */ + fd = STDIN_FILENO; + } + + read(fd, &byte, sizeof(byte)); + if (fd != STDIN_FILENO) + close(fd); +} + +#endif diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py new file mode 100644 index 000000000000..8697bd27dc30 --- /dev/null +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 + +from .consts import KSRC +from .ksft import * +from .netns import NetNS, NetNSEnter +from .nsim import * +from .utils import * +from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily +from .ynl import NetshaperFamily diff --git a/tools/testing/selftests/net/lib/py/consts.py b/tools/testing/selftests/net/lib/py/consts.py new file mode 100644 index 000000000000..f518ce79d82c --- /dev/null +++ b/tools/testing/selftests/net/lib/py/consts.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 + +import sys +from pathlib import Path + +KSFT_DIR = (Path(__file__).parent / "../../..").resolve() +KSRC = (Path(__file__).parent / "../../../../../..").resolve() + +KSFT_MAIN_NAME = Path(sys.argv[0]).with_suffix("").name diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py new file mode 100644 index 000000000000..61287c203b6e --- /dev/null +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -0,0 +1,280 @@ +# SPDX-License-Identifier: GPL-2.0 + +import builtins +import functools +import inspect +import signal +import sys +import time +import traceback +from .consts import KSFT_MAIN_NAME +from .utils import global_defer_queue + +KSFT_RESULT = None +KSFT_RESULT_ALL = True +KSFT_DISRUPTIVE = True + + +class KsftFailEx(Exception): + pass + + +class KsftSkipEx(Exception): + pass + + +class KsftXfailEx(Exception): + pass + + +class KsftTerminate(KeyboardInterrupt): + pass + + +def ksft_pr(*objs, **kwargs): + print("#", *objs, **kwargs) + + +def _fail(*args): + global KSFT_RESULT + KSFT_RESULT = False + + stack = inspect.stack() + started = False + for frame in reversed(stack[2:]): + # Start printing from the test case function + if not started: + if frame.function == 'ksft_run': + started = True + continue + + ksft_pr("Check| At " + frame.filename + ", line " + str(frame.lineno) + + ", in " + frame.function + ":") + ksft_pr("Check| " + frame.code_context[0].strip()) + ksft_pr(*args) + + +def ksft_eq(a, b, comment=""): + global KSFT_RESULT + if a != b: + _fail("Check failed", a, "!=", b, comment) + + +def ksft_ne(a, b, comment=""): + global KSFT_RESULT + if a == b: + _fail("Check failed", a, "==", b, comment) + + +def ksft_true(a, comment=""): + if not a: + _fail("Check failed", a, "does not eval to True", comment) + + +def ksft_in(a, b, comment=""): + if a not in b: + _fail("Check failed", a, "not in", b, comment) + + +def ksft_not_in(a, b, comment=""): + if a in b: + _fail("Check failed", a, "in", b, comment) + + +def ksft_is(a, b, comment=""): + if a is not b: + _fail("Check failed", a, "is not", b, comment) + + +def ksft_ge(a, b, comment=""): + if a < b: + _fail("Check failed", a, "<", b, comment) + + +def ksft_lt(a, b, comment=""): + if a >= b: + _fail("Check failed", a, ">=", b, comment) + + +class ksft_raises: + def __init__(self, expected_type): + self.exception = None + self.expected_type = expected_type + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if exc_type is None: + _fail(f"Expected exception {str(self.expected_type.__name__)}, none raised") + elif self.expected_type != exc_type: + _fail(f"Expected exception {str(self.expected_type.__name__)}, raised {str(exc_type.__name__)}") + self.exception = exc_val + # Suppress the exception if its the expected one + return self.expected_type == exc_type + + +def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""): + end = time.monotonic() + deadline + while True: + if cond(): + return + if time.monotonic() > end: + _fail("Waiting for condition timed out", comment) + return + time.sleep(sleep) + + +def ktap_result(ok, cnt=1, case="", comment=""): + global KSFT_RESULT_ALL + KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok + + res = "" + if not ok: + res += "not " + res += "ok " + res += str(cnt) + " " + res += KSFT_MAIN_NAME + if case: + res += "." + str(case.__name__) + if comment: + res += " # " + comment + print(res) + + +def ksft_flush_defer(): + global KSFT_RESULT + + i = 0 + qlen_start = len(global_defer_queue) + while global_defer_queue: + i += 1 + entry = global_defer_queue.pop() + try: + entry.exec_only() + except: + ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!") + tb = traceback.format_exc() + for line in tb.strip().split('\n'): + ksft_pr("Defer Exception|", line) + KSFT_RESULT = False + + +def ksft_disruptive(func): + """ + Decorator that marks the test as disruptive (e.g. the test + that can down the interface). Disruptive tests can be skipped + by passing DISRUPTIVE=False environment variable. + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not KSFT_DISRUPTIVE: + raise KsftSkipEx(f"marked as disruptive") + return func(*args, **kwargs) + return wrapper + + +def ksft_setup(env): + """ + Setup test framework global state from the environment. + """ + + def get_bool(env, name): + value = env.get(name, "").lower() + if value in ["yes", "true"]: + return True + if value in ["no", "false"]: + return False + try: + return bool(int(value)) + except: + raise Exception(f"failed to parse {name}") + + if "DISRUPTIVE" in env: + global KSFT_DISRUPTIVE + KSFT_DISRUPTIVE = get_bool(env, "DISRUPTIVE") + + return env + + +def _ksft_intr(signum, frame): + # ksft runner.sh sends 2 SIGTERMs in a row on a timeout + # if we don't ignore the second one it will stop us from handling cleanup + global term_cnt + term_cnt += 1 + if term_cnt == 1: + raise KsftTerminate() + else: + ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...") + + +def ksft_run(cases=None, globs=None, case_pfx=None, args=()): + cases = cases or [] + + if globs and case_pfx: + for key, value in globs.items(): + if not callable(value): + continue + for prefix in case_pfx: + if key.startswith(prefix): + cases.append(value) + break + + global term_cnt + term_cnt = 0 + prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr) + + totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} + + print("TAP version 13") + print("1.." + str(len(cases))) + + global KSFT_RESULT + cnt = 0 + stop = False + for case in cases: + KSFT_RESULT = True + cnt += 1 + comment = "" + cnt_key = "" + + try: + case(*args) + except KsftSkipEx as e: + comment = "SKIP " + str(e) + cnt_key = 'skip' + except KsftXfailEx as e: + comment = "XFAIL " + str(e) + cnt_key = 'xfail' + except BaseException as e: + stop |= isinstance(e, KeyboardInterrupt) + tb = traceback.format_exc() + for line in tb.strip().split('\n'): + ksft_pr("Exception|", line) + if stop: + ksft_pr(f"Stopping tests due to {type(e).__name__}.") + KSFT_RESULT = False + cnt_key = 'fail' + + ksft_flush_defer() + + if not cnt_key: + cnt_key = 'pass' if KSFT_RESULT else 'fail' + + ktap_result(KSFT_RESULT, cnt, case, comment=comment) + totals[cnt_key] += 1 + + if stop: + break + + signal.signal(signal.SIGTERM, prev_sigterm) + + print( + f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0" + ) + + +def ksft_exit(): + global KSFT_RESULT_ALL + sys.exit(0 if KSFT_RESULT_ALL else 1) diff --git a/tools/testing/selftests/net/lib/py/netns.py b/tools/testing/selftests/net/lib/py/netns.py new file mode 100644 index 000000000000..8e9317044eef --- /dev/null +++ b/tools/testing/selftests/net/lib/py/netns.py @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: GPL-2.0 + +from .utils import ip +import ctypes +import random +import string + +libc = ctypes.cdll.LoadLibrary('libc.so.6') + + +class NetNS: + def __init__(self, name=None): + if name: + self.name = name + else: + self.name = ''.join(random.choice(string.ascii_lowercase) for _ in range(8)) + ip('netns add ' + self.name) + + def __del__(self): + if self.name: + ip('netns del ' + self.name) + self.name = None + + def __enter__(self): + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + self.__del__() + + def __str__(self): + return self.name + + def __repr__(self): + return f"NetNS({self.name})" + + +class NetNSEnter: + def __init__(self, ns_name): + self.ns_path = f"/run/netns/{ns_name}" + + def __enter__(self): + self.saved = open("/proc/thread-self/ns/net") + with open(self.ns_path) as ns_file: + libc.setns(ns_file.fileno(), 0) + return self + + def __exit__(self, exc_type, exc_value, traceback): + libc.setns(self.saved.fileno(), 0) + self.saved.close() diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py new file mode 100644 index 000000000000..1a8cbe9acc48 --- /dev/null +++ b/tools/testing/selftests/net/lib/py/nsim.py @@ -0,0 +1,135 @@ +# SPDX-License-Identifier: GPL-2.0 + +import errno +import json +import os +import random +import re +import time +from .utils import cmd, ip + + +class NetdevSim: + """ + Class for netdevsim netdevice and its attributes. + """ + + def __init__(self, nsimdev, port_index, ifname, ns=None): + # In case udev renamed the netdev to according to new schema, + # check if the name matches the port_index. + nsimnamere = re.compile(r"eni\d+np(\d+)") + match = nsimnamere.match(ifname) + if match and int(match.groups()[0]) != port_index + 1: + raise Exception("netdevice name mismatches the expected one") + + self.ifname = ifname + self.nsimdev = nsimdev + self.port_index = port_index + self.ns = ns + self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index) + ret = ip("-j link show dev %s" % ifname, ns=ns) + self.dev = json.loads(ret.stdout)[0] + self.ifindex = self.dev["ifindex"] + + def dfs_write(self, path, val): + self.nsimdev.dfs_write(f'ports/{self.port_index}/' + path, val) + + +class NetdevSimDev: + """ + Class for netdevsim bus device and its attributes. + """ + @staticmethod + def ctrl_write(path, val): + fullpath = os.path.join("/sys/bus/netdevsim/", path) + with open(fullpath, "w") as f: + f.write(val) + + def dfs_write(self, path, val): + fullpath = os.path.join(f"/sys/kernel/debug/netdevsim/netdevsim{self.addr}/", path) + with open(fullpath, "w") as f: + f.write(val) + + def __init__(self, port_count=1, queue_count=1, ns=None): + # nsim will spawn in init_net, we'll set to actual ns once we switch it there + self.ns = None + + if not os.path.exists("/sys/bus/netdevsim"): + cmd("modprobe netdevsim") + + addr = random.randrange(1 << 15) + while True: + try: + self.ctrl_write("new_device", "%u %u %u" % (addr, port_count, queue_count)) + except OSError as e: + if e.errno == errno.ENOSPC: + addr = random.randrange(1 << 15) + continue + raise e + break + self.addr = addr + + # As probe of netdevsim device might happen from a workqueue, + # so wait here until all netdevs appear. + self.wait_for_netdevs(port_count) + + if ns: + cmd(f"devlink dev reload netdevsim/netdevsim{addr} netns {ns.name}") + self.ns = ns + + cmd("udevadm settle", ns=self.ns) + ifnames = self.get_ifnames() + + self.dfs_dir = "/sys/kernel/debug/netdevsim/netdevsim%u/" % addr + + self.nsims = [] + for port_index in range(port_count): + self.nsims.append(self._make_port(port_index, ifnames[port_index])) + + self.removed = False + + def __enter__(self): + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + """ + __exit__ gets called at the end of a "with" block. + """ + self.remove() + + def _make_port(self, port_index, ifname): + return NetdevSim(self, port_index, ifname, self.ns) + + def get_ifnames(self): + ifnames = [] + listdir = cmd(f"ls /sys/bus/netdevsim/devices/netdevsim{self.addr}/net/", + ns=self.ns).stdout.split() + for ifname in listdir: + ifnames.append(ifname) + ifnames.sort() + return ifnames + + def wait_for_netdevs(self, port_count): + timeout = 5 + timeout_start = time.time() + + while True: + try: + ifnames = self.get_ifnames() + except FileNotFoundError as e: + ifnames = [] + if len(ifnames) == port_count: + break + if time.time() < timeout_start + timeout: + continue + raise Exception("netdevices did not appear within timeout") + + def remove(self): + if not self.removed: + self.ctrl_write("del_device", "%u" % (self.addr, )) + self.removed = True + + def remove_nsim(self, nsim): + self.nsims.remove(nsim) + self.ctrl_write("devices/netdevsim%u/del_port" % (self.addr, ), + "%u" % (nsim.port_index, )) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py new file mode 100644 index 000000000000..34470d65d871 --- /dev/null +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -0,0 +1,212 @@ +# SPDX-License-Identifier: GPL-2.0 + +import errno +import json as _json +import os +import random +import re +import select +import socket +import subprocess +import time + + +class CmdExitFailure(Exception): + def __init__(self, msg, cmd_obj): + super().__init__(msg) + self.cmd = cmd_obj + + +def fd_read_timeout(fd, timeout): + rlist, _, _ = select.select([fd], [], [], timeout) + if rlist: + return os.read(fd, 1024) + else: + raise TimeoutError("Timeout waiting for fd read") + + +class cmd: + """ + Execute a command on local or remote host. + + Use bkg() instead to run a command in the background. + """ + def __init__(self, comm, shell=True, fail=True, ns=None, background=False, + host=None, timeout=5, ksft_wait=None): + if ns: + comm = f'ip netns exec {ns} ' + comm + + self.stdout = None + self.stderr = None + self.ret = None + self.ksft_term_fd = None + + self.comm = comm + if host: + self.proc = host.cmd(comm) + else: + # ksft_wait lets us wait for the background process to fully start, + # we pass an FD to the child process, and wait for it to write back. + # Similarly term_fd tells child it's time to exit. + pass_fds = () + env = os.environ.copy() + if ksft_wait is not None: + rfd, ready_fd = os.pipe() + wait_fd, self.ksft_term_fd = os.pipe() + pass_fds = (ready_fd, wait_fd, ) + env["KSFT_READY_FD"] = str(ready_fd) + env["KSFT_WAIT_FD"] = str(wait_fd) + + self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, pass_fds=pass_fds, + env=env) + if ksft_wait is not None: + os.close(ready_fd) + os.close(wait_fd) + msg = fd_read_timeout(rfd, ksft_wait) + os.close(rfd) + if not msg: + raise Exception("Did not receive ready message") + if not background: + self.process(terminate=False, fail=fail, timeout=timeout) + + def process(self, terminate=True, fail=None, timeout=5): + if fail is None: + fail = not terminate + + if self.ksft_term_fd: + os.write(self.ksft_term_fd, b"1") + if terminate: + self.proc.terminate() + stdout, stderr = self.proc.communicate(timeout) + self.stdout = stdout.decode("utf-8") + self.stderr = stderr.decode("utf-8") + self.proc.stdout.close() + self.proc.stderr.close() + self.ret = self.proc.returncode + + if self.proc.returncode != 0 and fail: + if len(stderr) > 0 and stderr[-1] == "\n": + stderr = stderr[:-1] + raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % + (self.proc.args, stdout, stderr), self) + + +class bkg(cmd): + """ + Run a command in the background. + + Examples usage: + + Run a command on remote host, and wait for it to finish. + This is usually paired with wait_port_listen() to make sure + the command has initialized: + + with bkg("socat ...", exit_wait=True, host=cfg.remote) as nc: + ... + + Run a command and expect it to let us know that it's ready + by writing to a special file descriptor passed via KSFT_READY_FD. + Command will be terminated when we exit the context manager: + + with bkg("my_binary", ksft_wait=5): + """ + def __init__(self, comm, shell=True, fail=None, ns=None, host=None, + exit_wait=False, ksft_wait=None): + super().__init__(comm, background=True, + shell=shell, fail=fail, ns=ns, host=host, + ksft_wait=ksft_wait) + self.terminate = not exit_wait and not ksft_wait + self.check_fail = fail + + if shell and self.terminate: + print("# Warning: combining shell and terminate is risky!") + print("# SIGTERM may not reach the child on zsh/ksh!") + + def __enter__(self): + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + return self.process(terminate=self.terminate, fail=self.check_fail) + + +global_defer_queue = [] + + +class defer: + def __init__(self, func, *args, **kwargs): + global global_defer_queue + + if not callable(func): + raise Exception("defer created with un-callable object, did you call the function instead of passing its name?") + + self.func = func + self.args = args + self.kwargs = kwargs + + self._queue = global_defer_queue + self._queue.append(self) + + def __enter__(self): + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + return self.exec() + + def exec_only(self): + self.func(*self.args, **self.kwargs) + + def cancel(self): + self._queue.remove(self) + + def exec(self): + self.cancel() + self.exec_only() + + +def tool(name, args, json=None, ns=None, host=None): + cmd_str = name + ' ' + if json: + cmd_str += '--json ' + cmd_str += args + cmd_obj = cmd(cmd_str, ns=ns, host=host) + if json: + return _json.loads(cmd_obj.stdout) + return cmd_obj + + +def ip(args, json=None, ns=None, host=None): + if ns: + args = f'-netns {ns} ' + args + return tool('ip', args, json=json, host=host) + + +def ethtool(args, json=None, ns=None, host=None): + return tool('ethtool', args, json=json, ns=ns, host=host) + + +def rand_port(type=socket.SOCK_STREAM): + """ + Get a random unprivileged port. + """ + with socket.socket(socket.AF_INET6, type) as s: + s.bind(("", 0)) + return s.getsockname()[1] + + +def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5): + end = time.monotonic() + deadline + + pattern = f":{port:04X} .* " + if proto == "tcp": # for tcp protocol additionally check the socket state + pattern += "0A" + pattern = re.compile(pattern) + + while True: + data = cmd(f'cat /proc/net/{proto}*', ns=ns, host=host, shell=True).stdout + for row in data.split("\n"): + if pattern.search(row): + return + if time.monotonic() > end: + raise Exception("Waiting for port listen timed out") + time.sleep(sleep) diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py new file mode 100644 index 000000000000..6329ae805abf --- /dev/null +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: GPL-2.0 + +import sys +from pathlib import Path +from .consts import KSRC, KSFT_DIR +from .ksft import ksft_pr, ktap_result + +# Resolve paths +try: + if (KSFT_DIR / "kselftest-list.txt").exists(): + # Running in "installed" selftests + tools_full_path = KSFT_DIR + SPEC_PATH = KSFT_DIR / "net/lib/specs" + + sys.path.append(tools_full_path.as_posix()) + from net.lib.ynl.pyynl.lib import YnlFamily, NlError + else: + # Running in tree + tools_full_path = KSRC / "tools" + SPEC_PATH = KSRC / "Documentation/netlink/specs" + + sys.path.append(tools_full_path.as_posix()) + from net.ynl.pyynl.lib import YnlFamily, NlError +except ModuleNotFoundError as e: + ksft_pr("Failed importing `ynl` library from kernel sources") + ksft_pr(str(e)) + ktap_result(True, comment="SKIP") + sys.exit(4) + +# +# Wrapper classes, loading the right specs +# Set schema='' to avoid jsonschema validation, it's slow +# +class EthtoolFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(), + schema='', recv_size=recv_size) + + +class RtnlFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(), + schema='', recv_size=recv_size) + +class RtnlAddrFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(), + schema='', recv_size=recv_size) + +class NetdevFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(), + schema='', recv_size=recv_size) + +class NetshaperFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), + schema='', recv_size=recv_size) diff --git a/tools/testing/selftests/net/lib/sh/defer.sh b/tools/testing/selftests/net/lib/sh/defer.sh new file mode 100644 index 000000000000..082f5d38321b --- /dev/null +++ b/tools/testing/selftests/net/lib/sh/defer.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# map[(scope_id,track,cleanup_id) -> cleanup_command] +# track={d=default | p=priority} +declare -A __DEFER__JOBS + +# map[(scope_id,track) -> # cleanup_commands] +declare -A __DEFER__NJOBS + +# scope_id of the topmost scope. +__DEFER__SCOPE_ID=0 + +__defer__ndefer_key() +{ + local track=$1; shift + + echo $__DEFER__SCOPE_ID,$track +} + +__defer__defer_key() +{ + local track=$1; shift + local defer_ix=$1; shift + + echo $__DEFER__SCOPE_ID,$track,$defer_ix +} + +__defer__ndefers() +{ + local track=$1; shift + + echo ${__DEFER__NJOBS[$(__defer__ndefer_key $track)]} +} + +__defer__run() +{ + local track=$1; shift + local defer_ix=$1; shift + local defer_key=$(__defer__defer_key $track $defer_ix) + + ${__DEFER__JOBS[$defer_key]} + unset __DEFER__JOBS[$defer_key] +} + +__defer__schedule() +{ + local track=$1; shift + local ndefers=$(__defer__ndefers $track) + local ndefers_key=$(__defer__ndefer_key $track) + local defer_key=$(__defer__defer_key $track $ndefers) + local defer="$@" + + __DEFER__JOBS[$defer_key]="$defer" + __DEFER__NJOBS[$ndefers_key]=$((ndefers + 1)) +} + +__defer__scope_wipe() +{ + __DEFER__NJOBS[$(__defer__ndefer_key d)]=0 + __DEFER__NJOBS[$(__defer__ndefer_key p)]=0 +} + +defer_scope_push() +{ + ((__DEFER__SCOPE_ID++)) + __defer__scope_wipe +} + +defer_scope_pop() +{ + local defer_ix + + for ((defer_ix=$(__defer__ndefers p); defer_ix-->0; )); do + __defer__run p $defer_ix + done + + for ((defer_ix=$(__defer__ndefers d); defer_ix-->0; )); do + __defer__run d $defer_ix + done + + __defer__scope_wipe + ((__DEFER__SCOPE_ID--)) +} + +defer() +{ + __defer__schedule d "$@" +} + +defer_prio() +{ + __defer__schedule p "$@" +} + +defer_scopes_cleanup() +{ + while ((__DEFER__SCOPE_ID >= 0)); do + defer_scope_pop + done +} + +in_defer_scope() +{ + local ret + + defer_scope_push + "$@" + ret=$? + defer_scope_pop + + return $ret +} + +__defer__scope_wipe diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c index d988b2e0cee8..e73fab3edd9f 100644 --- a/tools/testing/selftests/net/xdp_dummy.c +++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c @@ -10,4 +10,10 @@ int xdp_dummy_prog(struct xdp_md *ctx) return XDP_PASS; } +SEC("xdp.frags") +int xdp_dummy_prog_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/lib/xdp_helper.c b/tools/testing/selftests/net/lib/xdp_helper.c new file mode 100644 index 000000000000..eb025a9f35b1 --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_helper.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <linux/if_xdp.h> +#include <linux/if_link.h> +#include <net/if.h> +#include <inttypes.h> + +#include "ksft.h" + +#define UMEM_SZ (1U << 16) +#define NUM_DESC (UMEM_SZ / 2048) + + +static void print_usage(const char *bin) +{ + fprintf(stderr, "Usage: %s ifindex queue_id [-z]\n\n" + "where:\n\t-z: force zerocopy mode", bin); +} + +/* this is a simple helper program that creates an XDP socket and does the + * minimum necessary to get bind() to succeed. + * + * this test program is not intended to actually process packets, but could be + * extended in the future if that is actually needed. + * + * it is used by queues.py to ensure the xsk netlinux attribute is set + * correctly. + */ +int main(int argc, char **argv) +{ + struct xdp_umem_reg umem_reg = { 0 }; + struct sockaddr_xdp sxdp = { 0 }; + int num_desc = NUM_DESC; + void *umem_area; + int retry = 0; + int ifindex; + int sock_fd; + int queue; + + if (argc != 3 && argc != 4) { + print_usage(argv[0]); + return 1; + } + + sock_fd = socket(AF_XDP, SOCK_RAW, 0); + if (sock_fd < 0) { + perror("socket creation failed"); + /* if the kernel doesn't support AF_XDP, let the test program + * know with -1. All other error paths return 1. + */ + if (errno == EAFNOSUPPORT) + return -1; + return 1; + } + + /* "Probing mode", just checking if AF_XDP sockets are supported */ + if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-")) { + printf("AF_XDP support detected\n"); + close(sock_fd); + return 0; + } + + ifindex = atoi(argv[1]); + queue = atoi(argv[2]); + + umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (umem_area == MAP_FAILED) { + perror("mmap failed"); + return 1; + } + + umem_reg.addr = (uintptr_t)umem_area; + umem_reg.len = UMEM_SZ; + umem_reg.chunk_size = 2048; + umem_reg.headroom = 0; + + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg, + sizeof(umem_reg)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc)); + + sxdp.sxdp_family = AF_XDP; + sxdp.sxdp_ifindex = ifindex; + sxdp.sxdp_queue_id = queue; + sxdp.sxdp_flags = 0; + + if (argc > 3) { + if (!strcmp(argv[3], "-z")) { + sxdp.sxdp_flags = XDP_ZEROCOPY; + } else { + print_usage(argv[0]); + return 1; + } + } + + while (1) { + if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0) + break; + + if (errno == EBUSY && retry < 3) { + retry++; + sleep(1); + continue; + } else { + perror("bind failed"); + munmap(umem_area, UMEM_SZ); + close(sock_fd); + return 1; + } + } + + ksft_ready(); + ksft_wait(); + + /* parent program will write a byte to stdin when its ready for this + * helper to exit + */ + + close(sock_fd); + return 0; +} diff --git a/tools/testing/selftests/net/link_netns.py b/tools/testing/selftests/net/link_netns.py new file mode 100755 index 000000000000..aab043c59d69 --- /dev/null +++ b/tools/testing/selftests/net/link_netns.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import time + +from lib.py import ksft_run, ksft_exit, ksft_true +from lib.py import ip +from lib.py import NetNS, NetNSEnter +from lib.py import RtnlFamily + + +LINK_NETNSID = 100 + + +def test_event() -> None: + with NetNS() as ns1, NetNS() as ns2: + with NetNSEnter(str(ns2)): + rtnl = RtnlFamily() + + rtnl.ntf_subscribe("rtnlgrp-link") + + ip(f"netns set {ns2} {LINK_NETNSID}", ns=str(ns1)) + ip(f"link add netns {ns1} link-netnsid {LINK_NETNSID} dummy1 type dummy") + ip(f"link add netns {ns1} dummy2 type dummy", ns=str(ns2)) + + ip("link del dummy1", ns=str(ns1)) + ip("link del dummy2", ns=str(ns1)) + + time.sleep(1) + rtnl.check_ntf() + ksft_true(rtnl.async_msg_queue.empty(), + "Received unexpected link notification") + + +def validate_link_netns(netns, ifname, link_netnsid) -> bool: + link_info = ip(f"-d link show dev {ifname}", ns=netns, json=True) + if not link_info: + return False + return link_info[0].get("link_netnsid") == link_netnsid + + +def test_link_net() -> None: + configs = [ + # type, common args, type args, fallback to dev_net + ("ipvlan", "link dummy1", "", False), + ("macsec", "link dummy1", "", False), + ("macvlan", "link dummy1", "", False), + ("macvtap", "link dummy1", "", False), + ("vlan", "link dummy1", "id 100", False), + ("gre", "", "local 192.0.2.1", True), + ("vti", "", "local 192.0.2.1", True), + ("ipip", "", "local 192.0.2.1", True), + ("ip6gre", "", "local 2001:db8::1", True), + ("ip6tnl", "", "local 2001:db8::1", True), + ("vti6", "", "local 2001:db8::1", True), + ("sit", "", "local 192.0.2.1", True), + ("xfrm", "", "if_id 1", True), + ] + + with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3: + net1, net2, net3 = str(ns1), str(ns2), str(ns3) + + # prepare link netnsid and a dummy link needed by certain drivers + ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2)) + ip("link add dummy1 type dummy", ns=net3) + + cases = [ + # source, "netns", "link-netns", expected link-netns + (net3, None, None, None, None), + (net3, net2, None, None, LINK_NETNSID), + (net2, None, net3, LINK_NETNSID, LINK_NETNSID), + (net1, net2, net3, LINK_NETNSID, LINK_NETNSID), + ] + + for src_net, netns, link_netns, exp1, exp2 in cases: + tgt_net = netns or src_net + for typ, cargs, targs, fb_dev_net in configs: + cmd = "link add" + if netns: + cmd += f" netns {netns}" + if link_netns: + cmd += f" link-netns {link_netns}" + cmd += f" {cargs} foo type {typ} {targs}" + ip(cmd, ns=src_net) + if fb_dev_net: + ksft_true(validate_link_netns(tgt_net, "foo", exp1), + f"{typ} link_netns validation failed") + else: + ksft_true(validate_link_netns(tgt_net, "foo", exp2), + f"{typ} link_netns validation failed") + ip(f"link del foo", ns=tgt_net) + + +def test_peer_net() -> None: + types = [ + "vxcan", + "netkit", + "veth", + ] + + with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3, NetNS() as ns4: + net1, net2, net3, net4 = str(ns1), str(ns2), str(ns3), str(ns4) + + ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2)) + + cases = [ + # source, "netns", "link-netns", "peer netns", expected + (net1, None, None, None, None), + (net1, net2, None, None, None), + (net2, None, net3, None, LINK_NETNSID), + (net1, net2, net3, None, None), + (net2, None, None, net3, LINK_NETNSID), + (net1, net2, None, net3, LINK_NETNSID), + (net2, None, net2, net3, LINK_NETNSID), + (net1, net2, net4, net3, LINK_NETNSID), + ] + + for src_net, netns, link_netns, peer_netns, exp in cases: + tgt_net = netns or src_net + for typ in types: + cmd = "link add" + if netns: + cmd += f" netns {netns}" + if link_netns: + cmd += f" link-netns {link_netns}" + cmd += f" foo type {typ}" + if peer_netns: + cmd += f" peer netns {peer_netns}" + ip(cmd, ns=src_net) + ksft_true(validate_link_netns(tgt_net, "foo", exp), + f"{typ} peer_netns validation failed") + ip(f"link del foo", ns=tgt_net) + + +def main() -> None: + ksft_run([test_event, test_link_net, test_peer_net]) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh new file mode 100755 index 000000000000..881eb399798f --- /dev/null +++ b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh @@ -0,0 +1,246 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Author: Justin Iurman <justin.iurman@uliege.be> +# +# WARNING +# ------- +# This is just a dummy script that triggers encap cases with possible dst cache +# reference loops in affected lwt users (see list below). Some cases are +# pathological configurations for simplicity, others are valid. Overall, we +# don't want this issue to happen, no matter what. In order to catch any +# reference loops, kmemleak MUST be used. The results alone are always blindly +# successful, don't rely on them. Note that the following tests may crash the +# kernel if the fix to prevent lwtunnel_{input|output|xmit}() reentry loops is +# not present. +# +# Affected lwt users so far (please update accordingly if needed): +# - ila_lwt (output only) +# - ioam6_iptunnel (output only) +# - rpl_iptunnel (both input and output) +# - seg6_iptunnel (both input and output) + +source lib.sh + +check_compatibility() +{ + setup_ns tmp_node &>/dev/null + if [ $? != 0 ]; then + echo "SKIP: Cannot create netns." + exit $ksft_skip + fi + + ip link add name veth0 netns $tmp_node type veth \ + peer name veth1 netns $tmp_node &>/dev/null + local ret=$? + + ip -netns $tmp_node link set veth0 up &>/dev/null + ret=$((ret + $?)) + + ip -netns $tmp_node link set veth1 up &>/dev/null + ret=$((ret + $?)) + + if [ $ret != 0 ]; then + echo "SKIP: Cannot configure links." + cleanup_ns $tmp_node + exit $ksft_skip + fi + + lsmod 2>/dev/null | grep -q "ila" + ila_lsmod=$? + [ $ila_lsmod != 0 ] && modprobe ila &>/dev/null + + ip -netns $tmp_node route add 2001:db8:1::/64 \ + encap ila 1:2:3:4 csum-mode no-action ident-type luid \ + hook-type output \ + dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 0 size 4 \ + dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:3::/64 \ + encap rpl segs 2001:db8:3::1 dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:4::/64 \ + encap seg6 mode inline segs 2001:db8:4::1 dev veth0 &>/dev/null + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ila" + skip_ila=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ioam6" + skip_ioam6=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap rpl" + skip_rpl=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap seg6" + skip_seg6=$? + + cleanup_ns $tmp_node +} + +setup() +{ + setup_ns alpha beta gamma &>/dev/null + + ip link add name veth-alpha netns $alpha type veth \ + peer name veth-betaL netns $beta &>/dev/null + + ip link add name veth-betaR netns $beta type veth \ + peer name veth-gamma netns $gamma &>/dev/null + + ip -netns $alpha link set veth-alpha name veth0 &>/dev/null + ip -netns $beta link set veth-betaL name veth0 &>/dev/null + ip -netns $beta link set veth-betaR name veth1 &>/dev/null + ip -netns $gamma link set veth-gamma name veth0 &>/dev/null + + ip -netns $alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null + ip -netns $alpha link set veth0 up &>/dev/null + ip -netns $alpha link set lo up &>/dev/null + ip -netns $alpha route add 2001:db8:2::/64 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + ip -netns $beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null + ip -netns $beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null + ip -netns $beta link set veth0 up &>/dev/null + ip -netns $beta link set veth1 up &>/dev/null + ip -netns $beta link set lo up &>/dev/null + ip -netns $beta route del 2001:db8:2::/64 + ip -netns $beta route add 2001:db8:2::/64 dev veth1 + ip netns exec $beta \ + sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null + + ip -netns $gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null + ip -netns $gamma link set veth0 up &>/dev/null + ip -netns $gamma link set lo up &>/dev/null + ip -netns $gamma route add 2001:db8:1::/64 \ + via 2001:db8:2::1 dev veth0 &>/dev/null + + sleep 1 + + ip netns exec $alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null + if [ $? != 0 ]; then + echo "SKIP: Setup failed." + exit $ksft_skip + fi + + sleep 1 +} + +cleanup() +{ + cleanup_ns $alpha $beta $gamma + [ $ila_lsmod != 0 ] && modprobe -r ila &>/dev/null +} + +run_ila() +{ + if [ $skip_ila != 0 ]; then + echo "SKIP: ila (output)" + return + fi + + ip -netns $beta route del 2001:db8:2::/64 + ip -netns $beta route add 2001:db8:2:0:0:0:0:2/128 \ + encap ila 2001:db8:2:0 csum-mode no-action ident-type luid \ + hook-type output \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: ila (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + ip -netns $beta route del 2001:db8:2:0:0:0:0:2/128 + ip -netns $beta route add 2001:db8:2::/64 dev veth1 + sleep 1 +} + +run_ioam6() +{ + if [ $skip_ioam6 != 0 ]; then + echo "SKIP: ioam6 (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 1 size 4 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: ioam6 (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run_rpl() +{ + if [ $skip_rpl != 0 ]; then + echo "SKIP: rpl (input)" + echo "SKIP: rpl (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap rpl segs 2001:db8:2::2 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: rpl (input)" + ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + echo "TEST: rpl (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run_seg6() +{ + if [ $skip_seg6 != 0 ]; then + echo "SKIP: seg6 (input)" + echo "SKIP: seg6 (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap seg6 mode inline segs 2001:db8:2::2 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: seg6 (input)" + ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + echo "TEST: seg6 (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run() +{ + run_ila + run_ioam6 + run_rpl + run_seg6 +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges." + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool." + exit $ksft_skip +fi + +check_compatibility + +trap cleanup EXIT + +setup +run + +exit $ksft_pass diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index 49daae73c41e..833279fb34e2 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only mptcp_connect +mptcp_diag mptcp_inq mptcp_sockopt pm_nl_ctl diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 7b936a926859..e47788bfa671 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -2,15 +2,17 @@ top_srcdir = ../../../../.. -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh -TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq +TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag TEST_FILES := mptcp_lib.sh settings +TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) + EXTRA_CLEAN := *.pcap include ../../lib.mk diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index bc97ab33a00e..7a3cb4c09e45 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -200,6 +200,114 @@ chk_msk_cestab() "${expected}" "${msg}" "" } +chk_dump_one() +{ + local ss_token + local token + local msg + + ss_token="$(ss -inmHMN $ns | + mptcp_lib_get_info_value "token" "token")" + + token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\ + awk -F':[ \t]+' '/^token/ {print $2}')" + + msg="....chk dump_one" + + mptcp_lib_print_title "$msg" + if [ -n "$ss_token" ] && [ "$ss_token" = "$token" ]; then + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + else + mptcp_lib_pr_fail "expected $ss_token found $token" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi +} + +chk_dump_subflow() +{ + local inet_diag_token + local subflow_line + local ss_output + local ss_token + local msg + + ss_output=$(ss -tniN $ns) + + subflow_line=$(echo "$ss_output" | \ + grep -m1 -Eo '[0-9.]+:[0-9].+ +[0-9.]+:[0-9.]+') + + ss_token=$(echo "$ss_output" | grep -m1 -Eo 'token:[^ ]+') + + inet_diag_token=$(ip netns exec $ns ./mptcp_diag -s "$subflow_line" | \ + grep -Eo 'token:[^ ]+') + + msg="....chk dump_subflow" + + mptcp_lib_print_title "$msg" + if [ -n "$ss_token" ] && [ "$ss_token" = "$inet_diag_token" ]; then + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + else + mptcp_lib_pr_fail "expected $ss_token found $inet_diag_token" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi +} + +msk_info_get_value() +{ + local port="${1}" + local info="${2}" + + ss -N "${ns}" -inHM dport "${port}" | \ + mptcp_lib_get_info_value "${info}" "${info}" +} + +chk_msk_info() +{ + local port="${1}" + local info="${2}" + local cnt="${3}" + local msg="....chk ${info}" + local delta_ms=250 # half what we waited before, just to be sure + local now + + now=$(msk_info_get_value "${port}" "${info}") + + mptcp_lib_print_title "${msg}" + if { [ -z "${cnt}" ] || [ -z "${now}" ]; } && + ! mptcp_lib_expect_all_features; then + mptcp_lib_pr_skip "Feature probably not supported" + mptcp_lib_result_skip "${msg}" + elif [ "$((cnt + delta_ms))" -lt "${now}" ]; then + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + else + mptcp_lib_pr_fail "value of ${info} changed by $((now - cnt))ms," \ + "expected at least ${delta_ms}ms" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi +} + +chk_last_time_info() +{ + local port="${1}" + local data_sent data_recv ack_recv + + data_sent=$(msk_info_get_value "${port}" "last_data_sent") + data_recv=$(msk_info_get_value "${port}" "last_data_recv") + ack_recv=$(msk_info_get_value "${port}" "last_ack_recv") + + sleep 0.5 # wait to check after if the timestamps difference + + chk_msk_info "${port}" "last_data_sent" "${data_sent}" + chk_msk_info "${port}" "last_data_recv" "${data_recv}" + chk_msk_info "${port}" "last_ack_recv" "${ack_recv}" +} + wait_connected() { local listener_ns="${1}" @@ -232,11 +340,14 @@ echo "b" | \ ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \ 127.0.0.1 >/dev/null & wait_connected $ns 10000 -chk_msk_nr 2 "after MPC handshake " +chk_msk_nr 2 "after MPC handshake" +chk_last_time_info 10000 chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" chk_msk_inuse 2 chk_msk_cestab 2 +chk_dump_one +chk_dump_subflow flush_pids chk_msk_inuse 0 "2->0" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index d2043ec3bf6d..ac1349c4b9e5 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -25,6 +25,8 @@ #include <sys/types.h> #include <sys/mman.h> +#include <arpa/inet.h> + #include <netdb.h> #include <netinet/in.h> @@ -178,13 +180,26 @@ static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen, } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { +again: int err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + /* glibc starts to support MPTCP since v2.42. + * For older versions, use IPPROTO_TCP to resolve, + * and use TCP/MPTCP to create socket. + * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82 + */ + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? service : "", errstr); @@ -290,7 +305,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -354,7 +369,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, int infd, struct wstate *winfo) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; @@ -1115,11 +1130,11 @@ again: return 1; } - if (--cfg_repeat > 0) { - if (cfg_input) - close(fd); + if (cfg_input) + close(fd); + + if (--cfg_repeat > 0) goto again; - } return 0; } @@ -1211,23 +1226,42 @@ static void parse_setsock_options(const char *name) exit(1); } -void xdisconnect(int fd, int addrlen) +void xdisconnect(int fd) { - struct sockaddr_storage empty; + socklen_t addrlen = sizeof(struct sockaddr_storage); + struct sockaddr_storage addr, empty; int msec_sleep = 10; - int queued = 1; - int i; + void *raw_addr; + int i, cmdlen; + char cmd[128]; + + /* get the local address and convert it to string */ + if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0) + xerror("getsockname"); + + if (addr.ss_family == AF_INET) + raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr); + else if (addr.ss_family == AF_INET6) + raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr); + else + xerror("bad family"); + + strcpy(cmd, "ss -M | grep -q "); + cmdlen = strlen(cmd); + if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen], + sizeof(cmd) - cmdlen)) + xerror("inet_ntop"); shutdown(fd, SHUT_WR); - /* while until the pending data is completely flushed, the later + /* + * wait until the pending data is completely flushed and all + * the MPTCP sockets reached the closed status. * disconnect will bypass/ignore/drop any pending data. */ for (i = 0; ; i += msec_sleep) { - if (ioctl(fd, SIOCOUTQ, &queued) < 0) - xerror("can't query out socket queue: %d", errno); - - if (!queued) + /* closed socket are not listed by 'ss' */ + if (system(cmd) != 0) break; if (i > poll_timeout) @@ -1249,7 +1283,7 @@ int main_loop(void) if (cfg_input && cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); - if (fd < 0) + if (fd_in < 0) xerror("can't open %s:%d", cfg_input, errno); } @@ -1272,18 +1306,18 @@ again: if (cfg_input && !cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); - if (fd < 0) + if (fd_in < 0) xerror("can't open %s:%d", cfg_input, errno); } ret = copyfd_io(fd_in, fd, 1, 0, &winfo); if (ret) - return ret; + goto out; if (cfg_truncate > 0) { - xdisconnect(fd, peer->ai_addrlen); + shutdown(fd, SHUT_WR); } else if (--cfg_repeat > 0) { - xdisconnect(fd, peer->ai_addrlen); + xdisconnect(fd); /* the socket could be unblocking at this point, we need the * connect to be blocking @@ -1299,7 +1333,10 @@ again: close(fd); } - return 0; +out: + if (cfg_input) + close(fd_in); + return ret; } int parse_proto(const char *proto) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 4131f3263a48..5e3c56253274 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -137,7 +137,7 @@ TEST_GROUP="" #shellcheck disable=SC2317 cleanup() { - rm -f "$cin_disconnect" "$cout_disconnect" + rm -f "$cin_disconnect" rm -f "$cin" "$cout" rm -f "$sin" "$sout" rm -f "$capout" @@ -147,7 +147,7 @@ cleanup() mptcp_lib_check_mptcp mptcp_lib_check_kallsyms -mptcp_lib_check_tools ip +mptcp_lib_check_tools ip tc sin=$(mktemp) sout=$(mktemp) @@ -155,7 +155,6 @@ cin=$(mktemp) cout=$(mktemp) capout=$(mktemp) cin_disconnect="$cin".disconnect -cout_disconnect="$cout".disconnect trap cleanup EXIT mptcp_lib_ns_init ns1 ns2 ns3 ns4 @@ -259,6 +258,15 @@ check_mptcp_disabled() mptcp_lib_ns_init disabled_ns print_larger_title "New MPTCP socket can be blocked via sysctl" + + # mainly to cover more code + if ! ip netns exec ${disabled_ns} sysctl net.mptcp >/dev/null; then + mptcp_lib_pr_fail "not able to list net.mptcp sysctl knobs" + mptcp_lib_result_fail "not able to list net.mptcp sysctl knobs" + ret=${KSFT_FAIL} + return 1 + fi + # net.mptcp.enabled should be enabled by default if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default" @@ -345,9 +353,11 @@ do_transfer() local addr_port addr_port=$(printf "%s:%d" ${connect_addr} ${port}) - local result_msg - result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" - mptcp_lib_print_title "${result_msg}" + local pretty_title + pretty_title="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" + mptcp_lib_print_title "${pretty_title}" + + local tap_title="${connector_ns:0:3} ${cl_proto} -> ${listener_ns:0:3} (${addr_port}) ${srv_proto}" if $capture; then local capuser @@ -431,20 +441,15 @@ do_transfer() local duration duration=$((stop-start)) - result_msg+=" # time=${duration}ms" printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" - cat /tmp/${listener_ns}.out - echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" - [ ${listener_ns} != ${connector_ns} ] && cat /tmp/${connector_ns}.out + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ + "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" echo cat "$capout" - mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}" return 1 fi @@ -544,12 +549,12 @@ do_transfer() if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then mptcp_lib_pr_ok "${extra:1}" - mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_pass "${TEST_GROUP}: ${tap_title}" else if [ -n "${extra}" ]; then mptcp_lib_print_warn "${extra:1}" fi - mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}" fi cat "$capout" @@ -577,7 +582,7 @@ make_file() mptcp_lib_make_file $name 1024 $ksize dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null - echo "Created $name (size $(du -b "$name")) containing data sent by $who" + echo "Created $name (size $(stat -c "%s" "$name") B) containing data sent by $who" } run_tests_lo() @@ -848,6 +853,8 @@ stop_if_error() make_file "$cin" "client" make_file "$sin" "server" +mptcp_lib_subtests_last_ts_reset + check_mptcp_disabled stop_if_error "The kernel configuration is not valid for MPTCP" diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c new file mode 100644 index 000000000000..e084796e804d --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c @@ -0,0 +1,435 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025, Kylin Software */ + +#include <linux/sock_diag.h> +#include <linux/rtnetlink.h> +#include <linux/inet_diag.h> +#include <linux/netlink.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <linux/tcp.h> +#include <arpa/inet.h> + +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + +#define parse_rtattr_nested(tb, max, rta) \ + (parse_rtattr_flags((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta), \ + NLA_F_NESTED)) + +struct params { + __u32 target_token; + char subflow_addrs[1024]; +}; + +struct mptcp_info { + __u8 mptcpi_subflows; + __u8 mptcpi_add_addr_signal; + __u8 mptcpi_add_addr_accepted; + __u8 mptcpi_subflows_max; + __u8 mptcpi_add_addr_signal_max; + __u8 mptcpi_add_addr_accepted_max; + __u32 mptcpi_flags; + __u32 mptcpi_token; + __u64 mptcpi_write_seq; + __u64 mptcpi_snd_una; + __u64 mptcpi_rcv_nxt; + __u8 mptcpi_local_addr_used; + __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; + __u8 mptcpi_subflows_total; + __u8 reserved[3]; + __u32 mptcpi_last_data_sent; + __u32 mptcpi_last_data_recv; + __u32 mptcpi_last_ack_recv; +}; + +enum { + MPTCP_SUBFLOW_ATTR_UNSPEC, + MPTCP_SUBFLOW_ATTR_TOKEN_REM, + MPTCP_SUBFLOW_ATTR_TOKEN_LOC, + MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ, + MPTCP_SUBFLOW_ATTR_MAP_SEQ, + MPTCP_SUBFLOW_ATTR_MAP_SFSEQ, + MPTCP_SUBFLOW_ATTR_SSN_OFFSET, + MPTCP_SUBFLOW_ATTR_MAP_DATALEN, + MPTCP_SUBFLOW_ATTR_FLAGS, + MPTCP_SUBFLOW_ATTR_ID_REM, + MPTCP_SUBFLOW_ATTR_ID_LOC, + MPTCP_SUBFLOW_ATTR_PAD, + + __MPTCP_SUBFLOW_ATTR_MAX +}; + +#define MPTCP_SUBFLOW_ATTR_MAX (__MPTCP_SUBFLOW_ATTR_MAX - 1) + +#define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0) +#define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1) +#define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2) +#define MPTCP_SUBFLOW_FLAG_JOIN_LOC _BITUL(3) +#define MPTCP_SUBFLOW_FLAG_BKUP_REM _BITUL(4) +#define MPTCP_SUBFLOW_FLAG_BKUP_LOC _BITUL(5) +#define MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED _BITUL(6) +#define MPTCP_SUBFLOW_FLAG_CONNECTED _BITUL(7) +#define MPTCP_SUBFLOW_FLAG_MAPVALID _BITUL(8) + +#define rta_getattr(type, value) (*(type *)RTA_DATA(value)) + +static void die_perror(const char *msg) +{ + perror(msg); + exit(1); +} + +static void die_usage(int r) +{ + fprintf(stderr, "Usage:\n" + "mptcp_diag -t <token>\n" + "mptcp_diag -s \"<saddr>:<sport> <daddr>:<dport>\"\n"); + exit(r); +} + +static void send_query(int fd, struct inet_diag_req_v2 *r, __u32 proto) +{ + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct { + struct nlmsghdr nlh; + struct inet_diag_req_v2 r; + } req = { + .nlh = { + .nlmsg_len = sizeof(req), + .nlmsg_type = SOCK_DIAG_BY_FAMILY, + .nlmsg_flags = NLM_F_REQUEST + }, + .r = *r + }; + struct rtattr rta_proto; + struct iovec iov[6]; + int iovlen = 0; + + iov[iovlen++] = (struct iovec) { + .iov_base = &req, + .iov_len = sizeof(req) + }; + + if (proto == IPPROTO_MPTCP) { + rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL; + rta_proto.rta_len = RTA_LENGTH(sizeof(proto)); + + iov[iovlen++] = (struct iovec){ &rta_proto, sizeof(rta_proto)}; + iov[iovlen++] = (struct iovec){ &proto, sizeof(proto)}; + req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto)); + } + + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = iovlen + }; + + for (;;) { + if (sendmsg(fd, &msg, 0) < 0) { + if (errno == EINTR) + continue; + die_perror("sendmsg"); + } + break; + } +} + +static void parse_rtattr_flags(struct rtattr *tb[], int max, struct rtattr *rta, + int len, unsigned short flags) +{ + unsigned short type; + + memset(tb, 0, sizeof(struct rtattr *) * (max + 1)); + while (RTA_OK(rta, len)) { + type = rta->rta_type & ~flags; + if (type <= max && !tb[type]) + tb[type] = rta; + rta = RTA_NEXT(rta, len); + } +} + +static void print_info_msg(struct mptcp_info *info) +{ + printf("Token & Flags\n"); + printf("token: %x\n", info->mptcpi_token); + printf("flags: %x\n", info->mptcpi_flags); + printf("csum_enabled: %u\n", info->mptcpi_csum_enabled); + + printf("\nBasic Info\n"); + printf("subflows: %u\n", info->mptcpi_subflows); + printf("subflows_max: %u\n", info->mptcpi_subflows_max); + printf("subflows_total: %u\n", info->mptcpi_subflows_total); + printf("local_addr_used: %u\n", info->mptcpi_local_addr_used); + printf("local_addr_max: %u\n", info->mptcpi_local_addr_max); + printf("add_addr_signal: %u\n", info->mptcpi_add_addr_signal); + printf("add_addr_accepted: %u\n", info->mptcpi_add_addr_accepted); + printf("add_addr_signal_max: %u\n", info->mptcpi_add_addr_signal_max); + printf("add_addr_accepted_max: %u\n", info->mptcpi_add_addr_accepted_max); + + printf("\nTransmission Info\n"); + printf("write_seq: %llu\n", info->mptcpi_write_seq); + printf("snd_una: %llu\n", info->mptcpi_snd_una); + printf("rcv_nxt: %llu\n", info->mptcpi_rcv_nxt); + printf("last_data_sent: %u\n", info->mptcpi_last_data_sent); + printf("last_data_recv: %u\n", info->mptcpi_last_data_recv); + printf("last_ack_recv: %u\n", info->mptcpi_last_ack_recv); + printf("retransmits: %u\n", info->mptcpi_retransmits); + printf("retransmit bytes: %llu\n", info->mptcpi_bytes_retrans); + printf("bytes_sent: %llu\n", info->mptcpi_bytes_sent); + printf("bytes_received: %llu\n", info->mptcpi_bytes_received); + printf("bytes_acked: %llu\n", info->mptcpi_bytes_acked); +} + +/* + * 'print_subflow_info' is from 'mptcp_subflow_info' + * which is a function in 'misc/ss.c' of iproute2. + */ +static void print_subflow_info(struct rtattr *tb[]) +{ + u_int32_t flags = 0; + + printf("It's a mptcp subflow, the subflow info:\n"); + if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) { + char caps[32 + 1] = { 0 }, *cap = &caps[0]; + + flags = rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_FLAGS]); + + if (flags & MPTCP_SUBFLOW_FLAG_MCAP_REM) + *cap++ = 'M'; + if (flags & MPTCP_SUBFLOW_FLAG_MCAP_LOC) + *cap++ = 'm'; + if (flags & MPTCP_SUBFLOW_FLAG_JOIN_REM) + *cap++ = 'J'; + if (flags & MPTCP_SUBFLOW_FLAG_JOIN_LOC) + *cap++ = 'j'; + if (flags & MPTCP_SUBFLOW_FLAG_BKUP_REM) + *cap++ = 'B'; + if (flags & MPTCP_SUBFLOW_FLAG_BKUP_LOC) + *cap++ = 'b'; + if (flags & MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED) + *cap++ = 'e'; + if (flags & MPTCP_SUBFLOW_FLAG_CONNECTED) + *cap++ = 'c'; + if (flags & MPTCP_SUBFLOW_FLAG_MAPVALID) + *cap++ = 'v'; + + if (flags) + printf(" flags:%s", caps); + } + if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] && + tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] && + tb[MPTCP_SUBFLOW_ATTR_ID_REM] && + tb[MPTCP_SUBFLOW_ATTR_ID_LOC]) + printf(" token:%04x(id:%u)/%04x(id:%u)", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]), + rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_REM]), + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]), + rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_LOC])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]) + printf(" seq:%llu", + rta_getattr(__u64, tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]) + printf(" sfseq:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])); + if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]) + printf(" ssnoff:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]) + printf(" maplen:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])); + printf("\n"); +} + +static void parse_nlmsg(struct nlmsghdr *nlh, __u32 proto) +{ + struct inet_diag_msg *r = NLMSG_DATA(nlh); + struct rtattr *tb[INET_DIAG_MAX + 1]; + + parse_rtattr_flags(tb, INET_DIAG_MAX, (struct rtattr *)(r + 1), + nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)), + NLA_F_NESTED); + + if (proto == IPPROTO_MPTCP && tb[INET_DIAG_INFO]) { + int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]); + struct mptcp_info *info; + + /* workaround fort older kernels with less fields */ + if (len < sizeof(*info)) { + info = alloca(sizeof(*info)); + memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len); + memset((char *)info + len, 0, sizeof(*info) - len); + } else { + info = RTA_DATA(tb[INET_DIAG_INFO]); + } + print_info_msg(info); + } + if (proto == IPPROTO_TCP && tb[INET_DIAG_ULP_INFO]) { + struct rtattr *ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 }; + + parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX, + tb[INET_DIAG_ULP_INFO]); + + if (ulpinfo[INET_ULP_INFO_MPTCP]) { + struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] = { 0 }; + + parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX, + ulpinfo[INET_ULP_INFO_MPTCP]); + print_subflow_info(sfinfo); + } else { + printf("It's a normal TCP!\n"); + } + } +} + +static void recv_nlmsg(int fd, __u32 proto) +{ + char rcv_buff[8192]; + struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buff; + struct sockaddr_nl rcv_nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec rcv_iov = { + .iov_base = rcv_buff, + .iov_len = sizeof(rcv_buff) + }; + struct msghdr rcv_msg = { + .msg_name = &rcv_nladdr, + .msg_namelen = sizeof(rcv_nladdr), + .msg_iov = &rcv_iov, + .msg_iovlen = 1 + }; + int len; + + len = recvmsg(fd, &rcv_msg, 0); + + while (NLMSG_OK(nlh, len)) { + if (nlh->nlmsg_type == NLMSG_DONE) { + printf("NLMSG_DONE\n"); + break; + } else if (nlh->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err; + + err = (struct nlmsgerr *)NLMSG_DATA(nlh); + printf("Error %d:%s\n", + -(err->error), strerror(-(err->error))); + break; + } + parse_nlmsg(nlh, proto); + nlh = NLMSG_NEXT(nlh, len); + } +} + +static void get_mptcpinfo(__u32 token) +{ + struct inet_diag_req_v2 r = { + .sdiag_family = AF_INET, + /* Real proto is set via INET_DIAG_REQ_PROTOCOL */ + .sdiag_protocol = IPPROTO_TCP, + .idiag_ext = 1 << (INET_DIAG_INFO - 1), + .id.idiag_cookie[0] = token, + }; + __u32 proto = IPPROTO_MPTCP; + int fd; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (fd < 0) + die_perror("Netlink socket"); + + send_query(fd, &r, proto); + recv_nlmsg(fd, proto); + + close(fd); +} + +static void get_subflow_info(char *subflow_addrs) +{ + struct inet_diag_req_v2 r = { + .sdiag_family = AF_INET, + .sdiag_protocol = IPPROTO_TCP, + .idiag_ext = 1 << (INET_DIAG_INFO - 1), + .id.idiag_cookie[0] = INET_DIAG_NOCOOKIE, + .id.idiag_cookie[1] = INET_DIAG_NOCOOKIE, + }; + char saddr[64], daddr[64]; + int sport, dport; + int ret; + int fd; + + ret = sscanf(subflow_addrs, "%[^:]:%d %[^:]:%d", saddr, &sport, daddr, &dport); + if (ret != 4) + die_perror("IP PORT Pairs has style problems!"); + + printf("%s:%d -> %s:%d\n", saddr, sport, daddr, dport); + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (fd < 0) + die_perror("Netlink socket"); + + r.id.idiag_sport = htons(sport); + r.id.idiag_dport = htons(dport); + + inet_pton(AF_INET, saddr, &r.id.idiag_src); + inet_pton(AF_INET, daddr, &r.id.idiag_dst); + send_query(fd, &r, IPPROTO_TCP); + recv_nlmsg(fd, IPPROTO_TCP); +} + +static void parse_opts(int argc, char **argv, struct params *p) +{ + int c; + + if (argc < 2) + die_usage(1); + + while ((c = getopt(argc, argv, "ht:s:")) != -1) { + switch (c) { + case 'h': + die_usage(0); + break; + case 't': + sscanf(optarg, "%x", &p->target_token); + break; + case 's': + strncpy(p->subflow_addrs, optarg, + sizeof(p->subflow_addrs) - 1); + break; + default: + die_usage(1); + break; + } + } +} + +int main(int argc, char *argv[]) +{ + struct params p = { 0 }; + + parse_opts(argc, argv, &p); + + if (p.target_token) + get_mptcpinfo(p.target_token); + + if (p.subflow_addrs[0] != '\0') + get_subflow_info(p.subflow_addrs); + + return 0; +} + diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 218aac467321..3cf1e2a612ce 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -72,13 +72,21 @@ static const char *getxinfo_strerr(int err) } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { +again: int err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? service : "", errstr); @@ -91,7 +99,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -136,7 +144,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index e4403236f655..b8af65373b3a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -23,6 +23,7 @@ tmpfile="" cout="" err="" capout="" +cappid="" ns1="" ns2="" iptables="iptables" @@ -31,7 +32,6 @@ timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) capture=false checksum=false -ip_mptcp=0 check_invert=0 validate_checksum=false init=0 @@ -62,6 +62,17 @@ unset sflags unset fastclose unset fullmesh unset speed +unset join_syn_rej +unset join_csum_ns1 +unset join_csum_ns2 +unset join_fail_nr +unset join_rst_nr +unset join_infi_nr +unset join_corrupted_pkts +unset join_syn_tx +unset join_create_err +unset join_bind_err +unset join_connect_err # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || # (ip6 && (ip6[74] & 0xf0) == 0x30)'" @@ -125,8 +136,8 @@ init_shapers() { local i for i in $(seq 1 4); do - tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1 - tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1 + tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1ms + tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1ms done } @@ -142,7 +153,7 @@ init() { mptcp_lib_check_mptcp mptcp_lib_check_kallsyms - mptcp_lib_check_tools ip ss "${iptables}" "${ip6tables}" + mptcp_lib_check_tools ip tc ss "${iptables}" "${ip6tables}" sin=$(mktemp) sout=$(mktemp) @@ -197,6 +208,22 @@ print_skip() mptcp_lib_pr_skip "${@}" } +# $1: check name; $2: rc +print_results() +{ + local check="${1}" + local rc=${2} + + print_check "${check}" + if [ ${rc} = ${KSFT_PASS} ]; then + print_ok + elif [ ${rc} = ${KSFT_SKIP} ]; then + print_skip + else + fail_test "see above" + fi +} + # [ $1: fail msg ] mark_as_skipped() { @@ -262,6 +289,8 @@ reset() TEST_NAME="${1}" + MPTCP_LIB_SUBTEST_FLAKY=0 # reset if modified + if skip_test; then MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) last_test_ignored=1 @@ -336,7 +365,7 @@ reset_with_checksum() local ns1_enable=$1 local ns2_enable=$2 - reset "checksum test ${1} ${2}" || return 1 + reset "checksum test ${ns1_enable} ${ns2_enable}" || return 1 ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable @@ -419,12 +448,17 @@ reset_with_fail() fi } +start_events() +{ + mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid + mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid +} + reset_with_events() { reset "${1}" || return 1 - mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid - mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid + start_events } reset_with_tcp_filter() @@ -435,9 +469,10 @@ reset_with_tcp_filter() local ns="${!1}" local src="${2}" local target="${3}" + local chain="${4:-INPUT}" if ! ip netns exec "${ns}" ${iptables} \ - -A INPUT \ + -A "${chain}" \ -s "${src}" \ -p tcp \ -j "${target}"; then @@ -449,7 +484,9 @@ reset_with_tcp_filter() # $1: err msg fail_test() { - ret=${KSFT_FAIL} + if ! mptcp_lib_subtest_is_flaky; then + ret=${KSFT_FAIL} + fi if [ ${#} -gt 0 ]; then print_fail "${@}" @@ -606,173 +643,65 @@ kill_events_pids() pm_nl_set_limits() { - local ns=$1 - local addrs=$2 - local subflows=$3 - - if [ $ip_mptcp -eq 1 ]; then - ip -n $ns mptcp limits set add_addr_accepted $addrs subflows $subflows - else - ip netns exec $ns ./pm_nl_ctl limits $addrs $subflows - fi + mptcp_lib_pm_nl_set_limits "${@}" } pm_nl_add_endpoint() { - local ns=$1 - local addr=$2 - local flags _flags - local port _port - local dev _dev - local id _id - local nr=2 - - local p - for p in "${@}" - do - if [ $p = "flags" ]; then - eval _flags=\$"$nr" - [ -n "$_flags" ]; flags="flags $_flags" - fi - if [ $p = "dev" ]; then - eval _dev=\$"$nr" - [ -n "$_dev" ]; dev="dev $_dev" - fi - if [ $p = "id" ]; then - eval _id=\$"$nr" - [ -n "$_id" ]; id="id $_id" - fi - if [ $p = "port" ]; then - eval _port=\$"$nr" - [ -n "$_port" ]; port="port $_port" - fi - - nr=$((nr + 1)) - done - - if [ $ip_mptcp -eq 1 ]; then - ip -n $ns mptcp endpoint add $addr ${_flags//","/" "} $dev $id $port - else - ip netns exec $ns ./pm_nl_ctl add $addr $flags $dev $id $port - fi + mptcp_lib_pm_nl_add_endpoint "${@}" } pm_nl_del_endpoint() { - local ns=$1 - local id=$2 - local addr=$3 - - if [ $ip_mptcp -eq 1 ]; then - [ $id -ne 0 ] && addr='' - ip -n $ns mptcp endpoint delete id $id $addr - else - ip netns exec $ns ./pm_nl_ctl del $id $addr - fi + mptcp_lib_pm_nl_del_endpoint "${@}" } pm_nl_flush_endpoint() { - local ns=$1 - - if [ $ip_mptcp -eq 1 ]; then - ip -n $ns mptcp endpoint flush - else - ip netns exec $ns ./pm_nl_ctl flush - fi + mptcp_lib_pm_nl_flush_endpoint "${@}" } pm_nl_show_endpoints() { - local ns=$1 - - if [ $ip_mptcp -eq 1 ]; then - ip -n $ns mptcp endpoint show - else - ip netns exec $ns ./pm_nl_ctl dump - fi + mptcp_lib_pm_nl_show_endpoints "${@}" } pm_nl_change_endpoint() { - local ns=$1 - local id=$2 - local flags=$3 - - if [ $ip_mptcp -eq 1 ]; then - ip -n $ns mptcp endpoint change id $id ${flags//","/" "} - else - ip netns exec $ns ./pm_nl_ctl set id $id flags $flags - fi + mptcp_lib_pm_nl_change_endpoint "${@}" } pm_nl_check_endpoint() { - local line expected_line local msg="$1" local ns=$2 local addr=$3 - local _flags="" - local flags - local _port - local port - local dev - local _id - local id + local flags dev id port print_check "${msg}" shift 3 while [ -n "$1" ]; do - if [ $1 = "flags" ]; then - _flags=$2 - [ -n "$_flags" ]; flags="flags $_flags" + case "${1}" in + "flags" | "dev" | "id" | "port") + eval "${1}"="${2}" shift - elif [ $1 = "dev" ]; then - [ -n "$2" ]; dev="dev $2" - shift - elif [ $1 = "id" ]; then - _id=$2 - [ -n "$_id" ]; id="id $_id" - shift - elif [ $1 = "port" ]; then - _port=$2 - [ -n "$_port" ]; port=" port $_port" - shift - fi + ;; + *) + ;; + esac shift done - if [ -z "$id" ]; then - test_fail "bad test - missing endpoint id" + if [ -z "${id}" ]; then + fail_test "bad test - missing endpoint id" return fi - if [ $ip_mptcp -eq 1 ]; then - # get line and trim trailing whitespace - line=$(ip -n $ns mptcp endpoint show $id) - line="${line% }" - # the dump order is: address id flags port dev - [ -n "$addr" ] && expected_line="$addr" - expected_line+=" $id" - [ -n "$_flags" ] && expected_line+=" ${_flags//","/" "}" - [ -n "$dev" ] && expected_line+=" $dev" - [ -n "$port" ] && expected_line+=" $port" - else - line=$(ip netns exec $ns ./pm_nl_ctl get $_id) - # the dump order is: id flags dev address port - expected_line="$id" - [ -n "$flags" ] && expected_line+=" $flags" - [ -n "$dev" ] && expected_line+=" $dev" - [ -n "$addr" ] && expected_line+=" $addr" - [ -n "$_port" ] && expected_line+=" $_port" - fi - if [ "$line" = "$expected_line" ]; then - print_ok - else - fail_test "expected '$expected_line' found '$line'" - fi + check_output "mptcp_lib_pm_nl_get_endpoint ${ns} ${id}" \ + "$(mptcp_lib_pm_nl_format_endpoints \ + "${id},${addr},${flags//","/" "},${dev},${port}")" } pm_nl_set_endpoint() @@ -938,7 +867,7 @@ chk_cestab_nr() local cestab=$2 local count - print_check "cestab $cestab" + print_check "currently established: $cestab" count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab") if [ -z "$count" ]; then print_skip @@ -960,40 +889,62 @@ check_cestab() fi } -do_transfer() +cond_start_capture() { - local listener_ns="$1" - local connector_ns="$2" - local cl_proto="$3" - local srv_proto="$4" - local connect_addr="$5" - - local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1)) - local cappid - local FAILING_LINKS=${FAILING_LINKS:-""} - local fastclose=${fastclose:-""} - local speed=${speed:-"fast"} + local ns="$1" - :> "$cout" - :> "$sout" :> "$capout" if $capture; then - local capuser - if [ -z $SUDO_USER ] ; then + local capuser capfile + if [ -z $SUDO_USER ]; then capuser="" else capuser="-Z $SUDO_USER" fi - capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}") + capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "$ns") echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile" - ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & + ip netns exec "$ns" tcpdump -i any -s 65535 -B 32768 $capuser -w "$capfile" > "$capout" 2>&1 & cappid=$! sleep 1 fi +} + +cond_stop_capture() +{ + if $capture; then + sleep 1 + kill $cappid + cat "$capout" + fi +} + +get_port() +{ + echo "$((10000 + MPTCP_LIB_TEST_COUNTER - 1))" +} + +do_transfer() +{ + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" + local port + + local FAILING_LINKS=${FAILING_LINKS:-""} + local fastclose=${fastclose:-""} + local speed=${speed:-"fast"} + port=$(get_port) + + :> "$cout" + :> "$sout" + + cond_start_capture ${listener_ns} NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat -n @@ -1080,10 +1031,7 @@ do_transfer() wait $spid local rets=$? - if $capture; then - sleep 1 - kill $cappid - fi + cond_stop_capture NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat | grep Tcp > /tmp/${listener_ns}.out @@ -1092,14 +1040,8 @@ do_transfer() if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then fail_test "client exit code $retc, server $rets" - echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" - cat /tmp/${listener_ns}.out - echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" - cat /tmp/${connector_ns}.out - - cat "$capout" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ + "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" return 1 fi @@ -1116,13 +1058,7 @@ do_transfer() fi rets=$? - if [ $retc -eq 0 ] && [ $rets -eq 0 ];then - cat "$capout" - return 0 - fi - - cat "$capout" - return 1 + [ $retc -eq 0 ] && [ $rets -eq 0 ] } make_file() @@ -1214,28 +1150,29 @@ chk_csum_nr() csum_ns2=${csum_ns2:1} fi - print_check "sum" + print_check "checksum server" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr") - if [ "$count" != "$csum_ns1" ]; then + if [ -n "$count" ] && [ "$count" != "$csum_ns1" ]; then extra_msg+=" ns1=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } || - { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then + { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then fail_test "got $count data checksum error[s] expected $csum_ns1" else print_ok fi - print_check "csum" + + print_check "checksum client" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr") - if [ "$count" != "$csum_ns2" ]; then + if [ -n "$count" ] && [ "$count" != "$csum_ns2" ]; then extra_msg+=" ns2=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } || - { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then + { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then fail_test "got $count data checksum error[s] expected $csum_ns2" else print_ok @@ -1252,6 +1189,8 @@ chk_fail_nr() local count local ns_tx=$ns1 local ns_rx=$ns2 + local tx="server" + local rx="client" local extra_msg="" local allow_tx_lost=0 local allow_rx_lost=0 @@ -1259,7 +1198,8 @@ chk_fail_nr() if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 ns_rx=$ns1 - extra_msg="invert" + tx="client" + rx="server" fi if [[ "${fail_tx}" = "-"* ]]; then @@ -1271,29 +1211,29 @@ chk_fail_nr() fail_rx=${fail_rx:1} fi - print_check "ftx" + print_check "fail tx ${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx") - if [ "$count" != "$fail_tx" ]; then - extra_msg+=",tx=$count" + if [ -n "$count" ] && [ "$count" != "$fail_tx" ]; then + extra_msg+=" tx=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } || - { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then + { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then fail_test "got $count MP_FAIL[s] TX expected $fail_tx" else print_ok fi - print_check "failrx" + print_check "fail rx ${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx") - if [ "$count" != "$fail_rx" ]; then - extra_msg+=",rx=$count" + if [ -n "$count" ] && [ "$count" != "$fail_rx" ]; then + extra_msg+=" rx=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } || - { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then + { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then fail_test "got $count MP_FAIL[s] RX expected $fail_rx" else print_ok @@ -1310,37 +1250,35 @@ chk_fclose_nr() local count local ns_tx=$ns2 local ns_rx=$ns1 - local extra_msg="" + local tx="client" + local rx="server" if [[ $ns_invert = "invert" ]]; then ns_tx=$ns1 ns_rx=$ns2 - extra_msg="invert" + tx="server" + rx="client" fi - print_check "ctx" + print_check "fast close tx ${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_tx" ]; then - extra_msg+=",tx=$count" fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx" else print_ok fi - print_check "fclzrx" + print_check "fast close rx ${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_rx" ]; then - extra_msg+=",rx=$count" fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx" else print_ok fi - - print_info "$extra_msg" } chk_rst_nr() @@ -1351,15 +1289,17 @@ chk_rst_nr() local count local ns_tx=$ns1 local ns_rx=$ns2 - local extra_msg="" + local tx="server" + local rx="client" if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 ns_rx=$ns1 - extra_msg="invert" + tx="client" + rx="server" fi - print_check "rtx" + print_check "reset tx ${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx") if [ -z "$count" ]; then print_skip @@ -1371,7 +1311,7 @@ chk_rst_nr() print_ok fi - print_check "rstrx" + print_check "reset rx ${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx") if [ -z "$count" ]; then print_skip @@ -1382,8 +1322,6 @@ chk_rst_nr() else print_ok fi - - print_info "$extra_msg" } chk_infi_nr() @@ -1392,7 +1330,7 @@ chk_infi_nr() local infi_rx=$2 local count - print_check "itx" + print_check "infi tx client" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx") if [ -z "$count" ]; then print_skip @@ -1402,7 +1340,7 @@ chk_infi_nr() print_ok fi - print_check "infirx" + print_check "infi rx server" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx") if [ -z "$count" ]; then print_skip @@ -1413,17 +1351,67 @@ chk_infi_nr() fi } +chk_join_tx_nr() +{ + local syn_tx=${join_syn_tx:-0} + local create=${join_create_err:-0} + local bind=${join_bind_err:-0} + local connect=${join_connect_err:-0} + local rc=${KSFT_PASS} + local count + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTx") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$syn_tx" ]; then + rc=${KSFT_FAIL} + print_check "syn tx" + fail_test "got $count JOIN[s] syn tx expected $syn_tx" + fi + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxCreatSkErr") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$create" ]; then + rc=${KSFT_FAIL} + print_check "syn tx create socket error" + fail_test "got $count JOIN[s] syn tx create socket error expected $create" + fi + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$bind" ]; then + rc=${KSFT_FAIL} + print_check "syn tx bind error" + fail_test "got $count JOIN[s] syn tx bind error expected $bind" + fi + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$connect" ]; then + rc=${KSFT_FAIL} + print_check "syn tx connect error" + fail_test "got $count JOIN[s] syn tx connect error expected $connect" + fi + + print_results "join Tx" ${rc} +} + chk_join_nr() { local syn_nr=$1 local syn_ack_nr=$2 local ack_nr=$3 - local csum_ns1=${4:-0} - local csum_ns2=${5:-0} - local fail_nr=${6:-0} - local rst_nr=${7:-0} - local infi_nr=${8:-0} - local corrupted_pkts=${9:-0} + local syn_rej=${join_syn_rej:-0} + local csum_ns1=${join_csum_ns1:-0} + local csum_ns2=${join_csum_ns2:-0} + local fail_nr=${join_fail_nr:-0} + local rst_nr=${join_rst_nr:-0} + local infi_nr=${join_infi_nr:-0} + local corrupted_pkts=${join_corrupted_pkts:-0} + local rc=${KSFT_PASS} local count local with_cookie @@ -1431,43 +1419,71 @@ chk_join_nr() print_info "${corrupted_pkts} corrupted pkts" fi - print_check "syn" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx") if [ -z "$count" ]; then - print_skip + rc=${KSFT_SKIP} elif [ "$count" != "$syn_nr" ]; then - fail_test "got $count JOIN[s] syn expected $syn_nr" - else - print_ok + rc=${KSFT_FAIL} + print_check "syn rx" + fail_test "got $count JOIN[s] syn rx expected $syn_nr" fi - print_check "synack" with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies) count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") if [ -z "$count" ]; then - print_skip + rc=${KSFT_SKIP} elif [ "$count" != "$syn_ack_nr" ]; then # simult connections exceeding the limit with cookie enabled could go up to # synack validation as the conn limit can be enforced reliably only after # the subflow creation - if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then - print_ok - else - fail_test "got $count JOIN[s] synack expected $syn_ack_nr" + if [ "$with_cookie" != 2 ] || [ "$count" -le "$syn_ack_nr" ] || [ "$count" -gt "$syn_nr" ]; then + rc=${KSFT_FAIL} + print_check "synack rx" + fail_test "got $count JOIN[s] synack rx expected $syn_ack_nr" fi - else - print_ok fi - print_check "ack" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "0" ]; then + rc=${KSFT_FAIL} + print_check "synack HMAC" + fail_test "got $count JOIN[s] synack HMAC failure expected 0" + fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then - print_skip + rc=${KSFT_SKIP} elif [ "$count" != "$ack_nr" ]; then - fail_test "got $count JOIN[s] ack expected $ack_nr" - else - print_ok + rc=${KSFT_FAIL} + print_check "ack rx" + fail_test "got $count JOIN[s] ack rx expected $ack_nr" + fi + + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "0" ]; then + rc=${KSFT_FAIL} + print_check "ack HMAC" + fail_test "got $count JOIN[s] ack HMAC failure expected 0" + fi + + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$syn_rej" ]; then + rc=${KSFT_FAIL} + print_check "syn rejected" + fail_test "got $count JOIN[s] syn rejected expected $syn_rej" fi + + print_results "join Rx" ${rc} + + join_syn_tx="${join_syn_tx:-${syn_nr}}" \ + chk_join_tx_nr + if $validate_checksum; then chk_csum_nr $csum_ns1 $csum_ns2 chk_fail_nr $fail_nr $fail_nr @@ -1520,18 +1536,30 @@ chk_add_nr() local add_nr=$1 local echo_nr=$2 local port_nr=${3:-0} - local syn_nr=${4:-$port_nr} - local syn_ack_nr=${5:-$port_nr} - local ack_nr=${6:-$port_nr} - local mis_syn_nr=${7:-0} - local mis_ack_nr=${8:-0} + local ns_invert=${4:-""} + local syn_nr=$port_nr + local syn_ack_nr=$port_nr + local ack_nr=$port_nr + local mis_syn_nr=0 + local mis_ack_nr=0 + local ns_tx=$ns1 + local ns_rx=$ns2 + local tx="" + local rx="" local count local timeout - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns2 + ns_rx=$ns1 + tx=" client" + rx=" server" + fi - print_check "add" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") + timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) + + print_check "add addr rx${rx}" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1542,8 +1570,8 @@ chk_add_nr() print_ok fi - print_check "echo" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") + print_check "add addr echo rx${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_nr" ]; then @@ -1553,8 +1581,8 @@ chk_add_nr() fi if [ $port_nr -gt 0 ]; then - print_check "pt" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") + print_check "add addr rx with port${rx}" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$port_nr" ]; then @@ -1563,8 +1591,8 @@ chk_add_nr() print_ok fi - print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + print_check "syn rx port${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1574,8 +1602,8 @@ chk_add_nr() print_ok fi - print_check "synack" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + print_check "synack rx port${rx}" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1585,8 +1613,8 @@ chk_add_nr() print_ok fi - print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + print_check "ack rx port${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1596,8 +1624,8 @@ chk_add_nr() print_ok fi - print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + print_check "syn rx port mismatch${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_syn_nr" ]; then @@ -1607,8 +1635,8 @@ chk_add_nr() print_ok fi - print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + print_check "ack rx port mismatch${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_ack_nr" ]; then @@ -1629,7 +1657,7 @@ chk_add_tx_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) - print_check "add TX" + print_check "add addr tx" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") if [ -z "$count" ]; then print_skip @@ -1641,7 +1669,7 @@ chk_add_tx_nr() print_ok fi - print_check "echo TX" + print_check "add addr echo tx" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx") if [ -z "$count" ]; then print_skip @@ -1661,6 +1689,8 @@ chk_rm_nr() local count local addr_ns=$ns1 local subflow_ns=$ns2 + local addr="server" + local subflow="client" local extra_msg="" shift 2 @@ -1670,16 +1700,14 @@ chk_rm_nr() shift done - if [ -z $invert ]; then - addr_ns=$ns1 - subflow_ns=$ns2 - elif [ $invert = "true" ]; then + if [ "$invert" = "true" ]; then addr_ns=$ns2 subflow_ns=$ns1 - extra_msg="invert" + addr="client" + subflow="server" fi - print_check "rm" + print_check "rm addr rx ${addr}" count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr") if [ -z "$count" ]; then print_skip @@ -1689,7 +1717,7 @@ chk_rm_nr() print_ok fi - print_check "rmsf" + print_check "rm subflow ${subflow}" count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow") if [ -z "$count" ]; then print_skip @@ -1703,7 +1731,7 @@ chk_rm_nr() count=$((count + cnt)) if [ "$count" != "$rm_subflow_nr" ]; then suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" - extra_msg+=" simult" + extra_msg="simult" fi if [ $count -ge "$rm_subflow_nr" ] && \ [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then @@ -1724,7 +1752,7 @@ chk_rm_tx_nr() { local rm_addr_tx_nr=$1 - print_check "rm TX" + print_check "rm addr tx client" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx") if [ -z "$count" ]; then print_skip @@ -1739,9 +1767,11 @@ chk_prio_nr() { local mp_prio_nr_tx=$1 local mp_prio_nr_rx=$2 + local mpj_syn=$3 + local mpj_syn_ack=$4 local count - print_check "ptx" + print_check "mp_prio tx server" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx") if [ -z "$count" ]; then print_skip @@ -1751,7 +1781,7 @@ chk_prio_nr() print_ok fi - print_check "prx" + print_check "mp_prio rx client" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx") if [ -z "$count" ]; then print_skip @@ -1760,6 +1790,26 @@ chk_prio_nr() else print_ok fi + + print_check "syn backup" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn" ]; then + fail_test "got $count JOIN[s] syn with Backup expected $mpj_syn" + else + print_ok + fi + + print_check "synack backup" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn_ack" ]; then + fail_test "got $count JOIN[s] synack with Backup expected $mpj_syn_ack" + else + print_ok + fi } chk_subflow_nr() @@ -1924,7 +1974,8 @@ subflows_tests() pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 fi # subflow @@ -1953,7 +2004,8 @@ subflows_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 2 2 1 + join_syn_rej=1 \ + chk_join_nr 2 2 1 fi # single subflow, dev @@ -1974,9 +2026,11 @@ subflows_error_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.12.2 flags subflow speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 + join_bind_err=1 \ + chk_join_nr 0 0 0 fi # multiple subflows, with subflow creation error @@ -1988,7 +2042,8 @@ subflows_error_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 fi # multiple subflows, with subflow timeout on MPJ @@ -2000,7 +2055,8 @@ subflows_error_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 fi # multiple subflows, check that the endpoint corresponding to @@ -2021,7 +2077,8 @@ subflows_error_tests() # additional subflow could be created only if the PM select # the later endpoint, skipping the already used one - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 fi } @@ -2060,6 +2117,21 @@ signal_address_tests() chk_add_nr 1 1 fi + # uncommon: subflow and signal flags on the same endpoint + # or because the user wrongly picked both, but still expects the client + # to create additional subflows + if reset "subflow and signal together"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags signal,subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 0 invert # only initiated by ns2 + chk_add_nr 0 0 0 # none initiated by ns1 + chk_rst_nr 0 0 invert # no RST sent by the client + chk_rst_nr 0 0 # no RST sent by the server + fi + # accept and use add_addr with additional subflows if reset "multiple subflows and signal"; then pm_nl_set_limits $ns1 0 3 @@ -2092,7 +2164,8 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=3 \ + chk_join_nr 1 1 1 chk_add_nr 3 3 fi @@ -2260,7 +2333,8 @@ add_addr_timeout_tests() pm_nl_set_limits $ns2 2 2 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 chk_add_nr 8 0 fi } @@ -2354,12 +2428,14 @@ remove_tests() if reset "remove invalid addresses"; then pm_nl_set_limits $ns1 3 3 pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 flags signal pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.14.1 flags signal - pm_nl_set_limits $ns2 3 3 + pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 join_connect_err=1 \ + chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert chk_rst_nr 0 0 @@ -2424,7 +2500,8 @@ remove_tests() pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=3 \ + chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert chk_rst_nr 0 0 @@ -2716,33 +2793,46 @@ backup_tests() sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 fi # single address, backup if reset "single address, backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup + pm_nl_set_limits $ns2 1 1 + sflags=nobackup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 0 0 1 + fi + + # single address, switch to backup + if reset "single address, switch to backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 1 sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi # single address with port, backup if reset "single address with port, backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup port 10100 pm_nl_set_limits $ns2 1 1 - sflags=backup speed=slow \ + sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 0 0 1 fi if reset "mpc backup" && @@ -2751,17 +2841,26 @@ backup_tests() speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc backup both sides" && continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then - pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + + # 10.0.2.2 (non-backup) -> 10.0.1.1 (backup) + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + # 10.0.1.2 (backup) -> 10.0.2.1 (non-backup) + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + ip -net "$ns2" route add 10.0.2.1 via 10.0.1.1 dev ns2eth1 # force this path + speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_join_nr 2 2 2 + chk_prio_nr 1 1 1 1 fi if reset "mpc switch to backup" && @@ -2770,7 +2869,7 @@ backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc switch to backup both sides" && @@ -2780,7 +2879,7 @@ backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi } @@ -2813,6 +2912,32 @@ verify_listener_events() fail_test } +chk_mpc_endp_attempt() +{ + local retl=$1 + local attempts=$2 + + print_check "Connect" + + if [ ${retl} = 124 ]; then + fail_test "timeout on connect" + elif [ ${retl} = 0 ]; then + fail_test "unexpected successful connect" + else + print_ok + + print_check "Attempts" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPCapableEndpAttempt") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$attempts" ]; then + fail_test "got ${count} MPC attempt[s] on port-based endpoint, expected ${attempts}" + else + print_ok + fi + fi +} + add_addr_ports_tests() { # signal address with port @@ -2903,6 +3028,22 @@ add_addr_ports_tests() chk_join_nr 2 2 2 chk_add_nr 2 2 2 fi + + if reset "port-based signal endpoint must not accept mpc"; then + local port retl count + port=$(get_port) + + cond_start_capture ${ns1} + pm_nl_add_endpoint ${ns1} 10.0.2.1 flags signal port ${port} + mptcp_lib_wait_local_port_listen ${ns1} ${port} + + timeout 1 ip netns exec ${ns2} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s MPTCP 10.0.2.1 >/dev/null 2>&1 + retl=$? + cond_stop_capture + + chk_mpc_endp_attempt ${retl} 1 + fi } syncookies_tests() @@ -2933,7 +3074,8 @@ syncookies_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 2 1 1 + join_syn_rej=1 \ + chk_join_nr 2 1 1 fi # test signal address with cookies @@ -2972,37 +3114,16 @@ syncookies_tests() checksum_tests() { - # checksum test 0 0 - if reset_with_checksum 0 0; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi - - # checksum test 1 1 - if reset_with_checksum 1 1; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi - - # checksum test 0 1 - if reset_with_checksum 0 1; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi - - # checksum test 1 0 - if reset_with_checksum 1 0; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi + local checksum_enable + for checksum_enable in "0 0" "1 1" "0 1" "1 0"; do + # checksum test 0 0, 1 1, 0 1, 1 0 + if reset_with_checksum ${checksum_enable}; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + done } deny_join_id0_tests() @@ -3091,6 +3212,9 @@ fullmesh_tests() pm_nl_set_limits $ns1 1 3 pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh + fi fullmesh=1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -3157,7 +3281,7 @@ fullmesh_tests() addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi @@ -3170,7 +3294,7 @@ fullmesh_tests() sflags=nobackup,nofullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi } @@ -3178,6 +3302,7 @@ fullmesh_tests() fastclose_tests() { if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then + MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=1024 fastclose=client \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 @@ -3186,9 +3311,11 @@ fastclose_tests() fi if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then + MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=1024 fastclose=server \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 0 0 0 1 + join_rst_nr=1 \ + chk_join_nr 0 0 0 chk_fclose_nr 1 1 invert chk_rst_nr 1 1 fi @@ -3204,21 +3331,29 @@ fail_tests() { # single subflow if reset_with_fail "Infinite map" 1; then + MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=128 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)" + join_csum_ns1=+1 join_csum_ns2=+0 \ + join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \ + join_corrupted_pkts="$(pedit_action_pkts)" \ + chk_join_nr 0 0 0 chk_fail_nr 1 -1 invert fi # multiple subflows if reset_with_fail "MP_FAIL MP_RST" 2; then - tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5 + MPTCP_LIB_SUBTEST_FLAKY=1 + tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5ms pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow test_linkfail=1024 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)" + join_csum_ns1=1 join_csum_ns2=0 \ + join_fail_nr=1 join_rst_nr=1 join_infi_nr=0 \ + join_corrupted_pkts="$(pedit_action_pkts)" \ + chk_join_nr 1 1 1 fi } @@ -3360,6 +3495,36 @@ userspace_pm_chk_get_addr() fi } +# $1: ns ; $2: event type ; $3: count +chk_evt_nr() +{ + local ns=${1} + local evt_name="${2}" + local exp="${3}" + + local evts="${evts_ns1}" + local evt="${!evt_name}" + local count + + evt_name="${evt_name:16}" # without MPTCP_LIB_EVENT_ + [ "${ns}" == "ns2" ] && evts="${evts_ns2}" + + print_check "event ${ns} ${evt_name} (${exp})" + + if [[ "${evt_name}" = "LISTENER_"* ]] && + ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then + print_skip "event not supported" + return + fi + + count=$(grep -cw "type:${evt}" "${evts}") + if [ "${count}" != "${exp}" ]; then + fail_test "got ${count} events, expected ${exp}" + else + print_ok + fi +} + userspace_tests() { # userspace pm type prevents add_addr @@ -3394,7 +3559,8 @@ userspace_tests() pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 fi # userspace pm type does not send join @@ -3417,8 +3583,9 @@ userspace_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 - chk_prio_nr 0 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 + chk_prio_nr 0 0 0 0 fi # userspace pm type prevents rm_addr @@ -3440,8 +3607,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 2 2 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 userspace_pm_add_addr $ns1 10.0.2.1 10 @@ -3456,14 +3623,12 @@ userspace_tests() "signal" userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1" userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1" - userspace_pm_rm_addr $ns1 10 userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns1}" \ - "id 20 flags signal 10.0.3.1" "after rm_addr 10" + "id 20 flags signal 10.0.3.1" "after rm_sf 10" userspace_pm_rm_addr $ns1 20 - userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20" - chk_rm_nr 2 2 invert + chk_rm_nr 1 1 invert chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids @@ -3475,8 +3640,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 userspace_pm_add_sf $ns2 10.0.3.2 20 @@ -3487,12 +3652,11 @@ userspace_tests() "id 20 flags subflow 10.0.3.2" \ "subflow" userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2" - userspace_pm_rm_addr $ns2 20 userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns2}" \ "" \ - "after rm_addr 20" - chk_rm_nr 1 1 + "after rm_sf 20" + chk_rm_nr 0 1 chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids @@ -3504,8 +3668,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 chk_mptcp_info subflows 0 subflows 0 @@ -3525,8 +3689,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 userspace_pm_add_sf $ns2 10.0.3.2 20 @@ -3549,8 +3713,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 userspace_pm_add_addr $ns1 10.0.2.1 10 @@ -3580,8 +3744,8 @@ endpoint_tests() pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - speed=slow \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 @@ -3600,31 +3764,185 @@ endpoint_tests() mptcp_lib_kill_wait $tests_pid fi - if reset "delete and re-add" && + if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 1 1 - pm_nl_set_limits $ns2 1 1 + start_events + pm_nl_set_limits $ns1 0 3 + pm_nl_set_limits $ns2 0 3 + pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - test_linkfail=4 speed=20 \ - run_tests $ns1 $ns2 10.0.1.1 & + { test_linkfail=4 speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 pm_nl_check_endpoint "creation" \ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 - chk_subflow_nr "before delete" 2 + chk_subflow_nr "before delete id 2" 2 chk_mptcp_info subflows 1 subflows 1 pm_nl_del_endpoint $ns2 2 10.0.2.2 sleep 0.5 - chk_subflow_nr "after delete" 1 + chk_subflow_nr "after delete id 2" 1 chk_mptcp_info subflows 0 subflows 0 - pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow wait_mpj $ns2 - chk_subflow_nr "after re-add" 2 + chk_subflow_nr "after re-add id 2" 2 + chk_mptcp_info subflows 1 subflows 1 + + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_attempt_fail $ns2 + chk_subflow_nr "after new reject" 2 chk_mptcp_info subflows 1 subflows 1 + + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_del_endpoint $ns2 3 10.0.3.2 + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after no reject" 3 + chk_mptcp_info subflows 2 subflows 2 + + local i + for i in $(seq 3); do + pm_nl_del_endpoint $ns2 1 10.0.1.2 + sleep 0.5 + chk_subflow_nr "after delete id 0 ($i)" 2 + chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf + + pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after re-add id 0 ($i)" 3 + chk_mptcp_info subflows 3 subflows 3 + done + mptcp_lib_kill_wait $tests_pid + + kill_events_pids + chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 4 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 4 + + chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 0 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 5 # one has been closed before estab + + join_syn_tx=7 \ + chk_join_nr 6 6 6 + chk_rm_nr 4 4 + fi + + # remove and re-add + if reset_with_events "delete re-add signal" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 3 + pm_nl_set_limits $ns2 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal + { test_linkfail=4 speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null + local tests_pid=$! + + wait_mpj $ns2 + pm_nl_check_endpoint "creation" \ + $ns1 10.0.2.1 id 1 flags signal + chk_subflow_nr "before delete" 2 + chk_mptcp_info subflows 1 subflows 1 + + pm_nl_del_endpoint $ns1 1 10.0.2.1 + pm_nl_del_endpoint $ns1 2 224.0.0.1 + sleep 0.5 + chk_subflow_nr "after delete" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-add" 3 + chk_mptcp_info subflows 2 subflows 2 + + pm_nl_del_endpoint $ns1 42 10.0.1.1 + sleep 0.5 + chk_subflow_nr "after delete ID 0" 2 + chk_mptcp_info subflows 2 subflows 2 + + pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-add ID 0" 3 + chk_mptcp_info subflows 3 subflows 3 + + pm_nl_del_endpoint $ns1 99 10.0.1.1 + sleep 0.5 + chk_subflow_nr "after re-delete ID 0" 2 + chk_mptcp_info subflows 2 subflows 2 + + pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-re-add ID 0" 3 + chk_mptcp_info subflows 3 subflows 3 + mptcp_lib_kill_wait $tests_pid + + kill_events_pids + chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 3 + + chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 6 + chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 4 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 3 + + join_connect_err=1 \ + chk_join_nr 5 5 5 + chk_add_nr 6 6 + chk_rm_nr 4 3 invert + fi + + # flush and re-add + if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + { test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null + local tests_pid=$! + + wait_attempt_fail $ns2 + chk_subflow_nr "before flush" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_flush_endpoint $ns2 + pm_nl_flush_endpoint $ns1 + wait_rm_addr $ns2 0 + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal + wait_mpj $ns2 + mptcp_lib_kill_wait $tests_pid + + join_syn_tx=3 join_connect_err=1 \ + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_rm_nr 1 0 invert fi } @@ -3702,7 +4020,7 @@ while getopts "${all_tests_args}cCih" opt; do checksum=true ;; i) - ip_mptcp=1 + mptcp_lib_set_ip_mptcp ;; h) usage @@ -3727,9 +4045,11 @@ if [ ${#tests[@]} -eq 0 ]; then tests=("${all_tests_names[@]}") fi +mptcp_lib_subtests_last_ts_reset for subtests in "${tests[@]}"; do "${subtests}" done +append_prev_results if [ ${ret} -ne 0 ]; then echo @@ -3740,7 +4060,6 @@ if [ ${ret} -ne 0 ]; then echo fi -append_prev_results mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index d529b4b37af8..09cd24b2ae46 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -1,6 +1,8 @@ #! /bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/../lib.sh" + readonly KSFT_PASS=0 readonly KSFT_FAIL=1 readonly KSFT_SKIP=4 @@ -9,10 +11,14 @@ readonly KSFT_SKIP=4 readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" # These variables are used in some selftests, read-only +declare -rx MPTCP_LIB_EVENT_CREATED=1 # MPTCP_EVENT_CREATED +declare -rx MPTCP_LIB_EVENT_ESTABLISHED=2 # MPTCP_EVENT_ESTABLISHED +declare -rx MPTCP_LIB_EVENT_CLOSED=3 # MPTCP_EVENT_CLOSED declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED +declare -rx MPTCP_LIB_EVENT_SUB_PRIORITY=13 # MPTCP_EVENT_SUB_PRIORITY declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED @@ -21,8 +27,11 @@ declare -rx MPTCP_LIB_AF_INET6=10 MPTCP_LIB_SUBTESTS=() MPTCP_LIB_SUBTESTS_DUPLICATED=0 +MPTCP_LIB_SUBTEST_FLAKY=0 +MPTCP_LIB_SUBTESTS_LAST_TS_MS= MPTCP_LIB_TEST_COUNTER=0 MPTCP_LIB_TEST_FORMAT="%02u %-50s" +MPTCP_LIB_IP_MPTCP=0 # only if supported (or forced) and not disabled, see no-color.org if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } && @@ -40,6 +49,16 @@ else readonly MPTCP_LIB_COLOR_RESET= fi +# SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY env var can be set not to ignore errors +# from subtests marked as flaky +mptcp_lib_override_flaky() { + [ "${SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY:-}" = 1 ] +} + +mptcp_lib_subtest_is_flaky() { + [ "${MPTCP_LIB_SUBTEST_FLAKY}" = 1 ] && ! mptcp_lib_override_flaky +} + # $1: color, $2: text mptcp_lib_print_color() { echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}" @@ -71,13 +90,43 @@ mptcp_lib_pr_skip() { } mptcp_lib_pr_fail() { - mptcp_lib_print_err "[FAIL]${1:+ ${*}}" + local title cmt + + if mptcp_lib_subtest_is_flaky; then + title="IGNO" + cmt=" (flaky)" + else + title="FAIL" + fi + + mptcp_lib_print_err "[${title}]${cmt}${1:+ ${*}}" } mptcp_lib_pr_info() { mptcp_lib_print_info "INFO: ${*}" } +# $1-2: listener/connector ns ; $3 port ; $4-5 listener/connector stat file +mptcp_lib_pr_err_stats() { + local lns="${1}" + local cns="${2}" + local port="${3}" + local lstat="${4}" + local cstat="${5}" + + echo -en "${MPTCP_LIB_COLOR_RED}" + { + printf "\nnetns %s (listener) socket stat for %d:\n" "${lns}" "${port}" + ip netns exec "${lns}" ss -Menitam -o "sport = :${port}" + cat "${lstat}" + + printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}" + ip netns exec "${cns}" ss -Menitam -o "dport = :${port}" + [ "${lstat}" != "${cstat}" ] && cat "${cstat}" + } 1>&2 + echo -en "${MPTCP_LIB_COLOR_RESET}" +} + # SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all # features using the last version of the kernel and the selftests to make sure # a test is not being skipped by mistake. @@ -177,6 +226,11 @@ mptcp_lib_kversion_ge() { mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}" } +mptcp_lib_subtests_last_ts_reset() { + MPTCP_LIB_SUBTESTS_LAST_TS_MS="$(date +%s%3N)" +} +mptcp_lib_subtests_last_ts_reset + __mptcp_lib_result_check_duplicated() { local subtest @@ -191,13 +245,22 @@ __mptcp_lib_result_check_duplicated() { __mptcp_lib_result_add() { local result="${1}" + local time="time=" + local ts_prev_ms shift local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1)) __mptcp_lib_result_check_duplicated "${*}" - MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}") + # not to add two '#' + [[ "${*}" != *"#"* ]] && time="# ${time}" + + ts_prev_ms="${MPTCP_LIB_SUBTESTS_LAST_TS_MS}" + mptcp_lib_subtests_last_ts_reset + time+="$((MPTCP_LIB_SUBTESTS_LAST_TS_MS - ts_prev_ms))ms" + + MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*} ${time}") } # $1: test name @@ -207,7 +270,13 @@ mptcp_lib_result_pass() { # $1: test name mptcp_lib_result_fail() { - __mptcp_lib_result_add "not ok" "${1}" + if mptcp_lib_subtest_is_flaky; then + # It might sound better to use 'not ok # TODO' or 'ok # SKIP', + # but some CIs don't understand 'TODO' and treat SKIP as errors. + __mptcp_lib_result_add "ok" "${1} # IGNORE Flaky" + else + __mptcp_lib_result_add "not ok" "${1}" + fi } # $1: test name @@ -261,12 +330,15 @@ mptcp_lib_result_print_all_tap() { # get the value of keyword $1 in the line marked by keyword $2 mptcp_lib_get_info_value() { - grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' + grep "${2}" 2>/dev/null | + sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' + # the ';q' at the end limits to the first matched entry. } # $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]] mptcp_lib_evts_get_info() { - grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1}," + grep "${4:-}" "${2}" 2>/dev/null | + mptcp_lib_get_info_value "${1}" "^type:${3:-1}," } # $1: PID @@ -334,20 +406,7 @@ mptcp_lib_check_transfer() { # $1: ns, $2: port mptcp_lib_wait_local_port_listen() { - local listener_ns="${1}" - local port="${2}" - - local port_hex - port_hex="$(printf "%04X" "${port}")" - - local _ - for _ in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \ - {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done + wait_local_port_listen "${@}" "tcp" } mptcp_lib_check_output() { @@ -384,6 +443,12 @@ mptcp_lib_check_tools() { exit ${KSFT_SKIP} fi ;; + "tc") + if ! tc -help &> /dev/null; then + mptcp_lib_pr_skip "Could not run test without tc tool" + exit ${KSFT_SKIP} + fi + ;; "ss") if ! ss -h | grep -q MPTCP; then mptcp_lib_pr_skip "ss tool does not support MPTCP" @@ -405,27 +470,22 @@ mptcp_lib_check_tools() { } mptcp_lib_ns_init() { - local sec rndh - - sec=$(date +%s) - rndh=$(printf %x "${sec}")-$(mktemp -u XXXXXX) + if ! setup_ns "${@}"; then + mptcp_lib_pr_fail "Failed to setup namespaces ${*}" + exit ${KSFT_FAIL} + fi local netns for netns in "${@}"; do - eval "${netns}=${netns}-${rndh}" - - ip netns add "${!netns}" || exit ${KSFT_SKIP} - ip -net "${!netns}" link set lo up ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1 - ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0 - ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0 done } mptcp_lib_ns_exit() { + cleanup_ns "${@}" + local netns for netns in "${@}"; do - ip netns del "${netns}" rm -f /tmp/"${netns}".{nstat,out} done } @@ -505,3 +565,131 @@ mptcp_lib_verify_listener_events() { mptcp_lib_check_expected "type" "family" "saddr" "sport" || rc="${?}" return "${rc}" } + +mptcp_lib_set_ip_mptcp() { + MPTCP_LIB_IP_MPTCP=1 +} + +mptcp_lib_is_ip_mptcp() { + [ "${MPTCP_LIB_IP_MPTCP}" = "1" ] +} + +# format: <id>,<ip>,<flags>,<dev> +mptcp_lib_pm_nl_format_endpoints() { + local entry id ip flags dev port + + for entry in "${@}"; do + IFS=, read -r id ip flags dev port <<< "${entry}" + if mptcp_lib_is_ip_mptcp; then + echo -n "${ip}" + [ -n "${port}" ] && echo -n " port ${port}" + echo -n " id ${id}" + [ -n "${flags}" ] && echo -n " ${flags}" + [ -n "${dev}" ] && echo -n " dev ${dev}" + echo " " # always a space at the end + else + echo -n "id ${id}" + echo -n " flags ${flags//" "/","}" + [ -n "${dev}" ] && echo -n " dev ${dev}" + echo -n " ${ip}" + [ -n "${port}" ] && echo -n " ${port}" + echo "" + fi + done +} + +mptcp_lib_pm_nl_get_endpoint() { + local ns=${1} + local id=${2} + + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns}" mptcp endpoint show id "${id}" + else + ip netns exec "${ns}" ./pm_nl_ctl get "${id}" + fi +} + +mptcp_lib_pm_nl_set_limits() { + local ns=${1} + local addrs=${2} + local subflows=${3} + + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns}" mptcp limits set add_addr_accepted "${addrs}" subflows "${subflows}" + else + ip netns exec "${ns}" ./pm_nl_ctl limits "${addrs}" "${subflows}" + fi +} + +mptcp_lib_pm_nl_add_endpoint() { + local ns=${1} + local addr=${2} + local flags dev id port + local nr=2 + + local p + for p in "${@}"; do + case "${p}" in + "flags" | "dev" | "id" | "port") + eval "${p}"=\$"${nr}" + ;; + esac + + nr=$((nr + 1)) + done + + if mptcp_lib_is_ip_mptcp; then + # shellcheck disable=SC2086 # blanks in flags, no double quote + ip -n "${ns}" mptcp endpoint add "${addr}" ${flags//","/" "} \ + ${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"} + else + ip netns exec "${ns}" ./pm_nl_ctl add "${addr}" ${flags:+flags "${flags}"} \ + ${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"} + fi +} + +mptcp_lib_pm_nl_del_endpoint() { + local ns=${1} + local id=${2} + local addr=${3} + + if mptcp_lib_is_ip_mptcp; then + [ "${id}" -ne 0 ] && addr='' + ip -n "${ns}" mptcp endpoint delete id "${id}" ${addr:+"${addr}"} + else + ip netns exec "${ns}" ./pm_nl_ctl del "${id}" "${addr}" + fi +} + +mptcp_lib_pm_nl_flush_endpoint() { + local ns=${1} + + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns}" mptcp endpoint flush + else + ip netns exec "${ns}" ./pm_nl_ctl flush + fi +} + +mptcp_lib_pm_nl_show_endpoints() { + local ns=${1} + + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns}" mptcp endpoint show + else + ip netns exec "${ns}" ./pm_nl_ctl dump + fi +} + +mptcp_lib_pm_nl_change_endpoint() { + local ns=${1} + local id=${2} + local flags=${3} + + if mptcp_lib_is_ip_mptcp; then + # shellcheck disable=SC2086 # blanks in flags, no double quote + ip -n "${ns}" mptcp endpoint change id "${id}" ${flags//","/" "} + else + ip netns exec "${ns}" ./pm_nl_ctl set id "${id}" flags "${flags}" + fi +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 926b0be87c99..9934a68df237 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -159,13 +159,21 @@ static const char *getxinfo_strerr(int err) } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { +again: int err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? service : "", errstr); @@ -178,7 +186,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -223,7 +231,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index e2d70c18786e..418a903c3a4d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -22,6 +22,28 @@ ns1="" ns2="" ns_sbox="" +usage() { + echo "Usage: $0 [ -i ] [ -h ]" + echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'" + echo -e "\t-h: help" +} + +while getopts "hi" option;do + case "$option" in + "h") + usage "$0" + exit ${KSFT_PASS} + ;; + "i") + mptcp_lib_set_ip_mptcp + ;; + "?") + usage "$0" + exit ${KSFT_FAIL} + ;; + esac +done + add_mark_rules() { local ns=$1 @@ -58,15 +80,15 @@ init() # let $ns2 reach any $ns1 address from any interface ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i - ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal + mptcp_lib_pm_nl_add_endpoint "${ns1}" "10.0.${i}.1" flags signal + mptcp_lib_pm_nl_add_endpoint "${ns1}" "dead:beef:${i}::1" flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal + mptcp_lib_pm_nl_add_endpoint "${ns2}" "10.0.${i}.2" flags signal + mptcp_lib_pm_nl_add_endpoint "${ns2}" "dead:beef:${i}::2" flags signal done - ip netns exec $ns1 ./pm_nl_ctl limits 8 8 - ip netns exec $ns2 ./pm_nl_ctl limits 8 8 + mptcp_lib_pm_nl_set_limits "${ns1}" 8 8 + mptcp_lib_pm_nl_set_limits "${ns2}" 8 8 add_mark_rules $ns1 1 add_mark_rules $ns2 2 @@ -147,6 +169,11 @@ do_transfer() cmsg+=",TCPINQ" fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat -n + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat -n + timeout ${timeout_test} \ ip netns exec ${listener_ns} \ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ @@ -167,14 +194,16 @@ do_transfer() wait $spid local rets=$? + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat | grep Tcp > /tmp/${listener_ns}.out + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat | grep Tcp > /tmp/${connector_ns}.out + print_title "Transfer ${ip:2}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" - - echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ + "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" mptcp_lib_result_fail "transfer ${ip}" @@ -327,6 +356,7 @@ init make_file "$cin" "client" 1 make_file "$sin" "server" 1 trap cleanup EXIT +mptcp_lib_subtests_last_ts_reset run_tests $ns1 $ns2 10.0.1.1 run_tests $ns1 $ns2 dead:beef:1::1 diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 6ab8c5d36340..2e6648a2b2c0 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -1,28 +1,28 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# Double quotes to prevent globbing and word splitting is recommended in new -# code but we accept it, especially because there were too many before having -# address all other issues detected by shellcheck. -#shellcheck disable=SC2086 - . "$(dirname "${0}")/mptcp_lib.sh" ret=0 usage() { - echo "Usage: $0 [ -h ]" + echo "Usage: $0 [ -i ] [ -h ]" + echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'" + echo -e "\t-h: help" } -optstring=h +optstring=hi while getopts "$optstring" option;do case "$option" in "h") - usage $0 + usage "$0" exit ${KSFT_PASS} ;; + "i") + mptcp_lib_set_ip_mptcp + ;; "?") - usage $0 + usage "$0" exit ${KSFT_FAIL} ;; esac @@ -35,7 +35,7 @@ err=$(mktemp) #shellcheck disable=SC2317 cleanup() { - rm -f $err + rm -f "${err}" mptcp_lib_ns_exit "${ns1}" } @@ -46,6 +46,76 @@ trap cleanup EXIT mptcp_lib_ns_init ns1 +format_limits() { + local accept="${1}" + local subflows="${2}" + + if mptcp_lib_is_ip_mptcp; then + # with a space at the end + printf "add_addr_accepted %d subflows %d \n" "${accept}" "${subflows}" + else + printf "accept %d\nsubflows %d\n" "${accept}" "${subflows}" + fi +} + +get_limits() { + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns1}" mptcp limits + else + ip netns exec "${ns1}" ./pm_nl_ctl limits + fi +} + +format_endpoints() { + mptcp_lib_pm_nl_format_endpoints "${@}" +} + +get_endpoint() { + # shellcheck disable=SC2317 # invoked indirectly + mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}" +} + +change_address() { + local addr=${1} + local flags=${2} + + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns1}" mptcp endpoint change "${addr}" "${flags}" + else + ip netns exec "${ns1}" ./pm_nl_ctl set "${addr}" flags "${flags}" + fi +} + +set_limits() +{ + mptcp_lib_pm_nl_set_limits "${ns1}" "${@}" +} + +add_endpoint() +{ + mptcp_lib_pm_nl_add_endpoint "${ns1}" "${@}" +} + +del_endpoint() +{ + mptcp_lib_pm_nl_del_endpoint "${ns1}" "${@}" +} + +flush_endpoint() +{ + mptcp_lib_pm_nl_flush_endpoint "${ns1}" +} + +show_endpoints() +{ + mptcp_lib_pm_nl_show_endpoints "${ns1}" +} + +change_endpoint() +{ + mptcp_lib_pm_nl_change_endpoint "${ns1}" "${@}" +} + check() { local cmd="$1" @@ -67,125 +137,128 @@ check() fi } -check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list" +mptcp_lib_subtests_last_ts_reset + +check "show_endpoints" "" "defaults addr list" -default_limits="$(ip netns exec $ns1 ./pm_nl_ctl limits)" +default_limits="$(get_limits)" if mptcp_lib_expect_all_features; then - check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 2" "defaults limits" + check "get_limits" "$(format_limits 0 2)" "defaults limits" fi -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 flags signal,backup -check "ip netns exec $ns1 ./pm_nl_ctl get 1" "id 1 flags 10.0.1.1" "simple add/get addr" +add_endpoint 10.0.1.1 +add_endpoint 10.0.1.2 flags subflow dev lo +add_endpoint 10.0.1.3 flags signal,backup +check "get_endpoint 1" "$(format_endpoints "1,10.0.1.1")" "simple add/get addr" -check "ip netns exec $ns1 ./pm_nl_ctl dump" \ -"id 1 flags 10.0.1.1 -id 2 flags subflow dev lo 10.0.1.2 -id 3 flags signal,backup 10.0.1.3" "dump addrs" +check "show_endpoints" \ + "$(format_endpoints "1,10.0.1.1" \ + "2,10.0.1.2,subflow,lo" \ + "3,10.0.1.3,signal backup")" "dump addrs" -ip netns exec $ns1 ./pm_nl_ctl del 2 -check "ip netns exec $ns1 ./pm_nl_ctl get 2" "" "simple del addr" -check "ip netns exec $ns1 ./pm_nl_ctl dump" \ -"id 1 flags 10.0.1.1 -id 3 flags signal,backup 10.0.1.3" "dump addrs after del" +del_endpoint 2 +check "get_endpoint 2" "" "simple del addr" +check "show_endpoints" \ + "$(format_endpoints "1,10.0.1.1" \ + "3,10.0.1.3,signal backup")" "dump addrs after del" -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 2>/dev/null -check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr" +add_endpoint 10.0.1.3 2>/dev/null +check "get_endpoint 4" "" "duplicate addr" -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal -check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment" +add_endpoint 10.0.1.4 flags signal +check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment" for i in $(seq 5 9); do - ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1 + add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1 done -check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit" -check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit" +check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit" +check "get_endpoint 10" "" "above hard addr limit" -ip netns exec $ns1 ./pm_nl_ctl del 9 +del_endpoint 9 for i in $(seq 10 255); do - ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i - ip netns exec $ns1 ./pm_nl_ctl del $i + add_endpoint 10.0.0.9 id "${i}" + del_endpoint "${i}" done -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 -id 3 flags signal,backup 10.0.1.3 -id 4 flags signal 10.0.1.4 -id 5 flags signal 10.0.1.5 -id 6 flags signal 10.0.1.6 -id 7 flags signal 10.0.1.7 -id 8 flags signal 10.0.1.8" "id limit" - -ip netns exec $ns1 ./pm_nl_ctl flush -check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs" - -ip netns exec $ns1 ./pm_nl_ctl limits 9 1 2>/dev/null -check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit" - -ip netns exec $ns1 ./pm_nl_ctl limits 1 9 2>/dev/null -check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit" - -ip netns exec $ns1 ./pm_nl_ctl limits 8 8 -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8 -subflows 8" "set limits" - -ip netns exec $ns1 ./pm_nl_ctl flush -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 id 100 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.5 id 254 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.6 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.7 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.8 -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 -id 2 flags 10.0.1.2 -id 3 flags 10.0.1.7 -id 4 flags 10.0.1.8 -id 100 flags 10.0.1.3 -id 101 flags 10.0.1.4 -id 254 flags 10.0.1.5 -id 255 flags 10.0.1.6" "set ids" - -ip netns exec $ns1 ./pm_nl_ctl flush -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.2 id 254 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.4 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.5 id 253 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.6 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.7 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.8 -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.0.1 -id 2 flags 10.0.0.4 -id 3 flags 10.0.0.6 -id 4 flags 10.0.0.7 -id 5 flags 10.0.0.8 -id 253 flags 10.0.0.5 -id 254 flags 10.0.0.2 -id 255 flags 10.0.0.3" "wrap-around ids" - -ip netns exec $ns1 ./pm_nl_ctl flush -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags subflow -ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ -subflow,backup 10.0.1.1" "set flags (backup)" -ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ -subflow 10.0.1.1" " (nobackup)" +check "show_endpoints" \ + "$(format_endpoints "1,10.0.1.1" \ + "3,10.0.1.3,signal backup" \ + "4,10.0.1.4,signal" \ + "5,10.0.1.5,signal" \ + "6,10.0.1.6,signal" \ + "7,10.0.1.7,signal" \ + "8,10.0.1.8,signal")" "id limit" + +flush_endpoint +check "show_endpoints" "" "flush addrs" + +set_limits 9 1 2>/dev/null +check "get_limits" "${default_limits}" "rcv addrs above hard limit" + +set_limits 1 9 2>/dev/null +check "get_limits" "${default_limits}" "subflows above hard limit" + +set_limits 8 8 +check "get_limits" "$(format_limits 8 8)" "set limits" + +flush_endpoint +add_endpoint 10.0.1.1 +add_endpoint 10.0.1.2 +add_endpoint 10.0.1.3 id 100 +add_endpoint 10.0.1.4 +add_endpoint 10.0.1.5 id 254 +add_endpoint 10.0.1.6 +add_endpoint 10.0.1.7 +add_endpoint 10.0.1.8 +check "show_endpoints" \ + "$(format_endpoints "1,10.0.1.1" \ + "2,10.0.1.2" \ + "3,10.0.1.7" \ + "4,10.0.1.8" \ + "100,10.0.1.3" \ + "101,10.0.1.4" \ + "254,10.0.1.5" \ + "255,10.0.1.6")" "set ids" + +flush_endpoint +add_endpoint 10.0.0.1 +add_endpoint 10.0.0.2 id 254 +add_endpoint 10.0.0.3 +add_endpoint 10.0.0.4 +add_endpoint 10.0.0.5 id 253 +add_endpoint 10.0.0.6 +add_endpoint 10.0.0.7 +add_endpoint 10.0.0.8 +check "show_endpoints" \ + "$(format_endpoints "1,10.0.0.1" \ + "2,10.0.0.4" \ + "3,10.0.0.6" \ + "4,10.0.0.7" \ + "5,10.0.0.8" \ + "253,10.0.0.5" \ + "254,10.0.0.2" \ + "255,10.0.0.3")" "wrap-around ids" + +flush_endpoint +add_endpoint 10.0.1.1 flags subflow +change_address 10.0.1.1 backup +check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup")" \ + "set flags (backup)" +change_address 10.0.1.1 nobackup +check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \ + " (nobackup)" # fullmesh support has been added later -ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null -if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" || +change_endpoint 1 fullmesh 2>/dev/null +if show_endpoints | grep -q "fullmesh" || mptcp_lib_expect_all_features; then - check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ -subflow,fullmesh 10.0.1.1" " (fullmesh)" - ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh - check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ -subflow 10.0.1.1" " (nofullmesh)" - ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh - check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ -subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)" + check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow fullmesh")" \ + " (fullmesh)" + change_endpoint 1 nofullmesh + check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \ + " (nofullmesh)" + change_endpoint 1 backup,fullmesh + check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup fullmesh")" \ + " (backup,fullmesh)" else for st in fullmesh nofullmesh backup,fullmesh; do st=" (${st})" diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 7426a2cbd4a0..994a556f46c1 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -19,12 +19,6 @@ #include "linux/mptcp.h" -#ifndef MPTCP_PM_NAME -#define MPTCP_PM_NAME "mptcp_pm" -#endif -#ifndef MPTCP_PM_EVENTS -#define MPTCP_PM_EVENTS "mptcp_pm_events" -#endif #ifndef IPPROTO_MPTCP #define IPPROTO_MPTCP 262 #endif @@ -116,7 +110,7 @@ static int capture_events(int fd, int event_group) if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &event_group, sizeof(event_group)) < 0) - error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group"); + error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group"); do { FD_ZERO(&rfds); @@ -288,7 +282,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family, if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID) *events_mcast_grp = *(__u32 *)RTA_DATA(grp); else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME && - !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS)) + !strcmp(RTA_DATA(grp), MPTCP_PM_EV_GRP_NAME)) got_events_grp = 1; grp = RTA_NEXT(grp, grp_len); @@ -1276,7 +1270,7 @@ int add_listener(int argc, char *argv[]) struct sockaddr_storage addr; struct sockaddr_in6 *a6; struct sockaddr_in *a4; - u_int16_t family; + u_int16_t family = AF_UNSPEC; int enable = 1; int sock; int err; diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 1b2366220388..2329c2f8519b 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -27,10 +27,11 @@ capout="" size=0 usage() { - echo "Usage: $0 [ -b ] [ -c ] [ -d ]" - echo -e "\t-b: bail out after first error, otherwise runs al testcases" + echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]" + echo -e "\t-b: bail out after first error, otherwise runs all testcases" echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" echo -e "\t-d: debug this script" + echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'" } # This function is used in the cleanup trap @@ -45,7 +46,7 @@ cleanup() } mptcp_lib_check_mptcp -mptcp_lib_check_tools ip +mptcp_lib_check_tools ip tc # "$ns1" ns2 ns3 # ns1eth1 ns2eth1 ns2eth3 ns3eth1 @@ -85,8 +86,8 @@ setup() ip -net "$ns1" route add default via 10.0.2.2 metric 101 ip -net "$ns1" route add default via dead:beef:2::2 metric 101 - ip netns exec "$ns1" ./pm_nl_ctl limits 1 1 - ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow + mptcp_lib_pm_nl_set_limits "${ns1}" 1 1 + mptcp_lib_pm_nl_add_endpoint "${ns1}" 10.0.2.1 dev ns1eth2 flags subflow ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1 ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad @@ -108,7 +109,7 @@ setup() ip -net "$ns3" route add default via 10.0.3.2 ip -net "$ns3" route add default via dead:beef:3::2 - ip netns exec "$ns3" ./pm_nl_ctl limits 1 1 + mptcp_lib_pm_nl_set_limits "${ns3}" 1 1 # debug build can slow down measurably the test program # we use quite tight time limit on the run-time, to ensure @@ -154,6 +155,11 @@ do_transfer() sleep 1 fi + NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \ + nstat -n + NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \ + nstat -n + timeout ${timeout_test} \ ip netns exec ${ns3} \ ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ @@ -179,25 +185,27 @@ do_transfer() kill ${cappid_connector} fi + NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \ + nstat | grep Tcp > /tmp/${ns3}.out + NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \ + nstat | grep Tcp > /tmp/${ns1}.out + cmp $sin $cout > /dev/null 2>&1 local cmps=$? cmp $cin $sout > /dev/null 2>&1 local cmpc=$? - printf "%-16s" " max $max_time " if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then + printf "%-16s" " max $max_time " mptcp_lib_pr_ok cat "$capout" return 0 fi - mptcp_lib_pr_fail - echo "client exit code $retc, server $rets" 1>&2 - echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2 - ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port" - echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2 - ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port" + mptcp_lib_pr_fail "client exit code $retc, server $rets" + mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" \ + "/tmp/${ns3}.out" "/tmp/${ns1}.out" ls -l $sin $cout ls -l $cin $sout @@ -216,8 +224,8 @@ run_test() shift 4 local msg=$* - [ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1="" - [ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2="" + [ $delay1 -gt 0 ] && delay1="delay ${delay1}ms" || delay1="" + [ $delay2 -gt 0 ] && delay2="delay ${delay2}ms" || delay2="" for dev in ns1eth1 ns1eth2; do tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1 @@ -243,7 +251,7 @@ run_test() do_transfer $small $large $time lret=$? mptcp_lib_result_code "${lret}" "${msg}" - if [ $lret -ne 0 ]; then + if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then ret=$lret [ $bail -eq 0 ] || exit $ret fi @@ -253,13 +261,13 @@ run_test() do_transfer $large $small $time lret=$? mptcp_lib_result_code "${lret}" "${msg}" - if [ $lret -ne 0 ]; then + if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then ret=$lret [ $bail -eq 0 ] || exit $ret fi } -while getopts "bcdh" option;do +while getopts "bcdhi" option;do case "$option" in "h") usage $0 @@ -274,6 +282,9 @@ while getopts "bcdh" option;do "d") set -x ;; + "i") + mptcp_lib_set_ip_mptcp + ;; "?") usage $0 exit ${KSFT_FAIL} @@ -282,11 +293,12 @@ while getopts "bcdh" option;do done setup +mptcp_lib_subtests_last_ts_reset run_test 10 10 0 0 "balanced bwidth" run_test 10 10 1 25 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -run_test 10 3 0 0 "unbalanced bwidth" +MPTCP_LIB_SUBTEST_FLAKY=1 run_test 10 3 0 0 "unbalanced bwidth" run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay" run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 9e2981f2d7f5..333064b0b5ac 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -117,7 +117,36 @@ cleanup() trap cleanup EXIT # Create and configure network namespaces for testing +print_title "Init" mptcp_lib_ns_init ns1 ns2 + +# check path_manager and pm_type sysctl mapping +if [ -f /proc/sys/net/mptcp/path_manager ]; then + ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=userspace + pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)" + if [ "${pm_type}" != "1" ]; then + test_fail "unexpected pm_type: ${pm_type}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi + + ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=error 2>/dev/null + pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)" + if [ "${pm_type}" != "1" ]; then + test_fail "unexpected pm_type after error: ${pm_type}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi + + ip netns exec "$ns1" sysctl -q net.mptcp.pm_type=0 + pm_name="$(ip netns exec "$ns1" sysctl -n net.mptcp.path_manager)" + if [ "${pm_name}" != "kernel" ]; then + test_fail "unexpected path-manager: ${pm_name}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi +fi + for i in "$ns1" "$ns2" ;do ip netns exec "$i" sysctl -q net.mptcp.pm_type=1 done @@ -150,8 +179,8 @@ mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid server_evts=$(mktemp) mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid sleep 0.5 +mptcp_lib_subtests_last_ts_reset -print_title "Init" print_test "Created network namespaces ns1, ns2" test_pass @@ -160,10 +189,12 @@ make_connection() local is_v6=$1 local app_port=$app4_port local connect_addr="10.0.1.1" + local client_addr="10.0.1.2" local listen_addr="0.0.0.0" if [ "$is_v6" = "v6" ] then connect_addr="dead:beef:1::1" + client_addr="dead:beef:1::2" listen_addr="::" app_port=$app6_port else @@ -206,6 +237,7 @@ make_connection() [ "$server_serverside" = 1 ] then test_pass + print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}" else test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})" mptcp_lib_result_print_all_tap @@ -297,7 +329,7 @@ test_announce() ip netns exec "$ns2"\ ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\ ns2eth1 - print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, reuse port" + print_test "ADD_ADDR id:client 10.0.2.2 (ns2) => ns1, reuse port" sleep 0.5 verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \ "$client4_port" @@ -306,7 +338,7 @@ test_announce() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann\ dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 - print_test "ADD_ADDR6 id:${client_addr_id} dead:beef:2::2 (ns2) => ns1, reuse port" + print_test "ADD_ADDR6 id:client dead:beef:2::2 (ns2) => ns1, reuse port" sleep 0.5 verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ "$client_addr_id" "$client6_port" "v6" @@ -316,7 +348,7 @@ test_announce() client_addr_id=$((client_addr_id+1)) ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ $client_addr_id dev ns2eth1 port $new4_port - print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, new port" + print_test "ADD_ADDR id:client+1 10.0.2.2 (ns2) => ns1, new port" sleep 0.5 verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ "$client_addr_id" "$new4_port" @@ -327,7 +359,7 @@ test_announce() # ADD_ADDR from the server to client machine reusing the subflow port ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR id:server 10.0.2.1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$app4_port" @@ -336,7 +368,7 @@ test_announce() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR6 id:${server_addr_id} dead:beef:2::1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR6 id:server dead:beef:2::1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ "$server_addr_id" "$app6_port" "v6" @@ -346,7 +378,7 @@ test_announce() server_addr_id=$((server_addr_id+1)) ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 port $new4_port - print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, new port" + print_test "ADD_ADDR id:server+1 10.0.2.1 (ns1) => ns2, new port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$new4_port" @@ -380,7 +412,7 @@ test_remove() local invalid_token=$(( client4_token - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\ $client_addr_id > /dev/null 2>&1 - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token" + print_test "RM_ADDR id:client ns2 => ns1, invalid token" local type type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] @@ -394,7 +426,7 @@ test_remove() local invalid_id=$(( client_addr_id + 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $invalid_id > /dev/null 2>&1 - print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id" + print_test "RM_ADDR id:client+1 ns2 => ns1, invalid id" type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] then @@ -407,7 +439,7 @@ test_remove() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR id:client ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" @@ -416,7 +448,7 @@ test_remove() client_addr_id=$(( client_addr_id - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR id:client-1 ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" @@ -424,7 +456,7 @@ test_remove() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\ $client_addr_id - print_test "RM_ADDR6 id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR6 id:client-1 ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id" @@ -434,7 +466,7 @@ test_remove() # RM_ADDR from the server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id - print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR id:server ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" @@ -443,7 +475,7 @@ test_remove() server_addr_id=$(( server_addr_id - 1 )) ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id - print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR id:server-1 ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" @@ -451,7 +483,7 @@ test_remove() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\ $server_addr_id - print_test "RM_ADDR6 id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR6 id:server-1 ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id" } @@ -479,8 +511,14 @@ verify_subflow_events() local locid local remid local info + local e_dport_txt + + # only display the fixed ports + if [ "${e_dport}" -ge "${app4_port}" ] && [ "${e_dport}" -le "${app6_port}" ]; then + e_dport_txt=":${e_dport}" + fi - info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})" + info="${e_saddr} (${e_from}) => ${e_daddr}${e_dport_txt} (${e_to})" if [ "$e_type" = "$SUB_ESTABLISHED" ] then @@ -766,7 +804,7 @@ test_subflows_v4_v6_mix() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR4 id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR4 id:server 10.0.2.1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\ "$server_addr_id" "$app6_port" @@ -861,7 +899,7 @@ test_listener() local listener_pid=$! sleep 0.5 - print_test "CREATE_LISTENER 10.0.2.2:$client4_port" + print_test "CREATE_LISTENER 10.0.2.2 (client port)" verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port # ADD_ADDR from client to server machine reusing the subflow port @@ -878,13 +916,14 @@ test_listener() mptcp_lib_kill_wait $listener_pid sleep 0.5 - print_test "CLOSE_LISTENER 10.0.2.2:$client4_port" + print_test "CLOSE_LISTENER 10.0.2.2 (client port)" verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port } print_title "Make connections" make_connection make_connection "v6" +print_title "Will be using address IDs ${client_addr_id} (client) and ${server_addr_id} (server)" test_announce test_remove diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index bdc03a2097e8..7ea5fb28c93d 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -85,6 +85,7 @@ static bool cfg_rx; static int cfg_runtime_ms = 4200; static int cfg_verbose; static int cfg_waittime_ms = 500; +static int cfg_notification_limit = 32; static bool cfg_zerocopy; static socklen_t cfg_alen; @@ -95,6 +96,7 @@ static char payload[IP_MAXPACKET]; static long packets, bytes, completions, expected_completions; static int zerocopied = -1; static uint32_t next_completion; +static uint32_t sends_since_notify; static unsigned long gettimeofday_ms(void) { @@ -208,6 +210,7 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain) error(1, errno, "send"); if (cfg_verbose && ret != len) fprintf(stderr, "send: ret=%u != %u\n", ret, len); + sends_since_notify++; if (len) { packets++; @@ -435,7 +438,7 @@ static bool do_recv_completion(int fd, int domain) /* Detect notification gaps. These should not happen often, if at all. * Gaps can occur due to drops, reordering and retransmissions. */ - if (lo != next_completion) + if (cfg_verbose && lo != next_completion) fprintf(stderr, "gap: %u..%u does not append to %u\n", lo, hi, next_completion); next_completion = hi + 1; @@ -460,6 +463,7 @@ static bool do_recv_completion(int fd, int domain) static void do_recv_completions(int fd, int domain) { while (do_recv_completion(fd, domain)) {} + sends_since_notify = 0; } /* Wait for all remaining completions on the errqueue */ @@ -549,6 +553,9 @@ static void do_tx(int domain, int type, int protocol) else do_sendmsg(fd, &msg, cfg_zerocopy, domain); + if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit) + do_recv_completions(fd, domain); + while (!do_poll(fd, POLLOUT)) { if (cfg_zerocopy) do_recv_completions(fd, domain); @@ -708,7 +715,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = max_payload_len; - while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) { + while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -736,6 +743,9 @@ static void parse_opts(int argc, char **argv) if (cfg_ifindex == 0) error(1, errno, "invalid iface: %s", optarg); break; + case 'l': + cfg_notification_limit = strtoul(optarg, NULL, 0); + break; case 'm': cfg_cork_mixed = true; break; diff --git a/tools/testing/selftests/net/nat6to4.c b/tools/testing/selftests/net/nat6to4.bpf.c index ac54c36b25fc..ac54c36b25fc 100644 --- a/tools/testing/selftests/net/nat6to4.c +++ b/tools/testing/selftests/net/nat6to4.bpf.c diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh deleted file mode 100644 index 6596fe03c77f..000000000000 --- a/tools/testing/selftests/net/net_helper.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Helper functions - -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - local protocol="${3}" - local pattern - local i - - pattern=":$(printf "%04X" "${port}") " - - # for tcp protocol additionally check the socket state - [ ${protocol} = "tcp" ] && pattern="${pattern}0A" - for i in $(seq 10); do - if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \ - /proc/net/"${protocol}"* | grep -q "${pattern}"; then - break - fi - sleep 0.1 - done -} diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh index e3afcb424710..438f7b2acc5f 100755 --- a/tools/testing/selftests/net/netdevice.sh +++ b/tools/testing/selftests/net/netdevice.sh @@ -67,8 +67,12 @@ kci_net_setup() return $ksft_skip fi - # TODO what ipaddr to set ? DHCP ? - echo "SKIP: $netdev: set IP address" + if [ "$veth_created" ]; then + echo "XFAIL: $netdev: set IP address unsupported for veth*" + else + # TODO what ipaddr to set ? DHCP ? + echo "SKIP: $netdev: set IP address" + fi return $ksft_skip } @@ -86,7 +90,7 @@ kci_netdev_ethtool_test() ret=$? if [ $ret -ne 0 ];then if [ $ret -eq "$1" ];then - echo "SKIP: $netdev: ethtool $2 not supported" + echo "XFAIL: $netdev: ethtool $2 not supported" return $ksft_skip else echo "FAIL: $netdev: ethtool $2" @@ -124,11 +128,45 @@ kci_netdev_ethtool() return 1 fi echo "PASS: $netdev: ethtool list features" - #TODO for each non fixed features, try to turn them on/off + + while read -r FEATURE VALUE FIXED; do + [ "$FEATURE" != "Features" ] || continue # Skip "Features" + [ "$FIXED" != "[fixed]" ] || continue # Skip fixed features + feature="${FEATURE%:*}" + + ethtool --offload "$netdev" "$feature" off + if [ $? -eq 0 ]; then + echo "PASS: $netdev: Turned off feature: $feature" + else + echo "FAIL: $netdev: Failed to turn off feature:" \ + "$feature" + fi + + ethtool --offload "$netdev" "$feature" on + if [ $? -eq 0 ]; then + echo "PASS: $netdev: Turned on feature: $feature" + else + echo "FAIL: $netdev: Failed to turn on feature:" \ + "$feature" + fi + + #restore the feature to its initial state + ethtool --offload "$netdev" "$feature" "$VALUE" + if [ $? -eq 0 ]; then + echo "PASS: $netdev: Restore feature $feature" \ + "to initial state $VALUE" + else + echo "FAIL: $netdev: Failed to restore feature" \ + "$feature to initial state $VALUE" + fi + + done < "$TMP_ETHTOOL_FEATURES" + rm "$TMP_ETHTOOL_FEATURES" kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev" kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev" + return 0 } @@ -196,10 +234,24 @@ if [ ! -e "$TMP_LIST_NETDEV" ];then fi ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\ -f2 | cut -d: -f1> "$TMP_LIST_NETDEV" + +if [ ! -s "$TMP_LIST_NETDEV" ]; then + echo "No valid network device found, creating veth pair" + ip link add veth0 type veth peer name veth1 + echo "veth0" > "$TMP_LIST_NETDEV" + veth_created=1 +fi + while read netdev do kci_test_netdev "$netdev" done < "$TMP_LIST_NETDEV" +#clean up veth interface pair if it was created +if [ "$veth_created" ]; then + ip link delete veth0 + echo "Removed veth pair" +fi + rm "$TMP_LIST_NETDEV" exit 0 diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore new file mode 100644 index 000000000000..64c4f8d9aa6c --- /dev/null +++ b/tools/testing/selftests/net/netfilter/.gitignore @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +audit_logread +connect_close +conntrack_dump_flush +conntrack_reverse_clash +sctp_collision +nf_queue diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile new file mode 100644 index 000000000000..e9b2f553588d --- /dev/null +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. + +HOSTPKG_CONFIG := pkg-config +MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) +MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) + +TEST_PROGS := br_netfilter.sh bridge_brouter.sh +TEST_PROGS += br_netfilter_queue.sh +TEST_PROGS += conntrack_dump_flush.sh +TEST_PROGS += conntrack_icmp_related.sh +TEST_PROGS += conntrack_ipip_mtu.sh +TEST_PROGS += conntrack_tcp_unreplied.sh +TEST_PROGS += conntrack_resize.sh +TEST_PROGS += conntrack_sctp_collision.sh +TEST_PROGS += conntrack_vrf.sh +TEST_PROGS += conntrack_reverse_clash.sh +TEST_PROGS += ipvs.sh +TEST_PROGS += nf_conntrack_packetdrill.sh +TEST_PROGS += nf_nat_edemux.sh +TEST_PROGS += nft_audit.sh +TEST_PROGS += nft_concat_range.sh +TEST_PROGS += nft_conntrack_helper.sh +TEST_PROGS += nft_fib.sh +TEST_PROGS += nft_flowtable.sh +TEST_PROGS += nft_interface_stress.sh +TEST_PROGS += nft_meta.sh +TEST_PROGS += nft_nat.sh +TEST_PROGS += nft_nat_zones.sh +TEST_PROGS += nft_queue.sh +TEST_PROGS += nft_synproxy.sh +TEST_PROGS += nft_tproxy_tcp.sh +TEST_PROGS += nft_tproxy_udp.sh +TEST_PROGS += nft_zones_many.sh +TEST_PROGS += rpath.sh +TEST_PROGS += vxlan_mtu_frag.sh +TEST_PROGS += xt_string.sh + +TEST_PROGS_EXTENDED = nft_concat_range_perf.sh + +TEST_GEN_FILES = audit_logread +TEST_GEN_FILES += connect_close nf_queue +TEST_GEN_FILES += conntrack_dump_flush +TEST_GEN_FILES += conntrack_reverse_clash +TEST_GEN_FILES += sctp_collision + +include ../../lib.mk + +$(OUTPUT)/nf_queue: CFLAGS += $(MNL_CFLAGS) +$(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS) + +$(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS) +$(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS) + +TEST_FILES := lib.sh +TEST_FILES += packetdrill + +TEST_INCLUDES := \ + ../lib.sh \ + $(wildcard ../lib/sh/*.sh) diff --git a/tools/testing/selftests/net/netfilter/audit_logread.c b/tools/testing/selftests/net/netfilter/audit_logread.c new file mode 100644 index 000000000000..a0a880fc2d9d --- /dev/null +++ b/tools/testing/selftests/net/netfilter/audit_logread.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <unistd.h> +#include <linux/audit.h> +#include <linux/netlink.h> + +static int fd; + +#define MAX_AUDIT_MESSAGE_LENGTH 8970 +struct audit_message { + struct nlmsghdr nlh; + union { + struct audit_status s; + char data[MAX_AUDIT_MESSAGE_LENGTH]; + } u; +}; + +int audit_recv(int fd, struct audit_message *rep) +{ + struct sockaddr_nl addr; + socklen_t addrlen = sizeof(addr); + int ret; + + do { + ret = recvfrom(fd, rep, sizeof(*rep), 0, + (struct sockaddr *)&addr, &addrlen); + } while (ret < 0 && errno == EINTR); + + if (ret < 0 || + addrlen != sizeof(addr) || + addr.nl_pid != 0 || + rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */ + return -1; + + return ret; +} + +int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val) +{ + static int seq = 0; + struct audit_message msg = { + .nlh = { + .nlmsg_len = NLMSG_SPACE(sizeof(msg.u.s)), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + .nlmsg_seq = ++seq, + }, + .u.s = { + .mask = key, + .enabled = key == AUDIT_STATUS_ENABLED ? val : 0, + .pid = key == AUDIT_STATUS_PID ? val : 0, + } + }; + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + }; + int ret; + + do { + ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0, + (struct sockaddr *)&addr, sizeof(addr)); + } while (ret < 0 && errno == EINTR); + + if (ret != (int)msg.nlh.nlmsg_len) + return -1; + return 0; +} + +int audit_set(int fd, uint32_t key, uint32_t val) +{ + struct audit_message rep = { 0 }; + int ret; + + ret = audit_send(fd, AUDIT_SET, key, val); + if (ret) + return ret; + + ret = audit_recv(fd, &rep); + if (ret < 0) + return ret; + return 0; +} + +int readlog(int fd) +{ + struct audit_message rep = { 0 }; + int ret = audit_recv(fd, &rep); + const char *sep = ""; + char *k, *v; + + if (ret < 0) + return ret; + + if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG) + return 0; + + /* skip the initial "audit(...): " part */ + strtok(rep.u.data, " "); + + while ((k = strtok(NULL, "="))) { + v = strtok(NULL, " "); + + /* these vary and/or are uninteresting, ignore */ + if (!strcmp(k, "pid") || + !strcmp(k, "comm") || + !strcmp(k, "subj")) + continue; + + /* strip the varying sequence number */ + if (!strcmp(k, "table")) + *strchrnul(v, ':') = '\0'; + + printf("%s%s=%s", sep, k, v); + sep = " "; + } + if (*sep) { + printf("\n"); + fflush(stdout); + } + return 0; +} + +void cleanup(int sig) +{ + audit_set(fd, AUDIT_STATUS_ENABLED, 0); + close(fd); + if (sig) + exit(0); +} + +int main(int argc, char **argv) +{ + struct sigaction act = { + .sa_handler = cleanup, + }; + + fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT); + if (fd < 0) { + perror("Can't open netlink socket"); + return -1; + } + + if (sigaction(SIGTERM, &act, NULL) < 0 || + sigaction(SIGINT, &act, NULL) < 0) { + perror("Can't set signal handler"); + close(fd); + return -1; + } + + audit_set(fd, AUDIT_STATUS_ENABLED, 1); + audit_set(fd, AUDIT_STATUS_PID, getpid()); + + while (1) + readlog(fd); +} diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh new file mode 100755 index 000000000000..011de8763094 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh @@ -0,0 +1,175 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test for legacy br_netfilter module combined with connection tracking, +# a combination that doesn't really work. +# Multicast/broadcast packets race for hash table insertion. + +# eth0 br0 eth0 +# setup is: ns1 <->,ns0 <-> ns3 +# ns2 <-' `'-> ns4 + +source lib.sh + +checktool "nft --version" "run test without nft tool" + +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + +cleanup() { + cleanup_all_ns +} + +trap cleanup EXIT + +setup_ns ns0 ns1 ns2 ns3 ns4 + +ret=0 + +do_ping() +{ + fromns="$1" + dstip="$2" + + if ! ip netns exec "$fromns" ping -c 1 -q "$dstip" > /dev/null; then + echo "ERROR: ping from $fromns to $dstip" + ip netns exec "$ns0" nft list ruleset + ret=1 + fi +} + +bcast_ping() +{ + fromns="$1" + dstip="$2" + + local packets=500 + + [ "$KSFT_MACHINE_SLOW" = yes ] && packets=100 + + for i in $(seq 1 $packets); do + if ! ip netns exec "$fromns" ping -q -f -b -c 1 -q "$dstip" > /dev/null 2>&1; then + echo "ERROR: ping -b from $fromns to $dstip" + ip netns exec "$ns0" nft list ruleset + ret=1 + break + fi + done +} + +if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then + echo "SKIP: Can't create veth device" + exit $ksft_skip +fi + +ip link add veth2 netns "$ns0" type veth peer name eth0 netns "$ns2" +ip link add veth3 netns "$ns0" type veth peer name eth0 netns "$ns3" +ip link add veth4 netns "$ns0" type veth peer name eth0 netns "$ns4" + +for i in $(seq 1 4); do + ip -net "$ns0" link set "veth$i" up +done + +if ! ip -net "$ns0" link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1; then + echo "SKIP: Can't create bridge br0" + exit $ksft_skip +fi + +# make veth0,1,2 part of bridge. +for i in $(seq 1 3); do + ip -net "$ns0" link set "veth$i" master br0 +done + +# add a macvlan on top of the bridge. +MACVLAN_ADDR=ba:f3:13:37:42:23 +ip -net "$ns0" link add link br0 name macvlan0 type macvlan mode private +ip -net "$ns0" link set macvlan0 address ${MACVLAN_ADDR} +ip -net "$ns0" link set macvlan0 up +ip -net "$ns0" addr add 10.23.0.1/24 dev macvlan0 + +# add a macvlan on top of veth4. +MACVLAN_ADDR=ba:f3:13:37:42:24 +ip -net "$ns0" link add link veth4 name macvlan4 type macvlan mode passthru +ip -net "$ns0" link set macvlan4 address ${MACVLAN_ADDR} +ip -net "$ns0" link set macvlan4 up + +# make the macvlan part of the bridge. +# veth4 is not a bridge port, only the macvlan on top of it. +ip -net "$ns0" link set macvlan4 master br0 + +ip -net "$ns0" link set br0 up +ip -net "$ns0" addr add 10.0.0.1/24 dev br0 + +modprobe -q br_netfilter +if ! ip netns exec "$ns0" sysctl -q net.bridge.bridge-nf-call-iptables=1; then + echo "SKIP: bridge netfilter not available" + ret=$ksft_skip +fi + +# for testing, so namespaces will reply to ping -b probes. +ip netns exec "$ns0" sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0 + +# enable conntrack in ns0 and drop broadcast packets in forward to +# avoid them from getting confirmed in the postrouting hook before +# the cloned skb is passed up the stack. +ip netns exec "$ns0" nft -f - <<EOF +table ip filter { + chain input { + type filter hook input priority 1; policy accept + iifname br0 counter + ct state new accept + } +} + +table bridge filter { + chain forward { + type filter hook forward priority 0; policy accept + meta pkttype broadcast ip protocol icmp counter drop + } +} +EOF +if [ "$?" -ne 0 ];then + echo "SKIP: could not add nftables ruleset" + exit $ksft_skip +fi + +# place 1, 2 & 3 in same subnet, connected via ns0:br0. +# ns4 is placed in same subnet as well, but its not +# part of the bridge: the corresponding veth4 is not +# part of the bridge, only its macvlan interface. +for i in $(seq 1 4); do + eval ip -net \$ns"$i" link set eth0 up +done +for i in $(seq 1 2); do + eval ip -net \$ns"$i" addr add "10.0.0.1$i/24" dev eth0 +done + +ip -net "$ns3" addr add 10.23.0.13/24 dev eth0 +ip -net "$ns4" addr add 10.23.0.14/24 dev eth0 + +# test basic connectivity +do_ping "$ns1" 10.0.0.12 +do_ping "$ns3" 10.23.0.1 +do_ping "$ns4" 10.23.0.1 + +bcast_ping "$ns1" 10.0.0.255 + +# This should deliver broadcast to macvlan0, which is on top of ns0:br0. +bcast_ping "$ns3" 10.23.0.255 + +# same, this time via veth4:macvlan4. +bcast_ping "$ns4" 10.23.0.255 + +read t < /proc/sys/kernel/tainted +if [ "$t" -eq 0 ];then + echo PASS: kernel not tainted +else + echo ERROR: kernel is tainted + dmesg + ret=1 +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh new file mode 100755 index 000000000000..4788641717d9 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -0,0 +1,85 @@ +#!/bin/bash + +source lib.sh + +checktool "nft --version" "run test without nft tool" + +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + +cleanup() { + cleanup_all_ns +} + +setup_ns c1 c2 c3 sender + +trap cleanup EXIT + +nf_queue_wait() +{ + grep -q "^ *$1 " "/proc/self/net/netfilter/nfnetlink_queue" +} + +port_add() { + ns="$1" + dev="$2" + a="$3" + + ip link add name "$dev" type veth peer name "$dev" netns "$ns" + + ip -net "$ns" addr add 192.168.1."$a"/24 dev "$dev" + ip -net "$ns" link set "$dev" up + + ip link set "$dev" master br0 + ip link set "$dev" up +} + +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } + +ip link add br0 type bridge +ip addr add 192.168.1.254/24 dev br0 + +port_add "$c1" "c1" 1 +port_add "$c2" "c2" 2 +port_add "$c3" "c3" 3 +port_add "$sender" "sender" 253 + +ip link set br0 up + +modprobe -q br_netfilter + +sysctl net.bridge.bridge-nf-call-iptables=1 || exit 1 + +ip netns exec "$sender" ping -I sender -c1 192.168.1.1 || exit 1 +ip netns exec "$sender" ping -I sender -c1 192.168.1.2 || exit 2 +ip netns exec "$sender" ping -I sender -c1 192.168.1.3 || exit 3 + +nft -f /dev/stdin <<EOF +table ip filter { + chain forward { + type filter hook forward priority 0; policy accept; + ct state new counter + ip protocol icmp counter queue num 0 bypass + } +} +EOF +./nf_queue -t 5 > /dev/null & + +busywait 5000 nf_queue_wait + +for i in $(seq 1 5); do conntrack -F > /dev/null 2> /dev/null; sleep 0.1 ; done & +ip netns exec "$sender" ping -I sender -f -c 50 -b 192.168.1.255 + +read t < /proc/sys/kernel/tainted +if [ "$t" -eq 0 ];then + echo PASS: kernel not tainted +else + echo ERROR: kernel is tainted + dmesg + exit 1 +fi + +exit 0 diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh new file mode 100755 index 000000000000..ea76f2bc2f59 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# +# This test is for bridge 'brouting', i.e. make some packets being routed +# rather than getting bridged even though they arrive on interface that is +# part of a bridge. + +# eth0 br0 eth0 +# setup is: ns1 <-> nsbr <-> ns2 + +source lib.sh + +if ! ebtables -V > /dev/null 2>&1;then + echo "SKIP: Could not run test without ebtables" + exit $ksft_skip +fi + +cleanup() { + cleanup_all_ns +} + +trap cleanup EXIT + +setup_ns nsbr ns1 ns2 + +if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then + echo "SKIP: Can't create veth device" + exit $ksft_skip +fi +ip link add veth1 netns "$nsbr" type veth peer name eth0 netns "$ns2" + +if ! ip -net "$nsbr" link add br0 type bridge; then + echo "SKIP: Can't create bridge br0" + exit $ksft_skip +fi + +ip -net "$nsbr" link set veth0 up +ip -net "$nsbr" link set veth1 up + +ip -net "$nsbr" link set veth0 master br0 +ip -net "$nsbr" link set veth1 master br0 +ip -net "$nsbr" link set br0 up +ip -net "$nsbr" addr add 10.0.0.1/24 dev br0 + +# place both in same subnet, ${ns1} and ${ns2} connected via ${nsbr}:br0 +ip -net "$ns1" link set eth0 up +ip -net "$ns2" link set eth0 up +ip -net "$ns1" addr add 10.0.0.11/24 dev eth0 +ip -net "$ns2" addr add 10.0.0.12/24 dev eth0 + +test_ebtables_broute() +{ + # redirect is needed so the dstmac is rewritten to the bridge itself, + # ip stack won't process OTHERHOST (foreign unicast mac) packets. + if ! ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP; then + echo "SKIP: Could not add ebtables broute redirect rule" + return $ksft_skip + fi + + ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=0 + + # ping net${ns1}, expected to not work (ip forwarding is off) + if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then + echo "ERROR: ping works, should have failed" 1>&2 + return 1 + fi + + # enable forwarding on both interfaces. + # neither needs an ip address, but at least the bridge needs + # an ip address in same network segment as ${ns1} and ${ns2} (${nsbr} + # needs to be able to determine route for to-be-forwarded packet). + ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=1 + ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth1.forwarding=1 + + if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then + echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2 + return 1 + fi + + echo "PASS: ${ns1}/${ns2} connectivity with active broute rule" + ip netns exec "$nsbr" ebtables -t broute -F + + # ping net${ns1}, expected to work (frames are bridged) + if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then + echo "ERROR: ping did not work, but it should (bridged)" 1>&2 + return 1 + fi + + ip netns exec "$nsbr" ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP + + # ping net${ns1}, expected to not work (DROP in bridge forward) + if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then + echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2 + return 1 + fi + + # re-activate brouter + ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP + + if ! ip netns exec "$ns2" ping -q -c 1 10.0.0.11 > /dev/null; then + echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2 + return 1 + fi + + echo "PASS: ${ns1}/${ns2} connectivity with active broute rule and bridge forward drop" + return 0 +} + +# test basic connectivity +if ! ip netns exec "$ns1" ping -c 1 -q 10.0.0.12 > /dev/null; then + echo "ERROR: Could not reach ${ns2} from ${ns1}" 1>&2 + exit 1 +fi + +if ! ip netns exec "$ns2" ping -c 1 -q 10.0.0.11 > /dev/null; then + echo "ERROR: Could not reach ${ns1} from ${ns2}" 1>&2 + exit 1 +fi + +test_ebtables_broute +exit $? diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config new file mode 100644 index 000000000000..363646f4fefe --- /dev/null +++ b/tools/testing/selftests/net/netfilter/config @@ -0,0 +1,95 @@ +CONFIG_AUDIT=y +CONFIG_BPF_SYSCALL=y +CONFIG_BRIDGE=m +CONFIG_BRIDGE_EBT_BROUTE=m +CONFIG_BRIDGE_EBT_IP=m +CONFIG_BRIDGE_EBT_REDIRECT=m +CONFIG_BRIDGE_EBT_T_FILTER=m +CONFIG_BRIDGE_NETFILTER=m +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_CGROUP_BPF=y +CONFIG_DUMMY=m +CONFIG_INET_ESP=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP_NF_RAW=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP_SCTP=m +CONFIG_IP_VS=m +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_RR=m +CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_MACVLAN=m +CONFIG_NAMESPACES=y +CONFIG_NET_CLS_U32=m +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_NS=y +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_IPIP=m +CONFIG_NET_VRF=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_SYNPROXY=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_NAT=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_PROCFS=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_ZONES=y +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_LOG_IPV4=m +CONFIG_NF_LOG_IPV6=m +CONFIG_NF_NAT=m +CONFIG_NF_NAT_REDIRECT=y +CONFIG_NF_NAT_MASQUERADE=y +CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_IPV4=y +CONFIG_NF_TABLES_IPV6=y +CONFIG_NF_TABLES_NETDEV=y +CONFIG_NF_FLOW_TABLE_INET=m +CONFIG_NFT_BRIDGE_META=m +CONFIG_NFT_COMPAT=m +CONFIG_NFT_CT=m +CONFIG_NFT_FIB=m +CONFIG_NFT_FIB_INET=m +CONFIG_NFT_FIB_IPV4=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NFT_FLOW_OFFLOAD=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_LOG=m +CONFIG_NFT_MASQ=m +CONFIG_NFT_NAT=m +CONFIG_NFT_NUMGEN=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_QUOTA=m +CONFIG_NFT_REDIR=m +CONFIG_NFT_SYNPROXY=m +CONFIG_NFT_TPROXY=m +CONFIG_VETH=m +CONFIG_VLAN_8021Q=m +CONFIG_VXLAN=m +CONFIG_XFRM_USER=m +CONFIG_XFRM_STATISTICS=y +CONFIG_NET_PKTGEN=m +CONFIG_TUN=m +CONFIG_INET_DIAG=m +CONFIG_SCTP_DIAG=m diff --git a/tools/testing/selftests/net/netfilter/connect_close.c b/tools/testing/selftests/net/netfilter/connect_close.c new file mode 100644 index 000000000000..1c3b0add54c4 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/connect_close.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> + +#include <arpa/inet.h> +#include <sys/socket.h> + +#define PORT 12345 +#define RUNTIME 10 + +static struct { + unsigned int timeout; + unsigned int port; +} opts = { + .timeout = RUNTIME, + .port = PORT, +}; + +static void handler(int sig) +{ + _exit(sig == SIGALRM ? 0 : 1); +} + +static void set_timeout(void) +{ + struct sigaction action = { + .sa_handler = handler, + }; + + sigaction(SIGALRM, &action, NULL); + + alarm(opts.timeout); +} + +static void do_connect(const struct sockaddr_in *dst) +{ + int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + + if (s >= 0) + fcntl(s, F_SETFL, O_NONBLOCK); + + connect(s, (struct sockaddr *)dst, sizeof(*dst)); + close(s); +} + +static void do_accept(const struct sockaddr_in *src) +{ + int c, one = 1, s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + + if (s < 0) + return; + + setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); + setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + + bind(s, (struct sockaddr *)src, sizeof(*src)); + + listen(s, 16); + + c = accept(s, NULL, NULL); + if (c >= 0) + close(c); + + close(s); +} + +static int accept_loop(void) +{ + struct sockaddr_in src = { + .sin_family = AF_INET, + .sin_port = htons(opts.port), + }; + + inet_pton(AF_INET, "127.0.0.1", &src.sin_addr); + + set_timeout(); + + for (;;) + do_accept(&src); + + return 1; +} + +static int connect_loop(void) +{ + struct sockaddr_in dst = { + .sin_family = AF_INET, + .sin_port = htons(opts.port), + }; + + inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr); + + set_timeout(); + + for (;;) + do_connect(&dst); + + return 1; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "t:p:")) != -1) { + switch (c) { + case 't': + opts.timeout = atoi(optarg); + break; + case 'p': + opts.port = atoi(optarg); + break; + } + } +} + +int main(int argc, char *argv[]) +{ + pid_t p; + + parse_opts(argc, argv); + + p = fork(); + if (p < 0) + return 111; + + if (p > 0) + return accept_loop(); + + return connect_loop(); +} diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c new file mode 100644 index 000000000000..5f827e10717d --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <time.h> +#include <libmnl/libmnl.h> +#include <netinet/ip.h> + +#include <linux/netlink.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> +#include <linux/netfilter/nf_conntrack_tcp.h> +#include "../../kselftest_harness.h" + +#define TEST_ZONE_ID 123 +#define NF_CT_DEFAULT_ZONE_ID 0 + +static int reply_counter; + +static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type, + uint32_t src_ip, uint32_t dst_ip, + uint16_t src_port, uint16_t dst_port) +{ + struct nlattr *nest, *nest_ip, *nest_proto; + + nest = mnl_attr_nest_start(nlh, type); + if (!nest) + return -1; + + nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP); + if (!nest_ip) + return -1; + mnl_attr_put_u32(nlh, CTA_IP_V4_SRC, src_ip); + mnl_attr_put_u32(nlh, CTA_IP_V4_DST, dst_ip); + mnl_attr_nest_end(nlh, nest_ip); + + nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6); + mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port)); + mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port)); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); + + return 0; +} + +static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type, + struct in6_addr src_ip, struct in6_addr dst_ip, + uint16_t src_port, uint16_t dst_port) +{ + struct nlattr *nest, *nest_ip, *nest_proto; + + nest = mnl_attr_nest_start(nlh, type); + if (!nest) + return -1; + + nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP); + if (!nest_ip) + return -1; + mnl_attr_put(nlh, CTA_IP_V6_SRC, sizeof(struct in6_addr), &src_ip); + mnl_attr_put(nlh, CTA_IP_V6_DST, sizeof(struct in6_addr), &dst_ip); + mnl_attr_nest_end(nlh, nest_ip); + + nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6); + mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port)); + mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port)); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); + + return 0; +} + +static int build_cta_proto(struct nlmsghdr *nlh) +{ + struct nlattr *nest, *nest_proto; + + nest = mnl_attr_nest_start(nlh, CTA_PROTOINFO); + if (!nest) + return -1; + + nest_proto = mnl_attr_nest_start(nlh, CTA_PROTOINFO_TCP); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTOINFO_TCP_STATE, TCP_CONNTRACK_ESTABLISHED); + mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 0x0a0a); + mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_REPLY, 0x0a0a); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); + + return 0; +} + +static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh, + uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *rplnlh; + unsigned int portid; + int ret; + + portid = mnl_socket_get_portid(sock); + + ret = build_cta_proto(nlh); + if (ret < 0) { + perror("build_cta_proto"); + return -1; + } + mnl_attr_put_u32(nlh, CTA_TIMEOUT, htonl(20000)); + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + if (mnl_socket_sendto(sock, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + return -1; + } + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL); + if (ret < 0) { + if (errno == EEXIST) { + /* The entries are probably still there from a previous + * run. So we are good + */ + return 0; + } + perror("mnl_cb_run"); + return ret; + } + + return 0; +} + +static int conntrack_data_generate_v4(struct mnl_socket *sock, uint32_t src_ip, + uint32_t dst_ip, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh; + struct nfgenmsg *nfh; + int ret; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | + NLM_F_ACK | NLM_F_EXCL; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_INET; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + ret = build_cta_tuple_v4(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v4"); + return ret; + } + ret = build_cta_tuple_v4(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, 443, 12345); + if (ret < 0) { + perror("build_cta_tuple_v4"); + return ret; + } + return conntrack_data_insert(sock, nlh, zone); +} + +static int conntrack_data_generate_v6(struct mnl_socket *sock, + struct in6_addr src_ip, + struct in6_addr dst_ip, + uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh; + struct nfgenmsg *nfh; + int ret; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | + NLM_F_ACK | NLM_F_EXCL; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_INET6; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + ret = build_cta_tuple_v6(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, + 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v6"); + return ret; + } + ret = build_cta_tuple_v6(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, + 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v6"); + return ret; + } + return conntrack_data_insert(sock, nlh, zone); +} + +static int count_entries(const struct nlmsghdr *nlh, void *data) +{ + reply_counter++; + return MNL_CB_OK; +} + +static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh, *rplnlh; + struct nfgenmsg *nfh; + struct nlattr *nest; + unsigned int portid; + int ret; + + portid = mnl_socket_get_portid(sock); + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_UNSPEC; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len); + if (ret < 0) { + perror("mnl_socket_sendto"); + return ret; + } + + reply_counter = 0; + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + while (ret > 0) { + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, + count_entries, NULL); + if (ret <= MNL_CB_STOP) + break; + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + } + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + return reply_counter; +} + +static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh, *rplnlh; + struct nfgenmsg *nfh; + struct nlattr *nest; + unsigned int portid; + int ret; + + portid = mnl_socket_get_portid(sock); + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_DELETE; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_UNSPEC; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len); + if (ret < 0) { + perror("mnl_socket_sendto"); + return ret; + } + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL); + if (ret < 0) { + perror("mnl_cb_run"); + return ret; + } + + return 0; +} + +FIXTURE(conntrack_dump_flush) +{ + struct mnl_socket *sock; +}; + +FIXTURE_SETUP(conntrack_dump_flush) +{ + struct in6_addr src, dst; + int ret; + + self->sock = mnl_socket_open(NETLINK_NETFILTER); + if (!self->sock) { + perror("mnl_socket_open"); + SKIP(return, "cannot open netlink_netfilter socket"); + } + + ret = mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID); + EXPECT_EQ(ret, 0); + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + if (ret < 0 && errno == EPERM) + SKIP(return, "Needs to be run as root"); + else if (ret < 0 && errno == EOPNOTSUPP) + SKIP(return, "Kernel does not seem to support conntrack zones"); + + ret = conntrack_data_generate_v4(self->sock, 0xf0f0f0f0, 0xf1f1f1f1, + TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf2f2f2f2, 0xf3f3f3f3, + TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5, + TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf6f6f6f6, 0xf7f7f7f7, + NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); + + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x01000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x02000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x03000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x04000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 0); + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x05000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x06000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 0); + + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x07000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x08000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_GE(ret, 2); + if (ret > 2) + SKIP(return, "kernel does not support filtering by zone"); +} + +FIXTURE_TEARDOWN(conntrack_dump_flush) +{ +} + +TEST_F(conntrack_dump_flush, test_dump_by_zone) +{ + int ret; + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 2); +} + +TEST_F(conntrack_dump_flush, test_flush_by_zone) +{ + int ret; + + ret = conntrack_flush_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 2); +} + +TEST_F(conntrack_dump_flush, test_flush_by_zone_default) +{ + int ret; + + ret = conntrack_flush_zone(self->sock, NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh new file mode 100755 index 000000000000..8b0935385849 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +exec unshare -n ./conntrack_dump_flush diff --git a/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh new file mode 100755 index 000000000000..c63d840ead61 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh @@ -0,0 +1,278 @@ +#!/bin/bash +# +# check that ICMP df-needed/pkttoobig icmp are set are set as related +# state +# +# Setup is: +# +# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2 +# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280). +# ping nsclient2 from nsclient1, checking that conntrack did set RELATED +# 'fragmentation needed' icmp packet. +# +# In addition, nsrouter1 will perform IP masquerading, i.e. also +# check the icmp errors are propagated to the correct host as per +# nat of "established" icmp-echo "connection". + +source lib.sh + +if ! nft --version > /dev/null 2>&1;then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +cleanup() { + cleanup_all_ns +} + +trap cleanup EXIT + +setup_ns nsclient1 nsclient2 nsrouter1 nsrouter2 + +ret=0 + +add_addr() +{ + ns=$1 + dev=$2 + i=$3 + + ip -net "$ns" link set "$dev" up + ip -net "$ns" addr add "192.168.$i.2/24" dev "$dev" + ip -net "$ns" addr add "dead:$i::2/64" dev "$dev" nodad +} + +check_counter() +{ + ns=$1 + name=$2 + expect=$3 + local lret=0 + + if ! ip netns exec "$ns" nft list counter inet filter "$name" | grep -q "$expect"; then + echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec "$ns" nft list counter inet filter "$name" 1>&2 + lret=1 + fi + + return $lret +} + +check_unknown() +{ + expect="packets 0 bytes 0" + for n in ${nsclient1} ${nsclient2} ${nsrouter1} ${nsrouter2}; do + if ! check_counter "$n" "unknown" "$expect"; then + return 1 + fi + done + + return 0 +} + +DEV=veth0 +ip link add "$DEV" netns "$nsclient1" type veth peer name eth1 netns "$nsrouter1" +ip link add "$DEV" netns "$nsclient2" type veth peer name eth1 netns "$nsrouter2" +ip link add "$DEV" netns "$nsrouter1" type veth peer name eth2 netns "$nsrouter2" + +add_addr "$nsclient1" $DEV 1 +add_addr "$nsclient2" $DEV 2 + +ip -net "$nsrouter1" link set eth1 up +ip -net "$nsrouter1" link set $DEV up + +ip -net "$nsrouter2" link set eth1 mtu 1280 up +ip -net "$nsrouter2" link set eth2 up + +ip -net "$nsclient1" route add default via 192.168.1.1 +ip -net "$nsclient1" -6 route add default via dead:1::1 + +ip -net "$nsclient2" route add default via 192.168.2.1 +ip -net "$nsclient2" route add default via dead:2::1 +ip -net "$nsclient2" link set veth0 mtu 1280 + +ip -net "$nsrouter1" addr add 192.168.1.1/24 dev eth1 +ip -net "$nsrouter1" addr add 192.168.3.1/24 dev veth0 +ip -net "$nsrouter1" addr add dead:1::1/64 dev eth1 nodad +ip -net "$nsrouter1" addr add dead:3::1/64 dev veth0 nodad +ip -net "$nsrouter1" route add default via 192.168.3.10 +ip -net "$nsrouter1" -6 route add default via dead:3::10 + +ip -net "$nsrouter2" addr add 192.168.2.1/24 dev eth1 +ip -net "$nsrouter2" addr add 192.168.3.10/24 dev eth2 +ip -net "$nsrouter2" addr add dead:2::1/64 dev eth1 nodad +ip -net "$nsrouter2" addr add dead:3::10/64 dev eth2 nodad +ip -net "$nsrouter2" route add default via 192.168.3.1 +ip -net "$nsrouter2" route add default via dead:3::1 + +for i in 4 6; do + ip netns exec "$nsrouter1" sysctl -q net.ipv$i.conf.all.forwarding=1 + ip netns exec "$nsrouter2" sysctl -q net.ipv$i.conf.all.forwarding=1 +done + +for netns in "$nsrouter1" "$nsrouter2"; do +ip netns exec "$netns" nft -f - <<EOF +table inet filter { + counter unknown { } + counter related { } + chain forward { + type filter hook forward priority 0; policy accept; + meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept + meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept + meta l4proto { icmp, icmpv6 } ct state new,established accept + counter name "unknown" drop + } +} +EOF +done + +ip netns exec "$nsclient1" nft -f - <<EOF +table inet filter { + counter unknown { } + counter related { } + counter redir4 { } + counter redir6 { } + chain input { + type filter hook input priority 0; policy accept; + + icmp type "redirect" ct state "related" counter name "redir4" accept + icmpv6 type "nd-redirect" ct state "related" counter name "redir6" accept + + meta l4proto { icmp, icmpv6 } ct state established,untracked accept + meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept + + counter name "unknown" drop + } +} +EOF + +ip netns exec "$nsclient2" nft -f - <<EOF +table inet filter { + counter unknown { } + counter new { } + counter established { } + + chain input { + type filter hook input priority 0; policy accept; + meta l4proto { icmp, icmpv6 } ct state established,untracked accept + + meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept + meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept + counter name "unknown" drop + } + chain output { + type filter hook output priority 0; policy accept; + meta l4proto { icmp, icmpv6 } ct state established,untracked accept + + meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" + meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" + counter name "unknown" drop + } +} +EOF + +# make sure NAT core rewrites adress of icmp error if nat is used according to +# conntrack nat information (icmp error will be directed at nsrouter1 address, +# but it needs to be routed to nsclient1 address). +ip netns exec "$nsrouter1" nft -f - <<EOF +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + ip protocol icmp oifname "veth0" counter masquerade + } +} +table ip6 nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + ip6 nexthdr icmpv6 oifname "veth0" counter masquerade + } +} +EOF + +if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q -M "do" 192.168.2.2 >/dev/null; then + echo "ERROR: netns ip routing/connectivity broken" 1>&2 + exit 1 +fi +if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q dead:2::2 >/dev/null; then + echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2 + exit 1 +fi + +if ! check_unknown; then + ret=1 +fi + +expect="packets 0 bytes 0" +for netns in "$nsrouter1" "$nsrouter2" "$nsclient1";do + if ! check_counter "$netns" "related" "$expect"; then + ret=1 + fi +done + +expect="packets 2 bytes 2076" +if ! check_counter "$nsclient2" "new" "$expect"; then + ret=1 +fi + +if ip netns exec "$nsclient1" ping -W 0.5 -q -c 1 -s 1300 -M "do" 192.168.2.2 > /dev/null; then + echo "ERROR: ping should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +# nsrouter2 should have generated the icmp error, so +# related counter should be 0 (its in forward). +expect="packets 0 bytes 0" +if ! check_counter "$nsrouter2" "related" "$expect"; then + ret=1 +fi + +# but nsrouter1 should have seen it, same for nsclient1. +expect="packets 1 bytes 576" +for netns in ${nsrouter1} ${nsclient1};do + if ! check_counter "$netns" "related" "$expect"; then + ret=1 + fi +done + +if ip netns exec "${nsclient1}" ping6 -W 0.5 -c 1 -s 1300 dead:2::2 > /dev/null; then + echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2 + ret=1 +fi + +expect="packets 2 bytes 1856" +for netns in "${nsrouter1}" "${nsclient1}";do + if ! check_counter "$netns" "related" "$expect"; then + ret=1 + fi +done + +if [ $ret -eq 0 ];then + echo "PASS: icmp mtu error had RELATED state" +else + echo "ERROR: icmp error RELATED state test has failed" +fi + +# add 'bad' route, expect icmp REDIRECT to be generated +ip netns exec "${nsclient1}" ip route add 192.168.1.42 via 192.168.1.1 +ip netns exec "${nsclient1}" ip route add dead:1::42 via dead:1::1 + +ip netns exec "$nsclient1" ping -W 1 -q -i 0.5 -c 2 192.168.1.42 > /dev/null + +expect="packets 1 bytes 112" +if ! check_counter "$nsclient1" "redir4" "$expect"; then + ret=1 +fi + +ip netns exec "$nsclient1" ping -W 1 -c 1 dead:1::42 > /dev/null +expect="packets 1 bytes 192" +if ! check_counter "$nsclient1" "redir6" "$expect"; then + ret=1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: icmp redirects had RELATED state" +else + echo "ERROR: icmp redirect RELATED state test has failed" +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh new file mode 100755 index 000000000000..9832a5d0198a --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh @@ -0,0 +1,191 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +# Conntrack needs to reassemble fragments in order to have complete +# packets for rule matching. Reassembly can lead to packet loss. + +# Consider the following setup: +# +--------+ +---------+ +--------+ +# |Router A|-------|Wanrouter|-------|Router B| +# | |.IPIP..| |..IPIP.| | +# +--------+ +---------+ +--------+ +# / mtu 1400 \ +# / \ +#+--------+ +--------+ +#|Client A| |Client B| +#| | | | +#+--------+ +--------+ + +# Router A and Router B use IPIP tunnel interfaces to tunnel traffic +# between Client A and Client B over WAN. Wanrouter has MTU 1400 set +# on its interfaces. + +rx=$(mktemp) + +checktool "iptables --version" "run test without iptables" +checktool "socat -h" "run test without socat" + +setup_ns r_a r_b r_w c_a c_b + +cleanup() { + cleanup_all_ns + rm -f "$rx" +} + +trap cleanup EXIT + +listener_ready() +{ + ns="$1" + port="$2" + ss -N "$ns" -lnu -o "sport = :$port" | grep -q "$port" +} + +test_path() { + msg="$1" + + ip netns exec "$c_b" socat -t 3 - udp4-listen:5000,reuseaddr > "$rx" < /dev/null & + + busywait $BUSYWAIT_TIMEOUT listener_ready "$c_b" 5000 + + for i in 1 2 3; do + head -c1400 /dev/zero | tr "\000" "a" | \ + ip netns exec "$c_a" socat -t 1 -u STDIN UDP:192.168.20.2:5000 + done + + wait + + bytes=$(wc -c < "$rx") + + if [ "$bytes" -eq 1400 ];then + echo "OK: PMTU $msg connection tracking" + else + echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400" + exit 1 + fi +} + +# Detailed setup for Router A +# --------------------------- +# Interfaces: +# eth0: 10.2.2.1/24 +# eth1: 192.168.10.1/24 +# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1 +# Routes: +# 192.168.20.0/24 dev ipip0 (192.168.20.0/24 is subnet of Client B) +# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns "$r_a" type veth peer name veth0 netns "$r_w" +ip link add veth1 netns "$r_a" type veth peer name veth0 netns "$c_a" + +l_addr="10.2.2.1" +r_addr="10.4.4.1" +ip netns exec "$r_a" ip link add ipip0 type ipip local "$l_addr" remote "$r_addr" mode ipip || exit $ksft_skip + +for dev in lo veth0 veth1 ipip0; do + ip -net "$r_a" link set "$dev" up +done + +ip -net "$r_a" addr add 10.2.2.1/24 dev veth0 +ip -net "$r_a" addr add 192.168.10.1/24 dev veth1 + +ip -net "$r_a" route add 192.168.20.0/24 dev ipip0 +ip -net "$r_a" route add 10.4.4.0/24 via 10.2.2.254 + +ip netns exec "$r_a" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Detailed setup for Router B +# --------------------------- +# Interfaces: +# eth0: 10.4.4.1/24 +# eth1: 192.168.20.1/24 +# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1 +# Routes: +# 192.168.10.0/24 dev ipip0 (192.168.10.0/24 is subnet of Client A) +# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns "$r_b" type veth peer name veth1 netns "$r_w" +ip link add veth1 netns "$r_b" type veth peer name veth0 netns "$c_b" + +l_addr="10.4.4.1" +r_addr="10.2.2.1" + +ip netns exec "$r_b" ip link add ipip0 type ipip local "${l_addr}" remote "${r_addr}" mode ipip || exit $ksft_skip + +for dev in veth0 veth1 ipip0; do + ip -net "$r_b" link set $dev up +done + +ip -net "$r_b" addr add 10.4.4.1/24 dev veth0 +ip -net "$r_b" addr add 192.168.20.1/24 dev veth1 + +ip -net "$r_b" route add 192.168.10.0/24 dev ipip0 +ip -net "$r_b" route add 10.2.2.0/24 via 10.4.4.254 +ip netns exec "$r_b" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Client A +ip -net "$c_a" addr add 192.168.10.2/24 dev veth0 +ip -net "$c_a" link set dev veth0 up +ip -net "$c_a" route add default via 192.168.10.1 + +# Client A +ip -net "$c_b" addr add 192.168.20.2/24 dev veth0 +ip -net "$c_b" link set dev veth0 up +ip -net "$c_b" route add default via 192.168.20.1 + +# Wan +ip -net "$r_w" addr add 10.2.2.254/24 dev veth0 +ip -net "$r_w" addr add 10.4.4.254/24 dev veth1 + +ip -net "$r_w" link set dev veth0 up mtu 1400 +ip -net "$r_w" link set dev veth1 up mtu 1400 + +ip -net "$r_a" link set dev veth0 mtu 1400 +ip -net "$r_b" link set dev veth0 mtu 1400 + +ip netns exec "$r_w" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Path MTU discovery +# ------------------ +# Running tracepath from Client A to Client B shows PMTU discovery is working +# as expected: +# +# clienta:~# tracepath 192.168.20.2 +# 1?: [LOCALHOST] pmtu 1500 +# 1: 192.168.10.1 0.867ms +# 1: 192.168.10.1 0.302ms +# 2: 192.168.10.1 0.312ms pmtu 1480 +# 2: no reply +# 3: 192.168.10.1 0.510ms pmtu 1380 +# 3: 192.168.20.2 2.320ms reached +# Resume: pmtu 1380 hops 3 back 3 + +# ip netns exec ${c_a} traceroute --mtu 192.168.20.2 + +# Router A has learned PMTU (1400) to Router B from Wanrouter. +# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B +# from Router A. + +#Send large UDP packet +#--------------------- +#Now we send a 1400 bytes UDP packet from Client A to Client B: + +# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000 +test_path "without" + +# The IPv4 stack on Client A already knows the PMTU to Client B, so the +# UDP packet is sent as two fragments (1380 + 20). Router A forwards the +# fragments between eth1 and ipip0. The fragments fit into the tunnel and +# reach their destination. + +#When sending the large UDP packet again, Router A now reassembles the +#fragments before routing the packet over ipip0. The resulting IPIP +#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is +#dropped on Router A before sending. + +ip netns exec "$r_a" iptables -A FORWARD -m conntrack --ctstate NEW +test_path "with" diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh new file mode 100755 index 000000000000..9e033e80219e --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -0,0 +1,427 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +checktool "conntrack --version" "run test without conntrack" +checktool "nft --version" "run test without nft tool" + +init_net_max=0 +ct_buckets=0 +tmpfile="" +tmpfile_proc="" +tmpfile_uniq="" +ret=0 + +insert_count=2000 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400 + +modprobe -q nf_conntrack +if ! sysctl -q net.netfilter.nf_conntrack_max >/dev/null;then + echo "SKIP: conntrack sysctls not available" + exit $KSFT_SKIP +fi + +init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1 +ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1 + +cleanup() { + cleanup_all_ns + + rm -f "$tmpfile" "$tmpfile_proc" "$tmpfile_uniq" + + # restore original sysctl setting + sysctl -q net.netfilter.nf_conntrack_max=$init_net_max + sysctl -q net.netfilter.nf_conntrack_buckets=$ct_buckets +} +trap cleanup EXIT + +check_max_alias() +{ + local expected="$1" + # old name, expected to alias to the first, i.e. changing one + # changes the other as well. + local lv=$(sysctl -n net.nf_conntrack_max) + + if [ $expected -ne "$lv" ];then + echo "nf_conntrack_max sysctls should have identical values" + exit 1 + fi +} + +insert_ctnetlink() { + local ns="$1" + local count="$2" + local i=0 + local bulk=16 + + while [ $i -lt $count ] ;do + ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \ + if ! conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \ + -d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \ + --protonum 17 --timeout 3600 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \ + return;\ + fi & \ + done ; wait" 2>/dev/null + + i=$((i+bulk)) + done +} + +check_ctcount() { + local ns="$1" + local count="$2" + local msg="$3" + + local now=$(ip netns exec "$ns" conntrack -C) + + if [ $now -ne "$count" ] ;then + echo "expected $count entries in $ns, not $now: $msg" + exit 1 + fi + + echo "PASS: got $count connections: $msg" +} + +ctresize() { + local duration="$1" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM + now=$(date +%s) + done +} + +do_rsleep() { + local limit="$1" + local r=$RANDOM + + r=$((r%limit)) + sleep "$r" +} + +ct_flush_once() { + local ns="$1" + + ip netns exec "$ns" conntrack -F 2>/dev/null +} + +ctflush() { + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + do_rsleep "$duration" + + while [ $now -lt $end ]; do + ct_flush_once "$ns" + do_rsleep "$duration" + now=$(date +%s) + done +} + +ctflood() +{ + local ns="$1" + local duration="$2" + local msg="$3" + local now=$(date +%s) + local end=$((now + duration)) + local j=0 + local k=0 + + while [ $now -lt $end ]; do + j=$((j%256)) + k=$((k%256)) + + ip netns exec "$ns" bash -c \ + "j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1 + + j=$((j+1)) + + if [ $j -eq 256 ];then + k=$((k+1)) + fi + + now=$(date +%s) + done + + wait +} + +# dump to /dev/null. We don't want dumps to cause infinite loops +# or use-after-free even when conntrack table is altered while dumps +# are in progress. +ct_nulldump() +{ + local ns="$1" + + ip netns exec "$ns" conntrack -L > /dev/null 2>&1 & + + # Don't require /proc support in conntrack + if [ -r /proc/self/net/nf_conntrack ] ; then + ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null & + fi + + wait +} + +check_taint() +{ + local tainted_then="$1" + local msg="$2" + + local tainted_now=0 + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + + if [ "$tainted_now" -eq 0 ];then + echo "PASS: $msg" + else + echo "TAINT: $msg" + dmesg + exit 1 + fi +} + +insert_flood() +{ + local n="$1" + local r=0 + + r=$((RANDOM%$insert_count)) + + ctflood "$n" "$timeout" "floodresize" & + insert_ctnetlink "$n" "$r" & + ctflush "$n" "$timeout" & + ct_nulldump "$n" & + + wait +} + +test_floodresize_all() +{ + local timeout=20 + local n="" + local tainted_then="" + + read tainted_then < /proc/sys/kernel/tainted + + for n in "$nsclient1" "$nsclient2";do + insert_flood "$n" & + done + + # resize table constantly while flood/insert/dump/flushs + # are happening in parallel. + ctresize "$timeout" + + # wait for subshells to complete, everything is limited + # by $timeout. + wait + + check_taint "$tainted_then" "resize+flood" +} + +check_dump() +{ + local ns="$1" + local protoname="$2" + local c=0 + local proto=0 + local proc=0 + local unique="" + local lret=0 + + # NOTE: assumes timeouts are large enough to not have + # expirations in all following tests. + l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | sort | tee "$tmpfile" | wc -l) + c=$(ip netns exec "$ns" conntrack -C) + + if [ "$c" -eq 0 ]; then + echo "FAIL: conntrack count for $ns is 0" + lret=1 + fi + + if [ "$c" -ne "$l" ]; then + echo "FAIL: conntrack count inconsistency for $ns -L: $c != $l" + lret=1 + fi + + # check the dump we retrieved is free of duplicated entries. + unique=$(uniq "$tmpfile" | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$unique" ]; then + echo "FAIL: listing contained redundant entries for $ns: $l != $unique" + diff -u "$tmpfile" "$tmpfile_uniq" + lret=1 + fi + + # we either inserted icmp or only udp, hence, --proto should return same entry count as without filter. + proto=$(ip netns exec "$ns" conntrack -L --proto $protoname 2>/dev/null | sort | uniq | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$proto" ]; then + echo "FAIL: dump inconsistency for $ns -L --proto $protoname: $l != $proto" + diff -u "$tmpfile" "$tmpfile_uniq" + lret=1 + fi + + if [ -r /proc/self/net/nf_conntrack ] ; then + proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | tee \"$tmpfile_proc\" | wc -l") + + if [ "$l" -ne "$proc" ]; then + echo "FAIL: proc inconsistency for $ns: $l != $proc" + lret=1 + fi + + proc=$(uniq "$tmpfile_proc" | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$proc" ]; then + echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc" + diff -u "$tmpfile_proc" "$tmpfile_uniq" + lret=1 + fi + fi + + if [ $lret -eq 0 ];then + echo "PASS: dump in netns $ns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)" + else + echo "FAIL: dump in netns $ns had different entry count (-C $c, -L $l, -p $proto, /proc $proc)" + ret=1 + fi +} + +test_dump_all() +{ + local timeout=3 + local tainted_then="" + + read tainted_then < /proc/sys/kernel/tainted + + ct_flush_once "$nsclient1" + ct_flush_once "$nsclient2" + + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600 + + ctflood "$nsclient1" $timeout "dumpall" & + insert_ctnetlink "$nsclient2" $insert_count + + wait + + check_dump "$nsclient1" "icmp" + check_dump "$nsclient2" "udp" + + check_taint "$tainted_then" "test parallel conntrack dumps" +} + +check_sysctl_immutable() +{ + local ns="$1" + local name="$2" + local failhard="$3" + local o=0 + local n=0 + + o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null) + n=$((o+1)) + + # return value isn't reliable, need to read it back + ip netns exec "$ns" sysctl -q "$name"=$n 2>/dev/null >/dev/null + + n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null) + + [ -z "$n" ] && return 1 + + if [ $o -ne $n ]; then + if [ $failhard -gt 0 ] ;then + echo "FAIL: net.$name should not be changeable from namespace (now $n)" + ret=1 + fi + return 0 + fi + + return 1 +} + +test_conntrack_max_limit() +{ + sysctl -q net.netfilter.nf_conntrack_max=100 + insert_ctnetlink "$nsclient1" 101 + + # check netns is clamped by init_net, i.e., either netns follows + # init_net value, or a higher pernet limit (compared to init_net) is ignored. + check_ctcount "$nsclient1" 100 "netns conntrack_max is init_net bound" + + sysctl -q net.netfilter.nf_conntrack_max=$init_net_max +} + +test_conntrack_disable() +{ + local timeout=2 + + # disable conntrack pickups + ip netns exec "$nsclient1" nft flush table ip test_ct + + ct_flush_once "$nsclient1" + ct_flush_once "$nsclient2" + + ctflood "$nsclient1" "$timeout" "conntrack disable" + ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1 + + # Disabled, should not have picked up any connection. + check_ctcount "$nsclient1" 0 "conntrack disabled" + + # This one is still active, expect 1 connection. + check_ctcount "$nsclient2" 1 "conntrack enabled" +} + +init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) + +check_max_alias $init_net_max + +sysctl -q net.netfilter.nf_conntrack_max="262000" +check_max_alias 262000 + +setup_ns nsclient1 nsclient2 + +# check this only works from init_net +for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max net.nf_conntrack_max;do + check_sysctl_immutable "$nsclient1" "net.$n" 1 +done + +# won't work on older kernels. If it works, check that the netns obeys the limit +if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then + # subtest: if pernet is changeable, check that reducing it in pernet + # limits the pernet entries. Inverse, pernet clamped by a lower init_net + # setting, is already checked by "test_conntrack_max_limit" test. + + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1 + insert_ctnetlink "$nsclient1" 2 + check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound" + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max +fi + +for n in "$nsclient1" "$nsclient2";do +# enable conntrack in both namespaces +ip netns exec "$n" nft -f - <<EOF +table ip test_ct { + chain input { + type filter hook input priority 0 + ct state new counter + } +} +EOF +done + +tmpfile=$(mktemp) +tmpfile_proc=$(mktemp) +tmpfile_uniq=$(mktemp) +test_conntrack_max_limit +test_dump_all +test_floodresize_all +test_conntrack_disable + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c new file mode 100644 index 000000000000..507930cee8cb --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Needs something like: + * + * iptables -t nat -A POSTROUTING -o nomatch -j MASQUERADE + * + * so NAT engine attaches a NAT null-binding to each connection. + * + * With unmodified kernels, child or parent will exit with + * "Port number changed" error, even though no port translation + * was requested. + */ + +#include <errno.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <time.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/wait.h> + +#define LEN 512 +#define PORT 56789 +#define TEST_TIME 5 + +static void die(const char *e) +{ + perror(e); + exit(111); +} + +static void die_port(uint16_t got, uint16_t want) +{ + fprintf(stderr, "Port number changed, wanted %d got %d\n", want, ntohs(got)); + exit(1); +} + +static int udp_socket(void) +{ + static const struct timeval tv = { + .tv_sec = 1, + }; + int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + + if (fd < 0) + die("socket"); + + setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + return fd; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_in sa1 = { + .sin_family = AF_INET, + }; + struct sockaddr_in sa2 = { + .sin_family = AF_INET, + }; + int s1, s2, status; + time_t end, now; + socklen_t plen; + char buf[LEN]; + bool child; + + sa1.sin_port = htons(PORT); + sa2.sin_port = htons(PORT + 1); + + s1 = udp_socket(); + s2 = udp_socket(); + + inet_pton(AF_INET, "127.0.0.11", &sa1.sin_addr); + inet_pton(AF_INET, "127.0.0.12", &sa2.sin_addr); + + if (bind(s1, (struct sockaddr *)&sa1, sizeof(sa1)) < 0) + die("bind 1"); + if (bind(s2, (struct sockaddr *)&sa2, sizeof(sa2)) < 0) + die("bind 2"); + + child = fork() == 0; + + now = time(NULL); + end = now + TEST_TIME; + + while (now < end) { + struct sockaddr_in peer; + socklen_t plen = sizeof(peer); + + now = time(NULL); + + if (child) { + if (sendto(s1, buf, LEN, 0, (struct sockaddr *)&sa2, sizeof(sa2)) != LEN) + continue; + + if (recvfrom(s2, buf, LEN, 0, (struct sockaddr *)&peer, &plen) < 0) + die("child recvfrom"); + + if (peer.sin_port != htons(PORT)) + die_port(peer.sin_port, PORT); + } else { + if (sendto(s2, buf, LEN, 0, (struct sockaddr *)&sa1, sizeof(sa1)) != LEN) + continue; + + if (recvfrom(s1, buf, LEN, 0, (struct sockaddr *)&peer, &plen) < 0) + die("parent recvfrom"); + + if (peer.sin_port != htons((PORT + 1))) + die_port(peer.sin_port, PORT + 1); + } + } + + if (child) + return 0; + + wait(&status); + + if (WIFEXITED(status)) + return WEXITSTATUS(status); + + return 1; +} diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh new file mode 100755 index 000000000000..a24c896347a8 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +cleanup() +{ + cleanup_all_ns +} + +checktool "nft --version" "run test without nft" +checktool "conntrack --version" "run test without conntrack" + +trap cleanup EXIT + +setup_ns ns0 + +# make loopback connections get nat null bindings assigned +ip netns exec "$ns0" nft -f - <<EOF +table ip nat { + chain POSTROUTING { + type nat hook postrouting priority srcnat; policy accept; + oifname "nomatch" counter packets 0 bytes 0 masquerade + } +} +EOF + +do_flush() +{ + local end + local now + + now=$(date +%s) + end=$((now + 5)) + + while [ $now -lt $end ];do + ip netns exec "$ns0" conntrack -F 2>/dev/null + now=$(date +%s) + done +} + +do_flush & + +if ip netns exec "$ns0" ./conntrack_reverse_clash; then + echo "PASS: No SNAT performed for null bindings" +else + echo "ERROR: SNAT performed without any matching snat rule" + exit 1 +fi + +exit 0 diff --git a/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh new file mode 100755 index 000000000000..d860f7d9744b --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For SCTP COLLISION SCENARIO as Below: +# +# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359] +# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187] +# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359] +# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO] +# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK] +# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970] +# +# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS + +source lib.sh + +CLIENT_IP="198.51.200.1" +CLIENT_PORT=1234 + +SERVER_IP="198.51.100.1" +SERVER_PORT=1234 + +CLIENT_GW="198.51.200.2" +SERVER_GW="198.51.100.2" + +# setup the topo +setup() { + setup_ns CLIENT_NS SERVER_NS ROUTER_NS + ip -n "$SERVER_NS" link add link0 type veth peer name link1 netns "$ROUTER_NS" + ip -n "$CLIENT_NS" link add link3 type veth peer name link2 netns "$ROUTER_NS" + + ip -n "$SERVER_NS" link set link0 up + ip -n "$SERVER_NS" addr add $SERVER_IP/24 dev link0 + ip -n "$SERVER_NS" route add $CLIENT_IP dev link0 via $SERVER_GW + + ip -n "$ROUTER_NS" link set link1 up + ip -n "$ROUTER_NS" link set link2 up + ip -n "$ROUTER_NS" addr add $SERVER_GW/24 dev link1 + ip -n "$ROUTER_NS" addr add $CLIENT_GW/24 dev link2 + ip net exec "$ROUTER_NS" sysctl -wq net.ipv4.ip_forward=1 + + ip -n "$CLIENT_NS" link set link3 up + ip -n "$CLIENT_NS" addr add $CLIENT_IP/24 dev link3 + ip -n "$CLIENT_NS" route add $SERVER_IP dev link3 via $CLIENT_GW + + # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with + # tc on $SERVER_NS side + tc -n "$SERVER_NS" qdisc add dev link0 root handle 1: htb r2q 64 + tc -n "$SERVER_NS" class add dev link0 parent 1: classid 1:1 htb rate 100mbit + tc -n "$SERVER_NS" filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \ + 0xff match u8 2 0xff at 32 flowid 1:1 + if ! tc -n "$SERVER_NS" qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms; then + echo "SKIP: Cannot add netem qdisc" + exit $ksft_skip + fi + + # simulate the ctstate check on OVS nf_conntrack + ip net exec "$ROUTER_NS" iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP + ip net exec "$ROUTER_NS" iptables -A INPUT -p sctp -j DROP + + # use a smaller number for assoc's max_retrans to reproduce the issue + modprobe -q sctp + ip net exec "$CLIENT_NS" sysctl -wq net.sctp.association_max_retrans=3 +} + +cleanup() { + ip net exec "$CLIENT_NS" pkill sctp_collision >/dev/null 2>&1 + ip net exec "$SERVER_NS" pkill sctp_collision >/dev/null 2>&1 + cleanup_all_ns +} + +do_test() { + ip net exec "$SERVER_NS" ./sctp_collision server \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT & + ip net exec "$CLIENT_NS" ./sctp_collision client \ + $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT +} + +# NOTE: one way to work around the issue is set a smaller hb_interval +# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500 + +# run the test case +trap cleanup EXIT +setup && \ +echo "Test for SCTP Collision in nf_conntrack:" && \ +do_test && echo "PASS!" +exit $? diff --git a/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh new file mode 100755 index 000000000000..121ea93c0178 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh @@ -0,0 +1,164 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check that UNREPLIED tcp conntrack will eventually timeout. +# + +source lib.sh + +if ! nft --version > /dev/null 2>&1;then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +if ! conntrack --version > /dev/null 2>&1;then + echo "SKIP: Could not run test without conntrack tool" + exit $ksft_skip +fi + +ret=0 + +cleanup() { + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + + cleanup_all_ns +} + +ipv4() { + echo -n 192.168."$1".2 +} + +check_counter() +{ + ns=$1 + name=$2 + expect=$3 + local lret=0 + + if ! ip netns exec "$ns2" nft list counter inet filter "$name" | grep -q "$expect"; then + echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2 + ip netns exec "$ns2" nft list counter inet filter "$name" 1>&2 + lret=1 + fi + + return $lret +} + +trap cleanup EXIT + +# Create test namespaces +setup_ns ns1 ns2 + +# Connect the namespace to the host using a veth pair +ip -net "$ns1" link add name veth1 type veth peer name veth2 +ip -net "$ns1" link set netns "$ns2" dev veth2 + +ip -net "$ns1" link set up dev lo +ip -net "$ns2" link set up dev lo +ip -net "$ns1" link set up dev veth1 +ip -net "$ns2" link set up dev veth2 + +ip -net "$ns2" addr add 10.11.11.2/24 dev veth2 +ip -net "$ns2" route add default via 10.11.11.1 + +ip netns exec "$ns2" sysctl -q net.ipv4.conf.veth2.forwarding=1 + +# add a rule inside NS so we enable conntrack +ip netns exec "$ns1" nft -f - <<EOF +table inet filter { + chain input { + type filter hook input priority 0; policy accept; + ct state established accept + } +} +EOF + +ip -net "$ns1" addr add 10.11.11.1/24 dev veth1 +ip -net "$ns1" route add 10.99.99.99 via 10.11.11.2 + +# Check connectivity works +ip netns exec "$ns1" ping -q -c 2 10.11.11.2 >/dev/null || exit 1 + +ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT & + +ip netns exec "$ns2" nft -f - <<EOF +table inet filter { + counter connreq { } + counter redir { } + chain input { + type filter hook input priority 0; policy accept; + ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept + ct state new ct status dnat tcp dport 8080 counter name "redir" accept + } +} +EOF +if [ $? -ne 0 ]; then + echo "ERROR: Could not load nft rules" + exit 1 +fi + +ip netns exec "$ns2" sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10 + +echo "INFO: connect $ns1 -> $ns2 to the virtual ip" +ip netns exec "$ns1" bash -c 'for i in $(seq 1 $BUSYWAIT_TIMEOUT) ; do + socat -u STDIN TCP:10.99.99.99:80 < /dev/null + sleep 0.1 + done' & + +wait_for_attempt() +{ + count=$(ip netns exec "$ns2" conntrack -L -p tcp --dport 80 2>/dev/null | wc -l) + if [ "$count" -gt 0 ]; then + return 0 + fi + + return 1 +} + +# wait for conntrack to pick the new connection request up before loading +# the nat redirect rule. +if ! busywait "$BUSYWAIT_TIMEOUT" wait_for_attempt; then + echo "ERROR: $ns2 did not pick up tcp connection from peer" + exit 1 +fi + +ip netns exec "$ns2" nft -f - <<EOF +table inet nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + ip daddr 10.99.99.99 tcp dport 80 redirect to :8080 + } +} +EOF +if [ $? -ne 0 ]; then + echo "ERROR: Could not load nat redirect" + exit 1 +fi + +wait_for_redirect() +{ + count=$(ip netns exec "$ns2" conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l) + if [ "$count" -gt 0 ]; then + return 0 + fi + + return 1 +} +echo "INFO: NAT redirect added in ns $ns2, waiting for $BUSYWAIT_TIMEOUT ms for nat to take effect" + +busywait "$BUSYWAIT_TIMEOUT" wait_for_redirect +ret=$? + +expect="packets 1 bytes 60" +if ! check_counter "$ns2" "redir" "$expect"; then + ret=1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: redirection counter has expected values" +else + echo "ERROR: no tcp connection was redirected" +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh new file mode 100755 index 000000000000..207b79932d91 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh @@ -0,0 +1,216 @@ +#!/bin/bash + +# This script demonstrates interaction of conntrack and vrf. +# The vrf driver calls the netfilter hooks again, with oif/iif +# pointing at the VRF device. +# +# For ingress, this means first iteration has iifname of lower/real +# device. In this script, thats veth0. +# Second iteration is iifname set to vrf device, tvrf in this script. +# +# For egress, this is reversed: first iteration has the vrf device, +# second iteration is done with the lower/real/veth0 device. +# +# test_ct_zone_in demonstrates unexpected change of nftables +# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack +# connection on VRF rcv" +# +# It was possible to assign conntrack zone to a packet (or mark it for +# `notracking`) in the prerouting chain before conntrack, based on real iif. +# +# After the change, the zone assignment is lost and the zone is assigned based +# on the VRF master interface (in case such a rule exists). +# assignment is lost. Instead, assignment based on the `iif` matching +# Thus it is impossible to distinguish packets based on the original +# interface. +# +# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem +# that was supposed to be fixed by the commit mentioned above to make sure +# that any fix to test case 1 won't break masquerade again. + +source lib.sh + +IP0=172.30.30.1 +IP1=172.30.30.2 +PFXL=30 +ret=0 + +cleanup() +{ + ip netns pids $ns0 | xargs kill 2>/dev/null + ip netns pids $ns1 | xargs kill 2>/dev/null + + cleanup_all_ns +} + +checktool "nft --version" "run test without nft" +checktool "conntrack --version" "run test without conntrack" +checktool "socat -h" "run test without socat" + +trap cleanup EXIT + +setup_ns ns0 ns1 + +if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then + echo "SKIP: Could not add veth device" + exit $ksft_skip +fi + +if ! ip -net "$ns0" li add tvrf type vrf table 9876; then + echo "SKIP: Could not add vrf device" + exit $ksft_skip +fi + +ip -net "$ns0" li set veth0 master tvrf +ip -net "$ns0" li set tvrf up +ip -net "$ns0" li set veth0 up +ip -net "$ns1" li set veth0 up + +ip -net "$ns0" addr add $IP0/$PFXL dev veth0 +ip -net "$ns1" addr add $IP1/$PFXL dev veth0 + +listener_ready() +{ + local ns="$1" + + ss -N "$ns" -l -n -t -o "sport = :55555" | grep -q "55555" +} + +ip netns exec "$ns1" socat -u -4 TCP-LISTEN:55555,reuseaddr,fork STDOUT > /dev/null & +busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" + +# test vrf ingress handling. +# The incoming connection should be placed in conntrack zone 1, +# as decided by the first iteration of the ruleset. +test_ct_zone_in() +{ +ip netns exec "$ns0" nft -f - <<EOF +table testct { + chain rawpre { + type filter hook prerouting priority raw; + + iif { veth0, tvrf } counter meta nftrace set 1 + iif veth0 counter ct zone set 1 counter return + iif tvrf counter ct zone set 2 counter return + ip protocol icmp counter + notrack counter + } + + chain rawout { + type filter hook output priority raw; + + oif veth0 counter ct zone set 1 counter return + oif tvrf counter ct zone set 2 counter return + notrack counter + } +} +EOF + ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null + + # should be in zone 1, not zone 2 + count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l) + if [ "$count" -eq 1 ]; then + echo "PASS: entry found in conntrack zone 1" + else + echo "FAIL: entry not found in conntrack zone 1" + count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l) + if [ "$count" -eq 1 ]; then + echo "FAIL: entry found in zone 2 instead" + else + echo "FAIL: entry not in zone 1 or 2, dumping table" + ip netns exec "$ns0" conntrack -L + ip netns exec "$ns0" nft list ruleset + fi + fi +} + +# add masq rule that gets evaluated w. outif set to vrf device. +# This tests the first iteration of the packet through conntrack, +# oifname is the vrf device. +test_masquerade_vrf() +{ + local qdisc=$1 + + if [ "$qdisc" != "default" ]; then + tc -net "$ns0" qdisc add dev tvrf root "$qdisc" + fi + + ip netns exec "$ns0" conntrack -F 2>/dev/null + +ip netns exec "$ns0" nft -f - <<EOF +flush ruleset +table ip nat { + chain rawout { + type filter hook output priority raw; + + oif tvrf ct state untracked counter + } + chain postrouting2 { + type filter hook postrouting priority mangle; + + oif tvrf ct state untracked counter + } + chain postrouting { + type nat hook postrouting priority 0; + # NB: masquerade should always be combined with 'oif(name) bla', + # lack of this is intentional here, we want to exercise double-snat. + ip saddr 172.30.30.0/30 counter masquerade random + } +} +EOF + if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then + echo "FAIL: connect failure with masquerade + sport rewrite on vrf device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1' && + ip netns exec "$ns0" nft list table ip nat |grep -q 'untracked counter packets [1-9]'; then + echo "PASS: connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)" + else + echo "FAIL: vrf rules have unexpected counter value" + ret=1 + fi + + if [ "$qdisc" != "default" ]; then + tc -net "$ns0" qdisc del dev tvrf root + fi +} + +# add masq rule that gets evaluated w. outif set to veth device. +# This tests the 2nd iteration of the packet through conntrack, +# oifname is the lower device (veth0 in this case). +test_masquerade_veth() +{ + ip netns exec "$ns0" conntrack -F 2>/dev/null +ip netns exec "$ns0" nft -f - <<EOF +flush ruleset +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; + meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random + } +} +EOF + if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then + echo "FAIL: connect failure with masquerade + sport rewrite on veth device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1'; then + echo "PASS: connect with masquerade + sport rewrite on veth device" + else + echo "FAIL: vrf masq rule has unexpected counter value" + ret=1 + fi +} + +test_ct_zone_in +test_masquerade_vrf "default" +test_masquerade_vrf "pfifo" +test_masquerade_veth + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh new file mode 100755 index 000000000000..6af2ea3ad6b8 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/ipvs.sh @@ -0,0 +1,205 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# End-to-end ipvs test suite +# Topology: +#--------------------------------------------------------------+ +# | | +# ns0 | ns1 | +# ----------- | ----------- ----------- | +# | veth01 | --------- | veth10 | | veth12 | | +# ----------- peer ----------- ----------- | +# | | | | +# ----------- | | | +# | br0 | |----------------- peer |--------------| +# ----------- | | | +# | | | | +# ---------- peer ---------- ----------- | +# | veth02 | --------- | veth20 | | veth21 | | +# ---------- | ---------- ----------- | +# | ns2 | +# | | +#--------------------------------------------------------------+ +# +# We assume that all network driver are loaded +# + +source lib.sh + +ret=0 +GREEN='\033[0;92m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +readonly port=8080 + +readonly vip_v4=207.175.44.110 +readonly cip_v4=10.0.0.2 +readonly gip_v4=10.0.0.1 +readonly dip_v4=172.16.0.1 +readonly rip_v4=172.16.0.2 +readonly sip_v4=10.0.0.3 + +readonly infile="$(mktemp)" +readonly outfile="$(mktemp)" +readonly datalen=32 + +sysipvsnet="/proc/sys/net/ipv4/vs/" +if [ ! -d $sysipvsnet ]; then + if ! modprobe -q ip_vs; then + echo "skip: could not run test without ipvs module" + exit $ksft_skip + fi +fi + +checktool "ipvsadm -v" "run test without ipvsadm" +checktool "socat -h" "run test without socat" + +setup() { + setup_ns ns0 ns1 ns2 + + ip link add veth01 netns "${ns0}" type veth peer name veth10 netns "${ns1}" + ip link add veth02 netns "${ns0}" type veth peer name veth20 netns "${ns2}" + ip link add veth12 netns "${ns1}" type veth peer name veth21 netns "${ns2}" + + ip netns exec "${ns0}" ip link set veth01 up + ip netns exec "${ns0}" ip link set veth02 up + ip netns exec "${ns0}" ip link add br0 type bridge + ip netns exec "${ns0}" ip link set veth01 master br0 + ip netns exec "${ns0}" ip link set veth02 master br0 + ip netns exec "${ns0}" ip link set br0 up + ip netns exec "${ns0}" ip addr add "${cip_v4}/24" dev br0 + + ip netns exec "${ns1}" ip link set veth10 up + ip netns exec "${ns1}" ip addr add "${gip_v4}/24" dev veth10 + ip netns exec "${ns1}" ip link set veth12 up + ip netns exec "${ns1}" ip addr add "${dip_v4}/24" dev veth12 + + ip netns exec "${ns2}" ip link set veth21 up + ip netns exec "${ns2}" ip addr add "${rip_v4}/24" dev veth21 + ip netns exec "${ns2}" ip link set veth20 up + ip netns exec "${ns2}" ip addr add "${sip_v4}/24" dev veth20 + + sleep 1 + + dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none +} + +cleanup() { + cleanup_all_ns + + if [ -f "${outfile}" ]; then + rm "${outfile}" + fi + if [ -f "${infile}" ]; then + rm "${infile}" + fi +} + +server_listen() { + ip netns exec "$ns2" timeout 5 socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" & + server_pid=$! + sleep 0.2 +} + +client_connect() { + ip netns exec "${ns0}" timeout 2 socat -u -4 STDIN TCP:"${vip_v4}":"${port}" < "${infile}" +} + +verify_data() { + wait "${server_pid}" + cmp "$infile" "$outfile" 2>/dev/null +} + +test_service() { + server_listen + client_connect + verify_data +} + + +test_dr() { + ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0 + + ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1 + ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr + ip netns exec "${ns1}" ipvsadm -a -t "${vip_v4}:${port}" -r "${rip_v4}:${port}" + ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1 + + # avoid incorrect arp response + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 + ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 + + test_service +} + +test_nat() { + ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0 + + ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1 + ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr + ip netns exec "${ns1}" ipvsadm -a -m -t "${vip_v4}:${port}" -r "${rip_v4}:${port}" + ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1 + + ip netns exec "${ns2}" ip link del veth20 + ip netns exec "${ns2}" ip route add default via "${dip_v4}" dev veth21 + + test_service +} + +test_tun() { + ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0 + + ip netns exec "${ns1}" modprobe -q ipip + ip netns exec "${ns1}" ip link set tunl0 up + ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0 + ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0 + ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.default.send_redirects=0 + ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr + ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port} + ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1 + + ip netns exec "${ns2}" modprobe -q ipip + ip netns exec "${ns2}" ip link set tunl0 up + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 + ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 + + test_service +} + +run_tests() { + local errors= + + echo "Testing DR mode..." + cleanup + setup + test_dr + errors=$(( $errors + $? )) + + echo "Testing NAT mode..." + cleanup + setup + test_nat + errors=$(( $errors + $? )) + + echo "Testing Tunnel mode..." + cleanup + setup + test_tun + errors=$(( $errors + $? )) + + return $errors +} + +trap cleanup EXIT + +run_tests + +if [ $? -ne 0 ]; then + echo -e "$(basename $0): ${RED}FAIL${NC}" + exit 1 +fi +echo -e "$(basename $0): ${GREEN}PASS${NC}" +exit 0 diff --git a/tools/testing/selftests/net/netfilter/lib.sh b/tools/testing/selftests/net/netfilter/lib.sh new file mode 100644 index 000000000000..bedd35298e15 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/lib.sh @@ -0,0 +1,10 @@ +net_netfilter_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "$net_netfilter_dir/../lib.sh" + +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} diff --git a/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh new file mode 100755 index 000000000000..c6fdd2079f4d --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +checktool "conntrack --version" "run test without conntrack" +checktool "iptables --version" "run test without iptables" +checktool "ip6tables --version" "run test without ip6tables" + +modprobe -q tun +modprobe -q nf_conntrack +# echo 1 > /proc/sys/net/netfilter/nf_log_all_netns + +PDRILL_TIMEOUT=10 + +files=" +conntrack_ack_loss_stall.pkt +conntrack_inexact_rst.pkt +conntrack_syn_challenge_ack.pkt +conntrack_synack_old.pkt +conntrack_synack_reuse.pkt +conntrack_rst_invalid.pkt +" + +if ! packetdrill --dry_run --verbose "packetdrill/conntrack_ack_loss_stall.pkt";then + echo "SKIP: packetdrill not installed" + exit ${ksft_skip} +fi + +ret=0 + +run_packetdrill() +{ + filename="$1" + ipver="$2" + local mtu=1500 + + export NFCT_IP_VERSION="$ipver" + + if [ "$ipver" = "ipv4" ];then + export xtables="iptables" + elif [ "$ipver" = "ipv6" ];then + export xtables="ip6tables" + mtu=1520 + fi + + timeout "$PDRILL_TIMEOUT" unshare -n packetdrill --ip_version="$ipver" --mtu=$mtu \ + --tolerance_usecs=1000000 --non_fatal packet "$filename" +} + +run_one_test_file() +{ + filename="$1" + + for v in ipv4 ipv6;do + printf "%-50s(%s)%-20s" "$filename" "$v" "" + if run_packetdrill packetdrill/"$f" "$v";then + echo OK + else + echo FAIL + ret=1 + fi + done +} + +echo "Replaying packetdrill test cases:" +for f in $files;do + run_one_test_file packetdrill/"$f" +done + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh new file mode 100755 index 000000000000..1014551dd769 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test NAT source port clash resolution +# + +source lib.sh +ret=0 +socatpid=0 + +cleanup() +{ + [ "$socatpid" -gt 0 ] && kill "$socatpid" + + cleanup_all_ns +} + +checktool "socat -h" "run test without socat" +checktool "iptables --version" "run test without iptables" + +trap cleanup EXIT + +setup_ns ns1 ns2 + +# Connect the namespaces using a veth pair +ip link add name veth2 type veth peer name veth1 +ip link set netns "$ns1" dev veth1 +ip link set netns "$ns2" dev veth2 + +ip netns exec "$ns1" ip link set up dev lo +ip netns exec "$ns1" ip link set up dev veth1 +ip netns exec "$ns1" ip addr add 192.168.1.1/24 dev veth1 + +ip netns exec "$ns2" ip link set up dev lo +ip netns exec "$ns2" ip link set up dev veth2 +ip netns exec "$ns2" ip addr add 192.168.1.2/24 dev veth2 + +# Create a server in one namespace +ip netns exec "$ns1" socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 & +socatpid=$! + +# Restrict source port to just one so we don't have to exhaust +# all others. +ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000" + +# add a virtual IP using DNAT +ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 + +# ... and route it to the other namespace +ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1 + +# add a persistent connection from the other namespace +ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null & + +sleep 1 + +# ip daddr:dport will be rewritten to 192.168.1.1 5201 +# NAT must reallocate source port 10000 because +# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use +echo test | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null +ret=$? + +# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201). +if [ $ret -eq 0 ]; then + echo "PASS: socat can connect via NAT'd address" +else + echo "FAIL: socat cannot connect via NAT'd address" +fi + +# check sport clashres. +ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201 +ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201 + +sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null & + +# if connect succeeds, client closes instantly due to EOF on stdin. +# if connect hangs, it will time out after 5s. +echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null & +cpid2=$! + +time_then=$(date +%s) +wait $cpid2 +rv=$? +time_now=$(date +%s) + +# Check how much time has elapsed, expectation is for +# 'cpid2' to connect and then exit (and no connect delay). +delta=$((time_now - time_then)) + +if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then + echo "PASS: could connect to service via redirected ports" +else + echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)" + ret=1 +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nf_queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c new file mode 100644 index 000000000000..9e56b9d47037 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nf_queue.c @@ -0,0 +1,395 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <time.h> +#include <arpa/inet.h> + +#include <libmnl/libmnl.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_queue.h> + +struct options { + bool count_packets; + bool gso_enabled; + int verbose; + unsigned int queue_num; + unsigned int timeout; + uint32_t verdict; + uint32_t delay_ms; +}; + +static unsigned int queue_stats[5]; +static struct options opts; + +static void help(const char *p) +{ + printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); +} + +static int parse_attr_cb(const struct nlattr *attr, void *data) +{ + const struct nlattr **tb = data; + int type = mnl_attr_get_type(attr); + + /* skip unsupported attribute in user-space */ + if (mnl_attr_type_valid(attr, NFQA_MAX) < 0) + return MNL_CB_OK; + + switch (type) { + case NFQA_MARK: + case NFQA_IFINDEX_INDEV: + case NFQA_IFINDEX_OUTDEV: + case NFQA_IFINDEX_PHYSINDEV: + case NFQA_IFINDEX_PHYSOUTDEV: + if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) { + perror("mnl_attr_validate"); + return MNL_CB_ERROR; + } + break; + case NFQA_TIMESTAMP: + if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC, + sizeof(struct nfqnl_msg_packet_timestamp)) < 0) { + perror("mnl_attr_validate2"); + return MNL_CB_ERROR; + } + break; + case NFQA_HWADDR: + if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC, + sizeof(struct nfqnl_msg_packet_hw)) < 0) { + perror("mnl_attr_validate2"); + return MNL_CB_ERROR; + } + break; + case NFQA_PAYLOAD: + break; + } + tb[type] = attr; + return MNL_CB_OK; +} + +static int queue_cb(const struct nlmsghdr *nlh, void *data) +{ + struct nlattr *tb[NFQA_MAX+1] = { 0 }; + struct nfqnl_msg_packet_hdr *ph = NULL; + uint32_t id = 0; + + (void)data; + + mnl_attr_parse(nlh, sizeof(struct nfgenmsg), parse_attr_cb, tb); + if (tb[NFQA_PACKET_HDR]) { + ph = mnl_attr_get_payload(tb[NFQA_PACKET_HDR]); + id = ntohl(ph->packet_id); + + if (opts.verbose > 0) + printf("packet hook=%u, hwproto 0x%x", + ntohs(ph->hw_protocol), ph->hook); + + if (ph->hook >= 5) { + fprintf(stderr, "Unknown hook %d\n", ph->hook); + return MNL_CB_ERROR; + } + + if (opts.verbose > 0) { + uint32_t skbinfo = 0; + + if (tb[NFQA_SKB_INFO]) + skbinfo = ntohl(mnl_attr_get_u32(tb[NFQA_SKB_INFO])); + if (skbinfo & NFQA_SKB_CSUMNOTREADY) + printf(" csumnotready"); + if (skbinfo & NFQA_SKB_GSO) + printf(" gso"); + if (skbinfo & NFQA_SKB_CSUM_NOTVERIFIED) + printf(" csumnotverified"); + puts(""); + } + + if (opts.count_packets) + queue_stats[ph->hook]++; + } + + return MNL_CB_OK + id; +} + +static struct nlmsghdr * +nfq_build_cfg_request(char *buf, uint8_t command, int queue_num) +{ + struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf); + struct nfqnl_msg_config_cmd cmd = { + .command = command, + .pf = htons(AF_INET), + }; + struct nfgenmsg *nfg; + + nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG; + nlh->nlmsg_flags = NLM_F_REQUEST; + + nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg)); + + nfg->nfgen_family = AF_UNSPEC; + nfg->version = NFNETLINK_V0; + nfg->res_id = htons(queue_num); + + mnl_attr_put(nlh, NFQA_CFG_CMD, sizeof(cmd), &cmd); + + return nlh; +} + +static struct nlmsghdr * +nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num) +{ + struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf); + struct nfqnl_msg_config_params params = { + .copy_range = htonl(range), + .copy_mode = mode, + }; + struct nfgenmsg *nfg; + + nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG; + nlh->nlmsg_flags = NLM_F_REQUEST; + + nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg)); + nfg->nfgen_family = AF_UNSPEC; + nfg->version = NFNETLINK_V0; + nfg->res_id = htons(queue_num); + + mnl_attr_put(nlh, NFQA_CFG_PARAMS, sizeof(params), ¶ms); + + return nlh; +} + +static struct nlmsghdr * +nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd) +{ + struct nfqnl_msg_verdict_hdr vh = { + .verdict = htonl(verd), + .id = htonl(id), + }; + struct nlmsghdr *nlh; + struct nfgenmsg *nfg; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_VERDICT; + nlh->nlmsg_flags = NLM_F_REQUEST; + nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg)); + nfg->nfgen_family = AF_UNSPEC; + nfg->version = NFNETLINK_V0; + nfg->res_id = htons(queue_num); + + mnl_attr_put(nlh, NFQA_VERDICT_HDR, sizeof(vh), &vh); + + return nlh; +} + +static void print_stats(void) +{ + unsigned int last, total; + int i; + + total = 0; + last = queue_stats[0]; + + for (i = 0; i < 5; i++) { + printf("hook %d packets %08u\n", i, queue_stats[i]); + last = queue_stats[i]; + total += last; + } + + printf("%u packets total\n", total); +} + +struct mnl_socket *open_queue(void) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + unsigned int queue_num; + struct mnl_socket *nl; + struct nlmsghdr *nlh; + struct timeval tv; + uint32_t flags; + + nl = mnl_socket_open(NETLINK_NETFILTER); + if (nl == NULL) { + perror("mnl_socket_open"); + exit(EXIT_FAILURE); + } + + if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) { + perror("mnl_socket_bind"); + exit(EXIT_FAILURE); + } + + queue_num = opts.queue_num; + nlh = nfq_build_cfg_request(buf, NFQNL_CFG_CMD_BIND, queue_num); + + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } + + nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num); + + flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0; + flags |= NFQA_CFG_F_UID_GID; + mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags)); + mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags)); + + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } + + memset(&tv, 0, sizeof(tv)); + tv.tv_sec = opts.timeout; + if (opts.timeout && setsockopt(mnl_socket_get_fd(nl), + SOL_SOCKET, SO_RCVTIMEO, + &tv, sizeof(tv))) { + perror("setsockopt(SO_RCVTIMEO)"); + exit(EXIT_FAILURE); + } + + return nl; +} + +static void sleep_ms(uint32_t delay) +{ + struct timespec ts = { .tv_sec = delay / 1000 }; + + delay %= 1000; + + ts.tv_nsec = delay * 1000llu * 1000llu; + + nanosleep(&ts, NULL); +} + +static int mainloop(void) +{ + unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE; + struct mnl_socket *nl; + struct nlmsghdr *nlh; + unsigned int portid; + char *buf; + int ret; + + buf = malloc(buflen); + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + nl = open_queue(); + portid = mnl_socket_get_portid(nl); + + for (;;) { + uint32_t id; + + ret = mnl_socket_recvfrom(nl, buf, buflen); + if (ret == -1) { + if (errno == ENOBUFS || errno == EINTR) + continue; + + if (errno == EAGAIN) { + errno = 0; + ret = 0; + break; + } + + perror("mnl_socket_recvfrom"); + exit(EXIT_FAILURE); + } + + ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL); + if (ret < 0) { + perror("mnl_cb_run"); + exit(EXIT_FAILURE); + } + + id = ret - MNL_CB_OK; + if (opts.delay_ms) + sleep_ms(opts.delay_ms); + + nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict); + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } + } + + mnl_socket_close(nl); + + return ret; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) { + switch (c) { + case 'c': + opts.count_packets = true; + break; + case 'h': + help(argv[0]); + exit(0); + break; + case 'q': + opts.queue_num = atoi(optarg); + if (opts.queue_num > 0xffff) + opts.queue_num = 0; + break; + case 'Q': + opts.verdict = atoi(optarg); + if (opts.verdict > 0xffff) { + fprintf(stderr, "Expected destination queue number\n"); + exit(1); + } + + opts.verdict <<= 16; + opts.verdict |= NF_QUEUE; + break; + case 'd': + opts.delay_ms = atoi(optarg); + if (opts.delay_ms == 0) { + fprintf(stderr, "Expected nonzero delay (in milliseconds)\n"); + exit(1); + } + break; + case 't': + opts.timeout = atoi(optarg); + break; + case 'G': + opts.gso_enabled = false; + break; + case 'v': + opts.verbose++; + break; + } + } + + if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) { + fprintf(stderr, "Cannot use same destination and source queue\n"); + exit(1); + } +} + +int main(int argc, char *argv[]) +{ + int ret; + + opts.verdict = NF_ACCEPT; + opts.gso_enabled = true; + + parse_opts(argc, argv); + + ret = mainloop(); + if (opts.count_packets) + print_stats(); + + return ret; +} diff --git a/tools/testing/selftests/net/netfilter/nft_audit.sh b/tools/testing/selftests/net/netfilter/nft_audit.sh new file mode 100755 index 000000000000..87f2b4c725aa --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_audit.sh @@ -0,0 +1,269 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check that audit logs generated for nft commands are as expected. + +SKIP_RC=4 +RC=0 + +if [ -r /var/run/auditd.pid ];then + read pid < /var/run/auditd.pid + p=$(pgrep ^auditd$) + + if [ "$pid" -eq "$p" ]; then + echo "SKIP: auditd is running" + exit $SKIP_RC + fi +fi + +nft --version >/dev/null 2>&1 || { + echo "SKIP: missing nft tool" + exit $SKIP_RC +} + +# nft must be recent enough to support "reset" keyword. +nft --check -f /dev/stdin >/dev/null 2>&1 <<EOF +add table t +add chain t c +reset rules t c +EOF + +if [ "$?" -ne 0 ];then + echo -n "SKIP: nft reset feature test failed: " + nft --version + exit $SKIP_RC +fi + +# Run everything in a separate network namespace +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } + +# give other scripts a chance to finish - audit_logread sees all activity +sleep 1 + +logfile=$(mktemp) +rulefile=$(mktemp) +echo "logging into $logfile" +./audit_logread >"$logfile" & +logread_pid=$! +trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT +exec 3<"$logfile" + +lsplit='s/^\(.*\) entries=\([^ ]*\) \(.*\)$/pfx="\1"\nval="\2"\nsfx="\3"/' +summarize_logs() { + sum=0 + while read line; do + eval $(sed "$lsplit" <<< "$line") + [[ $sum -gt 0 ]] && { + [[ "$pfx $sfx" == "$tpfx $tsfx" ]] && { + let "sum += val" + continue + } + echo "$tpfx entries=$sum $tsfx" + } + tpfx="$pfx" + tsfx="$sfx" + sum=$val + done + echo "$tpfx entries=$sum $tsfx" +} + +do_test() { # (cmd, log) + echo -n "testing for cmd: $1 ... " + cat <&3 >/dev/null + $1 >/dev/null || exit 1 + sleep 0.1 + res=$(diff -a -u <(echo "$2") <(summarize_logs <&3)) + [ $? -eq 0 ] && { echo "OK"; return; } + echo "FAIL" + grep -v '^\(---\|+++\|@@\)' <<< "$res" + ((RC--)) +} + +nft flush ruleset + +# adding tables, chains and rules + +for table in t1 t2; do + do_test "nft add table $table" \ + "table=$table family=2 entries=1 op=nft_register_table" + + do_test "nft add chain $table c1" \ + "table=$table family=2 entries=1 op=nft_register_chain" + + do_test "nft add chain $table c2; add chain $table c3" \ + "table=$table family=2 entries=2 op=nft_register_chain" + + cmd="add rule $table c1 counter" + + do_test "nft $cmd" \ + "table=$table family=2 entries=1 op=nft_register_rule" + + do_test "nft $cmd; $cmd" \ + "table=$table family=2 entries=2 op=nft_register_rule" + + cmd="" + sep="" + for chain in c2 c3; do + for i in {1..3}; do + cmd+="$sep add rule $table $chain counter" + sep=";" + done + done + do_test "nft $cmd" \ + "table=$table family=2 entries=6 op=nft_register_rule" +done + +for ((i = 0; i < 500; i++)); do + echo "add rule t2 c3 counter accept comment \"rule $i\"" +done > "$rulefile" +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=500 op=nft_register_rule' + +# adding sets and elements + +settype='type inet_service; counter' +setelem='{ 22, 80, 443 }' +setblock="{ $settype; elements = $setelem; }" +do_test "nft add set t1 s $setblock" \ +"table=t1 family=2 entries=4 op=nft_register_set" + +do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \ +"table=t1 family=2 entries=5 op=nft_register_set" + +do_test "nft add element t1 s3 $setelem" \ +"table=t1 family=2 entries=3 op=nft_register_setelem" + +# adding counters + +do_test 'nft add counter t1 c1' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +do_test 'nft add counter t2 c1; add counter t2 c2' \ +'table=t2 family=2 entries=2 op=nft_register_obj' + +for ((i = 3; i <= 500; i++)); do + echo "add counter t2 c$i" +done > "$rulefile" +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=498 op=nft_register_obj' + +# adding/updating quotas + +do_test 'nft add quota t1 q1 { 10 bytes }' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \ +'table=t2 family=2 entries=2 op=nft_register_obj' + +for ((i = 3; i <= 500; i++)); do + echo "add quota t2 q$i { 10 bytes }" +done > "$rulefile" +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=498 op=nft_register_obj' + +# changing the quota value triggers obj update path +do_test 'nft add quota t1 q1 { 20 bytes }' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +# resetting rules + +do_test 'nft reset rules t1 c2' \ +'table=t1 family=2 entries=3 op=nft_reset_rule' + +do_test 'nft reset rules table t1' \ +'table=t1 family=2 entries=9 op=nft_reset_rule' + +do_test 'nft reset rules t2 c3' \ +'table=t2 family=2 entries=503 op=nft_reset_rule' + +do_test 'nft reset rules t2' \ +'table=t2 family=2 entries=509 op=nft_reset_rule' + +do_test 'nft reset rules' \ +'table=t1 family=2 entries=9 op=nft_reset_rule +table=t2 family=2 entries=509 op=nft_reset_rule' + +# resetting sets and elements + +elem=(22 ",80" ",443") +relem="" +for i in {1..3}; do + relem+="${elem[((i - 1))]}" + do_test "nft reset element t1 s { $relem }" \ + "table=t1 family=2 entries=$i op=nft_reset_setelem" +done + +do_test 'nft reset set t1 s' \ +'table=t1 family=2 entries=3 op=nft_reset_setelem' + +# resetting counters + +do_test 'nft reset counter t1 c1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset counters t1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset counters t2' \ +'table=t2 family=2 entries=500 op=nft_reset_obj' + +do_test 'nft reset counters' \ +'table=t1 family=2 entries=1 op=nft_reset_obj +table=t2 family=2 entries=500 op=nft_reset_obj' + +# resetting quotas + +do_test 'nft reset quota t1 q1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset quotas t1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset quotas t2' \ +'table=t2 family=2 entries=500 op=nft_reset_obj' + +do_test 'nft reset quotas' \ +'table=t1 family=2 entries=1 op=nft_reset_obj +table=t2 family=2 entries=500 op=nft_reset_obj' + +# deleting rules + +readarray -t handles < <(nft -a list chain t1 c1 | \ + sed -n 's/.*counter.* handle \(.*\)$/\1/p') + +do_test "nft delete rule t1 c1 handle ${handles[0]}" \ +'table=t1 family=2 entries=1 op=nft_unregister_rule' + +cmd='delete rule t1 c1 handle' +do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \ +'table=t1 family=2 entries=2 op=nft_unregister_rule' + +do_test 'nft flush chain t1 c2' \ +'table=t1 family=2 entries=3 op=nft_unregister_rule' + +do_test 'nft flush table t2' \ +'table=t2 family=2 entries=509 op=nft_unregister_rule' + +# deleting chains + +do_test 'nft delete chain t2 c2' \ +'table=t2 family=2 entries=1 op=nft_unregister_chain' + +# deleting sets and elements + +do_test 'nft delete element t1 s { 22 }' \ +'table=t1 family=2 entries=1 op=nft_unregister_setelem' + +do_test 'nft delete element t1 s { 80, 443 }' \ +'table=t1 family=2 entries=2 op=nft_unregister_setelem' + +do_test 'nft flush set t1 s2' \ +'table=t1 family=2 entries=3 op=nft_unregister_setelem' + +do_test 'nft delete set t1 s2' \ +'table=t1 family=2 entries=1 op=nft_unregister_set' + +do_test 'nft delete set t1 s3' \ +'table=t1 family=2 entries=1 op=nft_unregister_set' + +exit $RC diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh new file mode 100755 index 000000000000..efea93cf23d4 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -0,0 +1,1890 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# nft_concat_range.sh - Tests for sets with concatenation of ranged fields +# +# Copyright (c) 2019 Red Hat GmbH +# +# Author: Stefano Brivio <sbrivio@redhat.com> +# +# shellcheck disable=SC2154,SC2034,SC2016,SC2030,SC2031,SC2317 +# ^ Configuration and templates sourced with eval, counters reused in subshells + +source lib.sh + +# Available test groups: +# - reported_issues: check for issues that were reported in the past +# - correctness: check that packets match given entries, and only those +# - correctness_large: same but with additional non-matching entries +# - concurrency: attempt races between insertion, deletion and lookup +# - timeout: check that packets match entries until they expire +# - performance: estimate matching rate, compare with rbtree and hash baselines +TESTS="reported_issues correctness correctness_large concurrency timeout" + +[ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}" + +# Set types, defined by TYPE_ variables below +TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto + net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp + net6_port_net6_port net_port_mac_proto_net" + +# Reported bugs, also described by TYPE_ variables below +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch" + +# List of possible paths to pktgen script from kernel tree for performance tests +PKTGEN_SCRIPT_PATHS=" + ../../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh + pktgen/pktgen_bench_xmit_mode_netif_receive.sh" + +# Definition of set types: +# display display text for test report +# type_spec nftables set type specifier +# chain_spec nftables type specifier for rules mapping to set +# dst call sequence of format_*() functions for destination fields +# src call sequence of format_*() functions for source fields +# start initial integer used to generate addresses and ports +# count count of entries to generate and match +# src_delta number summed to destination generator for source fields +# tools list of tools for correctness and timeout tests, any can be used +# proto L4 protocol of test packets +# +# race_repeat race attempts per thread, 0 disables concurrency test for type +# flood_tools list of tools for concurrency tests, any can be used +# flood_proto L4 protocol of test packets for concurrency tests +# flood_spec nftables type specifier for concurrency tests +# +# perf_duration duration of single pktgen injection test +# perf_spec nftables type specifier for performance tests +# perf_dst format_*() functions for destination fields in performance test +# perf_src format_*() functions for source fields in performance test +# perf_entries number of set entries for performance test +# perf_proto L3 protocol of test packets +TYPE_net_port=" +display net,port +type_spec ipv4_addr . inet_service +chain_spec ip daddr . udp dport +dst addr4 port +src +start 1 +count 5 +src_delta 2000 +tools sendip bash +proto udp + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto udp +flood_spec ip daddr . udp dport + +perf_duration 5 +perf_spec ip daddr . udp dport +perf_dst addr4 port +perf_src +perf_entries 1000 +perf_proto ipv4 +" + +TYPE_port_net=" +display port,net +type_spec inet_service . ipv4_addr +chain_spec udp dport . ip daddr +dst port addr4 +src +start 1 +count 5 +src_delta 2000 +tools sendip socat bash +proto udp + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto udp +flood_spec udp dport . ip daddr + +perf_duration 5 +perf_spec udp dport . ip daddr +perf_dst port addr4 +perf_src +perf_entries 100 +perf_proto ipv4 +" + +TYPE_net6_port=" +display net6,port +type_spec ipv6_addr . inet_service +chain_spec ip6 daddr . udp dport +dst addr6 port +src +start 10 +count 5 +src_delta 2000 +tools sendip socat bash +proto udp6 + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto tcp6 +flood_spec ip6 daddr . udp dport + +perf_duration 5 +perf_spec ip6 daddr . udp dport +perf_dst addr6 port +perf_src +perf_entries 1000 +perf_proto ipv6 +" + +TYPE_port_proto=" +display port,proto +type_spec inet_service . inet_proto +chain_spec udp dport . meta l4proto +dst port proto +src +start 1 +count 5 +src_delta 2000 +tools sendip socat bash +proto udp + +race_repeat 0 + +perf_duration 5 +perf_spec udp dport . meta l4proto +perf_dst port proto +perf_src +perf_entries 30000 +perf_proto ipv4 +" + +TYPE_net6_port_mac=" +display net6,port,mac +type_spec ipv6_addr . inet_service . ether_addr +chain_spec ip6 daddr . udp dport . ether saddr +dst addr6 port +src mac +start 10 +count 5 +src_delta 2000 +tools sendip socat bash +proto udp6 + +race_repeat 0 + +perf_duration 5 +perf_spec ip6 daddr . udp dport . ether daddr +perf_dst addr6 port mac +perf_src +perf_entries 10 +perf_proto ipv6 +" + +TYPE_net6_port_mac_proto=" +display net6,port,mac,proto +type_spec ipv6_addr . inet_service . ether_addr . inet_proto +chain_spec ip6 daddr . udp dport . ether saddr . meta l4proto +dst addr6 port +src mac proto +start 10 +count 5 +src_delta 2000 +tools sendip socat bash +proto udp6 + +race_repeat 0 + +perf_duration 5 +perf_spec ip6 daddr . udp dport . ether daddr . meta l4proto +perf_dst addr6 port mac proto +perf_src +perf_entries 1000 +perf_proto ipv6 +" + +TYPE_net_port_net=" +display net,port,net +type_spec ipv4_addr . inet_service . ipv4_addr +chain_spec ip daddr . udp dport . ip saddr +dst addr4 port +src addr4 +start 1 +count 5 +src_delta 2000 +tools sendip socat bash +proto udp + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto tcp +flood_spec ip daddr . udp dport . ip saddr + +perf_duration 0 +" + +TYPE_net6_port_net6_port=" +display net6,port,net6,port +type_spec ipv6_addr . inet_service . ipv6_addr . inet_service +chain_spec ip6 daddr . udp dport . ip6 saddr . udp sport +dst addr6 port +src addr6 port +start 10 +count 5 +src_delta 2000 +tools sendip socat +proto udp6 + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto tcp6 +flood_spec ip6 daddr . tcp dport . ip6 saddr . tcp sport + +perf_duration 0 +" + +TYPE_net_port_mac_proto_net=" +display net,port,mac,proto,net +type_spec ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr +chain_spec ip daddr . udp dport . ether saddr . meta l4proto . ip saddr +dst addr4 port +src mac proto addr4 +start 1 +count 5 +src_delta 2000 +tools sendip socat bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_net_mac=" +display net,mac +type_spec ipv4_addr . ether_addr +chain_spec ip daddr . ether saddr +dst addr4 +src mac +start 1 +count 5 +src_delta 2000 +tools sendip socat bash +proto udp + +race_repeat 0 + +perf_duration 5 +perf_spec ip daddr . ether daddr +perf_dst addr4 mac +perf_src +perf_entries 1000 +perf_proto ipv4 +" + +TYPE_mac_net=" +display mac,net +type_spec ether_addr . ipv4_addr +chain_spec ether saddr . ip saddr +dst +src mac addr4 +start 1 +count 5 +src_delta 2000 +tools sendip socat bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_net_mac_icmp=" +display net,mac - ICMP +type_spec ipv4_addr . ether_addr +chain_spec ip daddr . ether saddr +dst addr4 +src mac +start 1 +count 5 +src_delta 2000 +tools ping +proto icmp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_net6_mac_icmp=" +display net6,mac - ICMPv6 +type_spec ipv6_addr . ether_addr +chain_spec ip6 daddr . ether saddr +dst addr6 +src mac +start 10 +count 50 +src_delta 2000 +tools ping +proto icmp6 + +race_repeat 0 + +perf_duration 0 +" + +TYPE_net_port_proto_net=" +display net,port,proto,net +type_spec ipv4_addr . inet_service . inet_proto . ipv4_addr +chain_spec ip daddr . udp dport . meta l4proto . ip saddr +dst addr4 port proto +src addr4 +start 1 +count 5 +src_delta 2000 +tools sendip socat +proto udp + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto tcp +flood_spec ip daddr . tcp dport . meta l4proto . ip saddr + +perf_duration 0 +" + +# Definition of tests for bugs reported in the past: +# display display text for test report +TYPE_flush_remove_add=" +display Add two elements, flush, re-add +" + +TYPE_reload=" +display net,mac with reload +type_spec ipv4_addr . ether_addr +chain_spec ip daddr . ether saddr +dst addr4 +src mac +start 1 +count 1 +src_delta 2000 +tools sendip socat bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_net_port_proto_match=" +display net,port,proto +type_spec ipv4_addr . inet_service . inet_proto +chain_spec ip daddr . udp dport . meta l4proto +dst addr4 port proto +src +start 1 +count 9 +src_delta 9 +tools sendip bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_avx2_mismatch=" +display avx2 false match +type_spec inet_proto . ipv6_addr +chain_spec meta l4proto . ip6 daddr +dst proto addr6 +src +start 1 +count 1 +src_delta 1 +tools ping +proto icmp6 + +race_repeat 0 + +perf_duration 0 +" + + +# Set template for all tests, types and rules are filled in depending on test +set_template=' +flush ruleset + +table inet filter { + counter test { + packets 0 bytes 0 + } + + set test { + type ${type_spec} + flags interval,timeout + } + + chain input { + type filter hook prerouting priority 0; policy accept; + ${chain_spec} @test counter name \"test\" + } +} + +table netdev perf { + counter test { + packets 0 bytes 0 + } + + counter match { + packets 0 bytes 0 + } + + set test { + type ${type_spec} + flags interval + } + + set norange { + type ${type_spec} + } + + set noconcat { + type ${type_spec%% *} + flags interval + } + + chain test { + type filter hook ingress device veth_a priority 0; + } +} +' + +err_buf= +info_buf= + +# Append string to error buffer +err() { + err_buf="${err_buf}${1} +" +} + +# Append string to information buffer +info() { + info_buf="${info_buf}${1} +" +} + +# Flush error buffer to stdout +err_flush() { + printf "%s" "${err_buf}" + err_buf= +} + +# Flush information buffer to stdout +info_flush() { + printf "%s" "${info_buf}" + info_buf= +} + +# Setup veth pair: this namespace receives traffic, B generates it +setup_veth() { + ip netns add B + ip link add veth_a type veth peer name veth_b || return 1 + + ip link set veth_a up + ip link set veth_b netns B + + ip -n B link set veth_b up + + ip addr add dev veth_a 10.0.0.1 + ip route add default dev veth_a + + ip -6 addr add fe80::1/64 dev veth_a nodad + ip -6 addr add 2001:db8::1/64 dev veth_a nodad + ip -6 route add default dev veth_a + + ip -n B route add default dev veth_b + + ip -6 -n B addr add fe80::2/64 dev veth_b nodad + ip -6 -n B addr add 2001:db8::2/64 dev veth_b nodad + ip -6 -n B route add default dev veth_b + + B() { + ip netns exec B "$@" >/dev/null 2>&1 + } +} + +# Fill in set template and initialise set +setup_set() { + eval "echo \"${set_template}\"" | nft -f - +} + +# Check that at least one of the needed tools is available +check_tools() { + [ -z "${tools}" ] && return 0 + + __tools= + for tool in ${tools}; do + __tools="${__tools} ${tool}" + + command -v "${tool}" >/dev/null && return 0 + done + err "need one of:${__tools}, skipping" && return 1 +} + +# Set up function to send ICMP packets +setup_send_icmp() { + send_icmp() { + B ping -c1 -W1 "${dst_addr4}" >/dev/null 2>&1 + } +} + +# Set up function to send ICMPv6 packets +setup_send_icmp6() { + if command -v ping6 >/dev/null; then + send_icmp6() { + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + B ping6 -q -c1 -W1 "${dst_addr6}" + } + else + send_icmp6() { + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + B ping -q -6 -c1 -W1 "${dst_addr6}" + } + fi +} + +# Set up function to send single UDP packets on IPv4 +setup_send_udp() { + if command -v sendip >/dev/null; then + send_udp() { + [ -n "${src_port}" ] && src_port="-us ${src_port}" + [ -n "${dst_port}" ] && dst_port="-ud ${dst_port}" + [ -n "${src_addr4}" ] && src_addr4="-is ${src_addr4}" + + # shellcheck disable=SC2086 # sendip needs split options + B sendip -p ipv4 -p udp ${src_addr4} ${src_port} \ + ${dst_port} "${dst_addr4}" + + src_port= + dst_port= + src_addr4= + } + elif command -v socat -v >/dev/null; then + send_udp() { + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}" dev veth_b + __socatbind=",bind=${src_addr4}" + if [ -n "${src_port}" ];then + __socatbind="${__socatbind}:${src_port}" + fi + fi + + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + [ -z "${dst_port}" ] && dst_port=12345 + + echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:"$dst_addr4":"$dst_port""${__socatbind}" + + src_addr4= + src_port= + } + elif [ -z "$(bash -c 'type -p')" ]; then + send_udp() { + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + B ip route add default dev veth_b + fi + + B bash -c "echo > /dev/udp/${dst_addr4}/${dst_port}" + + if [ -n "${src_addr4}" ]; then + B ip addr del "${src_addr4}/16" dev veth_b + fi + src_addr4= + } + else + return 1 + fi +} + +# Set up function to send single UDP packets on IPv6 +setup_send_udp6() { + if command -v sendip >/dev/null; then + send_udp6() { + [ -n "${src_port}" ] && src_port="-us ${src_port}" + [ -n "${dst_port}" ] && dst_port="-ud ${dst_port}" + if [ -n "${src_addr6}" ]; then + src_addr6="-6s ${src_addr6}" + else + src_addr6="-6s 2001:db8::2" + fi + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + B sendip -p ipv6 -p udp ${src_addr6} ${src_port} \ + ${dst_port} "${dst_addr6}" + + src_port= + dst_port= + src_addr6= + } + elif command -v socat -v >/dev/null; then + send_udp6() { + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + __socatbind6= + + if [ -n "${src_addr6}" ]; then + B ip addr add "${src_addr6}" dev veth_b nodad + + __socatbind6=",bind=[${src_addr6}]" + + if [ -n "${src_port}" ] ;then + __socatbind6="${__socatbind6}:${src_port}" + fi + fi + + echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:["$dst_addr6"]:"$dst_port""${__socatbind6}" + } + elif [ -z "$(bash -c 'type -p')" ]; then + send_udp6() { + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + B ip addr add "${src_addr6}" dev veth_b nodad + B bash -c "echo > /dev/udp/${dst_addr6}/${dst_port}" + ip -6 addr del "${dst_addr6}" dev veth_a 2>/dev/null + } + else + return 1 + fi +} + +listener_ready() +{ + port="$1" + ss -lnt -o "sport = :$port" | grep -q "$port" +} + +# Set up function to send TCP traffic on IPv4 +setup_flood_tcp() { + if command -v iperf3 >/dev/null; then + flood_tcp() { + local n_port="${dst_port}" + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + src_addr4="-B ${src_addr4}" + else + B ip addr add dev veth_b 10.0.0.2 + src_addr4="-B 10.0.0.2" + fi + if [ -n "${src_port}" ]; then + src_port="--cport ${src_port}" + fi + B ip route add default dev veth_b 2>/dev/null + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf3 -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" + + # shellcheck disable=SC2086 # this needs split options + B iperf3 -c "${dst_addr4}" ${dst_port} ${src_port} \ + ${src_addr4} -l16 -t 1000 + + src_addr4= + src_port= + dst_port= + } + elif command -v iperf >/dev/null; then + flood_tcp() { + local n_port="${dst_port}" + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + src_addr4="-B ${src_addr4}" + else + B ip addr add dev veth_b 10.0.0.2 2>/dev/null + src_addr4="-B 10.0.0.2" + fi + if [ -n "${src_port}" ]; then + src_addr4="${src_addr4}:${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" + + # shellcheck disable=SC2086 # this needs split options + B iperf -c "${dst_addr4}" ${dst_port} ${src_addr4} \ + -l20 -t 1000 + + src_addr4= + src_port= + dst_port= + } + elif command -v netperf >/dev/null; then + flood_tcp() { + local n_port="${dst_port}" + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + else + B ip addr add dev veth_b 10.0.0.2 + src_addr4="10.0.0.2" + fi + if [ -n "${src_port}" ]; then + dst_port="${dst_port},${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + netserver -4 ${dst_port} -L "${dst_addr4}" \ + >/dev/null 2>&1 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}" + + # shellcheck disable=SC2086 # this needs split options + B netperf -4 -H "${dst_addr4}" ${dst_port} \ + -L "${src_addr4}" -l 1000 -t TCP_STREAM + + src_addr4= + src_port= + dst_port= + } + else + return 1 + fi +} + +# Set up function to send TCP traffic on IPv6 +setup_flood_tcp6() { + if command -v iperf3 >/dev/null; then + flood_tcp6() { + local n_port="${dst_port}" + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr6}" ]; then + B ip addr add "${src_addr6}" dev veth_b nodad + src_addr6="-B ${src_addr6}" + else + src_addr6="-B 2001:db8::2" + fi + if [ -n "${src_port}" ]; then + src_port="--cport ${src_port}" + fi + B ip route add default dev veth_b + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf3 -s -DB "${dst_addr6}" ${dst_port} >/dev/null 2>&1 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}" + + # shellcheck disable=SC2086 # this needs split options + B iperf3 -c "${dst_addr6}" ${dst_port} \ + ${src_port} ${src_addr6} -l16 -t 1000 + + src_addr6= + src_port= + dst_port= + } + elif command -v iperf >/dev/null; then + flood_tcp6() { + local n_port="${dst_port}" + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr6}" ]; then + B ip addr add "${src_addr6}" dev veth_b nodad + src_addr6="-B ${src_addr6}" + else + src_addr6="-B 2001:db8::2" + fi + if [ -n "${src_port}" ]; then + src_addr6="${src_addr6}:${src_port}" + fi + B ip route add default dev veth_b + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf -s -VDB "${dst_addr6}" ${dst_port} >/dev/null 2>&1 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" + + # shellcheck disable=SC2086 # this needs split options + B iperf -c "${dst_addr6}" -V ${dst_port} \ + ${src_addr6} -l1 -t 1000 + + src_addr6= + src_port= + dst_port= + } + elif command -v netperf >/dev/null; then + flood_tcp6() { + local n_port="${dst_port}" + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr6}" ]; then + B ip addr add "${src_addr6}" dev veth_b nodad + else + src_addr6="2001:db8::2" + fi + if [ -n "${src_port}" ]; then + dst_port="${dst_port},${src_port}" + fi + B ip route add default dev veth_b + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + netserver -6 ${dst_port} -L "${dst_addr6}" \ + >/dev/null 2>&1 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" + + # shellcheck disable=SC2086 # this needs split options + B netperf -6 -H "${dst_addr6}" ${dst_port} \ + -L "${src_addr6}" -l 1000 -t TCP_STREAM + + src_addr6= + src_port= + dst_port= + } + else + return 1 + fi +} + +# Set up function to send UDP traffic on IPv4 +setup_flood_udp() { + if command -v iperf3 >/dev/null; then + flood_udp() { + local n_port="${dst_port}" + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + src_addr4="-B ${src_addr4}" + else + B ip addr add dev veth_b 10.0.0.2 2>/dev/null + src_addr4="-B 10.0.0.2" + fi + if [ -n "${src_port}" ]; then + src_port="--cport ${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf3 -s -DB "${dst_addr4}" ${dst_port} + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" + + # shellcheck disable=SC2086 # this needs split options + B iperf3 -u -c "${dst_addr4}" -Z -b 100M -l16 -t1000 \ + ${dst_port} ${src_port} ${src_addr4} + + src_addr4= + src_port= + dst_port= + } + elif command -v iperf >/dev/null; then + flood_udp() { + local n_port="${dst_port}" + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + src_addr4="-B ${src_addr4}" + else + B ip addr add dev veth_b 10.0.0.2 + src_addr4="-B 10.0.0.2" + fi + if [ -n "${src_port}" ]; then + src_addr4="${src_addr4}:${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf -u -sDB "${dst_addr4}" ${dst_port} >/dev/null 2>&1 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" + + # shellcheck disable=SC2086 # this needs split options + B iperf -u -c "${dst_addr4}" -b 100M -l1 -t1000 \ + ${dst_port} ${src_addr4} + + src_addr4= + src_port= + dst_port= + } + elif command -v netperf >/dev/null; then + flood_udp() { + local n_port="${dst_port}" + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + else + B ip addr add dev veth_b 10.0.0.2 + src_addr4="10.0.0.2" + fi + if [ -n "${src_port}" ]; then + dst_port="${dst_port},${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + netserver -4 ${dst_port} -L "${dst_addr4}" \ + >/dev/null 2>&1 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" + + # shellcheck disable=SC2086 # this needs split options + B netperf -4 -H "${dst_addr4}" ${dst_port} \ + -L "${src_addr4}" -l 1000 -t UDP_STREAM + + src_addr4= + src_port= + dst_port= + } + else + return 1 + fi +} + +# Find pktgen script and set up function to start pktgen injection +setup_perf() { + for pktgen_script_path in ${PKTGEN_SCRIPT_PATHS} __notfound; do + command -v "${pktgen_script_path}" >/dev/null && break + done + [ "${pktgen_script_path}" = "__notfound" ] && return 1 + + perf_ipv4() { + ${pktgen_script_path} -s80 \ + -i veth_a -d "${dst_addr4}" -p "${dst_port}" \ + -m "${dst_mac}" \ + -t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null & + perf_pid=$! + } + perf_ipv6() { + IP6=6 ${pktgen_script_path} -s100 \ + -i veth_a -d "${dst_addr6}" -p "${dst_port}" \ + -m "${dst_mac}" \ + -t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null & + perf_pid=$! + } +} + +# Clean up before each test +cleanup() { + nft reset counter inet filter test >/dev/null 2>&1 + nft flush ruleset >/dev/null 2>&1 + ip link del dummy0 2>/dev/null + ip route del default 2>/dev/null + ip -6 route del default 2>/dev/null + ip netns pids B 2>/dev/null | xargs kill 2>/dev/null + ip netns del B 2>/dev/null + ip link del veth_a 2>/dev/null + timeout= + killall iperf3 2>/dev/null + killall iperf 2>/dev/null + killall netperf 2>/dev/null + killall netserver 2>/dev/null +} + +cleanup_exit() { + cleanup + rm -f "$tmp" +} + +# Entry point for setup functions +setup() { + if [ "$(id -u)" -ne 0 ]; then + echo " need to run as root" + exit ${ksft_skip} + fi + + cleanup + check_tools || return 1 + for arg do + if ! eval setup_"${arg}"; then + err " ${arg} not supported" + return 1 + fi + done +} + +# Format integer into IPv4 address, summing 10.0.0.5 (arbitrary) to it +format_addr4() { + a=$((${1} + 16777216 * 10 + 5)) + printf "%i.%i.%i.%i" \ + "$((a / 16777216))" "$((a % 16777216 / 65536))" \ + "$((a % 65536 / 256))" "$((a % 256))" +} + +# Format integer into IPv6 address, summing 2001:db8:: to it +format_addr6() { + printf "2001:db8::%04x:%04x" "$((${1} / 65536))" "$((${1} % 65536))" +} + +# Format integer into EUI-48 address, summing 00:01:00:00:00:00 to it +format_mac() { + printf "00:01:%02x:%02x:%02x:%02x" \ + "$((${1} / 16777216))" "$((${1} % 16777216 / 65536))" \ + "$((${1} % 65536 / 256))" "$((${1} % 256))" +} + +# Format integer into port, avoid 0 port +format_port() { + printf "%i" "$((${1} % 65534 + 1))" +} + +# Drop suffixed '6' from L4 protocol, if any +format_proto() { + printf "%s" "${proto}" | tr -d 6 +} + +# Format destination and source fields into nft concatenated type +format() { + __start= + __end= + __expr="{ " + + for f in ${dst}; do + [ "${__expr}" != "{ " ] && __expr="${__expr} . " + + __start="$(eval format_"${f}" "${start}")" + __end="$(eval format_"${f}" "${end}")" + + if [ "${f}" = "proto" ]; then + __expr="${__expr}${__start}" + else + __expr="${__expr}${__start}-${__end}" + fi + done + for f in ${src}; do + [ "${__expr}" != "{ " ] && __expr="${__expr} . " + + __start="$(eval format_"${f}" "${srcstart}")" + __end="$(eval format_"${f}" "${srcend}")" + + if [ "${f}" = "proto" ]; then + __expr="${__expr}${__start}" + else + __expr="${__expr}${__start}-${__end}" + fi + done + + if [ -n "${timeout}" ]; then + echo "${__expr} timeout ${timeout}s }" + else + echo "${__expr} }" + fi +} + +# Format destination and source fields into nft type, start element only +format_norange() { + __expr="{ " + + for f in ${dst}; do + [ "${__expr}" != "{ " ] && __expr="${__expr} . " + + __expr="${__expr}$(eval format_"${f}" "${start}")" + done + for f in ${src}; do + __expr="${__expr} . $(eval format_"${f}" "${start}")" + done + + echo "${__expr} }" +} + +# Format first destination field into nft type +format_noconcat() { + for f in ${dst}; do + __start="$(eval format_"${f}" "${start}")" + __end="$(eval format_"${f}" "${end}")" + + if [ "${f}" = "proto" ]; then + echo "{ ${__start} }" + else + echo "{ ${__start}-${__end} }" + fi + return + done +} + +# Add single entry to 'test' set in 'inet filter' table +add() { + if ! nft add element inet filter test "${1}"; then + err "Failed to add ${1} given ruleset:" + err "$(nft -a list ruleset)" + return 1 + fi +} + +# Format and output entries for sets in 'netdev perf' table +add_perf() { + if [ "${1}" = "test" ]; then + echo "add element netdev perf test $(format)" + elif [ "${1}" = "norange" ]; then + echo "add element netdev perf norange $(format_norange)" + elif [ "${1}" = "noconcat" ]; then + echo "add element netdev perf noconcat $(format_noconcat)" + fi +} + +# Add single entry to 'norange' set in 'netdev perf' table +add_perf_norange() { + if ! nft add element netdev perf norange "${1}"; then + err "Failed to add ${1} given ruleset:" + err "$(nft -a list ruleset)" + return 1 + fi +} + +# Add single entry to 'noconcat' set in 'netdev perf' table +add_perf_noconcat() { + if ! nft add element netdev perf noconcat "${1}"; then + err "Failed to add ${1} given ruleset:" + err "$(nft -a list ruleset)" + return 1 + fi +} + +# Delete single entry from set +del() { + if ! nft delete element inet filter test "${1}"; then + err "Failed to delete ${1} given ruleset:" + err "$(nft -a list ruleset)" + return 1 + fi +} + +# Return packet count from 'test' counter in 'inet filter' table +count_packets() { + found=0 + for token in $(nft list counter inet filter test); do + [ ${found} -eq 1 ] && echo "${token}" && return + [ "${token}" = "packets" ] && found=1 + done +} + +# Return packet count from 'test' counter in 'netdev perf' table +count_perf_packets() { + found=0 + for token in $(nft list counter netdev perf test); do + [ ${found} -eq 1 ] && echo "${token}" && return + [ "${token}" = "packets" ] && found=1 + done +} + +# Set MAC addresses, send traffic according to specifier +flood() { + ip link set veth_a address "$(format_mac "${1}")" + ip -n B link set veth_b address "$(format_mac "${2}")" + + for f in ${dst}; do + eval dst_"$f"=\$\(format_\$f "${1}"\) + done + for f in ${src}; do + eval src_"$f"=\$\(format_\$f "${2}"\) + done + eval flood_\$proto +} + +# Set MAC addresses, start pktgen injection +perf() { + dst_mac="$(format_mac "${1}")" + ip link set veth_a address "${dst_mac}" + + for f in ${dst}; do + eval dst_"$f"=\$\(format_\$f "${1}"\) + done + for f in ${src}; do + eval src_"$f"=\$\(format_\$f "${2}"\) + done + eval perf_\$perf_proto +} + +# Set MAC addresses, send single packet, check that it matches, reset counter +send_match() { + ip link set veth_a address "$(format_mac "${1}")" + ip -n B link set veth_b address "$(format_mac "${2}")" + + for f in ${dst}; do + eval dst_"$f"=\$\(format_\$f "${1}"\) + done + for f in ${src}; do + eval src_"$f"=\$\(format_\$f "${2}"\) + done + eval send_\$proto + if [ "$(count_packets)" != "1" ]; then + err "${proto} packet to:" + err " $(for f in ${dst}; do + eval format_\$f "${1}"; printf ' '; done)" + err "from:" + err " $(for f in ${src}; do + eval format_\$f "${2}"; printf ' '; done)" + err "should have matched ruleset:" + err "$(nft -a list ruleset)" + return 1 + fi + nft reset counter inet filter test >/dev/null +} + +# Set MAC addresses, send single packet, check that it doesn't match +send_nomatch() { + ip link set veth_a address "$(format_mac "${1}")" + ip -n B link set veth_b address "$(format_mac "${2}")" + + for f in ${dst}; do + eval dst_"$f"=\$\(format_\$f "${1}"\) + done + for f in ${src}; do + eval src_"$f"=\$\(format_\$f "${2}"\) + done + eval send_\$proto + if [ "$(count_packets)" != "0" ]; then + err "${proto} packet to:" + err " $(for f in ${dst}; do + eval format_\$f "${1}"; printf ' '; done)" + err "from:" + err " $(for f in ${src}; do + eval format_\$f "${2}"; printf ' '; done)" + err "should not have matched ruleset:" + err "$(nft -a list ruleset)" + return 1 + fi +} + +# Correctness test template: +# - add ranged element, check that packets match it +# - check that packets outside range don't match it +# - remove some elements, check that packets don't match anymore +test_correctness_main() { + range_size=1 + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do + send_match "${j}" $((j + src_delta)) || return 1 + done + send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1 + + # Delete elements now and then + if [ $((i % 3)) -eq 0 ]; then + del "$(format)" || return 1 + for j in $(seq "$start" \ + $((range_size / 2 + 1)) ${end}); do + send_nomatch "${j}" $((j + src_delta)) \ + || return 1 + done + fi + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done +} + +test_correctness() { + setup veth send_"${proto}" set || return ${ksft_skip} + + test_correctness_main +} + +# Repeat the correctness tests, but add extra non-matching entries. +# This exercises the more compact '4 bit group' representation that +# gets picked when the default 8-bit representation exceed +# NFT_PIPAPO_LT_SIZE_HIGH bytes of memory. +# See usage of NFT_PIPAPO_LT_SIZE_HIGH in pipapo_lt_bits_adjust(). +# +# The format() helper is way too slow when generating lots of +# entries so its not used here. +test_correctness_large() { + setup veth send_"${proto}" set || return ${ksft_skip} + # number of dummy (filler) entries to add. + local dcount=16385 + + ( + echo -n "add element inet filter test { " + + case "$type_spec" in + "ether_addr . ipv4_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_mac $((1000000 + i)) + printf ". 172.%i.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + done + ;; + "inet_proto . ipv6_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "%i . " $((RANDOM%256)) + format_addr6 $((1000000 + i)) + done + ;; + "inet_service . inet_proto") + # smaller key sizes, need more entries to hit the + # 4-bit threshold. + dcount=65536 + for i in $(seq 1 $dcount); do + local proto=$((RANDOM%256)) + + # Test uses UDP to match, as it also fails when matching + # an entry that doesn't exist, so skip 'udp' entries + # to not trigger a wrong failure. + [ $proto -eq 17 ] && proto=18 + [ $i -gt 1 ] && echo ", " + printf "%i . %i " $(((i%65534) + 1)) $((proto)) + done + ;; + "inet_service . ipv4_addr") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "%i . 172.%i.%i.%i " $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + done + ;; + "ipv4_addr . ether_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + format_mac $((1000000 + i)) + done + ;; + "ipv4_addr . inet_service") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) + done + ;; + "ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr") + dcount=65536 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) + format_mac $((1000000 + i)) + printf ". %i . 192.168.%i.%i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + done + ;; + "ipv4_addr . inet_service . inet_proto") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . %i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) + done + ;; + "ipv4_addr . inet_service . inet_proto . ipv4_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((RANDOM%256)) + done + ;; + "ipv4_addr . inet_service . ipv4_addr") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) + done + ;; + "ipv6_addr . ether_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . " + format_mac $((1000000 + i)) + done + ;; + "ipv6_addr . inet_service") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1))" + done + ;; + "ipv6_addr . inet_service . ether_addr") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1)) . " + format_mac $((i + 1000000)) + done + ;; + "ipv6_addr . inet_service . ether_addr . inet_proto") + dcount=65536 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1)) . " + format_mac $((i + 1000000)) + echo -n " . $((RANDOM%256))" + done + ;; + "ipv6_addr . inet_service . ipv6_addr . inet_service") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1)) . " + format_addr6 $((i + 2123456)) + echo -n " . $((RANDOM%256))" + done + ;; + *) + "Unhandled $type_spec" + return 1 + esac + echo -n "}" + + ) | nft -f - || return 1 + + test_correctness_main +} + +# Concurrency test template: +# - add all the elements +# - start a thread for each physical thread that: +# - adds all the elements +# - flushes the set +# - adds all the elements +# - flushes the entire ruleset +# - adds the set back +# - adds all the elements +# - delete all the elements +test_concurrency() { + proto=${flood_proto} + tools=${flood_tools} + chain_spec=${flood_spec} + setup veth flood_"${proto}" set || return ${ksft_skip} + + range_size=1 + cstart=${start} + flood_pids= + for i in $(seq "$start" $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + + flood "${i}" $((i + src_delta)) & flood_pids="${flood_pids} $!" + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + sleep $((RANDOM%10)) + + pids= + for c in $(seq 1 "$(nproc)"); do ( + for r in $(seq 1 "${race_repeat}"); do + range_size=1 + + # $start needs to be local to this subshell + # shellcheck disable=SC2030 + start=${cstart} + for i in $(seq "$start" $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" 2>/dev/null + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + nft flush inet filter test 2>/dev/null + + range_size=1 + start=${cstart} + for i in $(seq "$start" $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" 2>/dev/null + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + nft flush ruleset + setup set 2>/dev/null + + range_size=1 + start=${cstart} + for i in $(seq "$start" $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" 2>/dev/null + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + range_size=1 + start=${cstart} + for i in $(seq "$start" $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + del "$(format)" 2>/dev/null + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + done + ) & pids="${pids} $!" + done + + # shellcheck disable=SC2046,SC2086 # word splitting wanted here + wait $(for pid in ${pids}; do echo ${pid}; done) + # shellcheck disable=SC2046,SC2086 + kill $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null + # shellcheck disable=SC2046,SC2086 + wait $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null + + return 0 +} + +# Timeout test template: +# - add all the elements with 3s timeout while checking that packets match +# - wait 3s after the last insertion, check that packets don't match any entry +test_timeout() { + setup veth send_"${proto}" set || return ${ksft_skip} + + timeout=3 + + [ "$KSFT_MACHINE_SLOW" = "yes" ] && timeout=8 + + range_size=1 + for i in $(seq "$start" $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + + for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do + send_match "${j}" $((j + src_delta)) || return 1 + done + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + sleep $timeout + for i in $(seq "$start" $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do + send_nomatch "${j}" $((j + src_delta)) || return 1 + done + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done +} + +# Performance test template: +# - add concatenated ranged entries +# - add non-ranged concatenated entries (for hash set matching rate baseline) +# - add ranged entries with first field only (for rbhash baseline) +# - start pktgen injection directly on device rx path of this namespace +# - measure drop only rate, hash and rbtree baselines, then matching rate +test_performance() { + chain_spec=${perf_spec} + dst="${perf_dst}" + src="${perf_src}" + setup veth perf set || return ${ksft_skip} + + first=${start} + range_size=1 + for set in test norange noconcat; do + start=${first} + for i in $(seq "$start" $((start + perf_entries))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + elif [ "$start" -eq "$end" ]; then + end=$((start + 1)) + fi + + add_perf ${set} + + start=$((end + range_size)) + done > "${tmp}" + nft -f "${tmp}" + done + + perf $((end - 1)) "$srcstart" + + sleep 2 + + nft add rule netdev perf test counter name \"test\" drop + nft reset counter netdev perf test >/dev/null 2>&1 + sleep "${perf_duration}" + pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))" + info " baseline (drop from netdev hook): ${pps}pps" + handle="$(nft -a list chain netdev perf test | grep counter)" + handle="${handle##* }" + nft delete rule netdev perf test handle "${handle}" + + nft add rule "netdev perf test ${chain_spec} @norange \ + counter name \"test\" drop" + nft reset counter netdev perf test >/dev/null 2>&1 + sleep "${perf_duration}" + pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))" + info " baseline hash (non-ranged entries): ${pps}pps" + handle="$(nft -a list chain netdev perf test | grep counter)" + handle="${handle##* }" + nft delete rule netdev perf test handle "${handle}" + + nft add rule "netdev perf test ${chain_spec%%. *} @noconcat \ + counter name \"test\" drop" + nft reset counter netdev perf test >/dev/null 2>&1 + sleep "${perf_duration}" + pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))" + info " baseline rbtree (match on first field only): ${pps}pps" + handle="$(nft -a list chain netdev perf test | grep counter)" + handle="${handle##* }" + nft delete rule netdev perf test handle "${handle}" + + nft add rule "netdev perf test ${chain_spec} @test \ + counter name \"test\" drop" + nft reset counter netdev perf test >/dev/null 2>&1 + sleep "${perf_duration}" + pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))" + p5="$(printf %5s "${perf_entries}")" + info " set with ${p5} full, ranged entries: ${pps}pps" + kill "${perf_pid}" +} + +test_bug_flush_remove_add() { + rounds=100 + [ "$KSFT_MACHINE_SLOW" = "yes" ] && rounds=10 + + set_cmd='{ set s { type ipv4_addr . inet_service; flags interval; }; }' + elem1='{ 10.0.0.1 . 22-25, 10.0.0.1 . 10-20 }' + elem2='{ 10.0.0.1 . 10-20, 10.0.0.1 . 22-25 }' + for i in $(seq 1 $rounds); do + nft add table t "$set_cmd" || return ${ksft_skip} + nft add element t s "$elem1" 2>/dev/null || return 1 + nft flush set t s 2>/dev/null || return 1 + nft add element t s "$elem2" 2>/dev/null || return 1 + done + nft flush ruleset +} + +# - add ranged element, check that packets match it +# - reload the set, check packets still match +test_bug_reload() { + setup veth send_"${proto}" set || return ${ksft_skip} + rstart=${start} + + range_size=1 + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + # check kernel does allocate pcpu sctrach map + # for reload with no elemet add/delete + ( echo flush set inet filter test ; + nft list set inet filter test ) | nft -f - + + start=${rstart} + range_size=1 + + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do + send_match "${j}" $((j + src_delta)) || return 1 + done + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + nft flush ruleset +} + +# - add ranged element, check that packets match it +# - delete element again, check it is gone +test_bug_net_port_proto_match() { + setup veth send_"${proto}" set || return ${ksft_skip} + rstart=${start} + + range_size=1 + for i in $(seq 1 10); do + for j in $(seq 1 20) ; do + elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))") + + nft "add element inet filter test { $elem }" || return 1 + nft "get element inet filter test { $elem }" | grep -q "$elem" + if [ $? -ne 0 ];then + local got=$(nft "get element inet filter test { $elem }") + err "post-add: should have returned $elem but got $got" + return 1 + fi + done + done + + # recheck after set was filled + for i in $(seq 1 10); do + for j in $(seq 1 20) ; do + elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))") + + nft "get element inet filter test { $elem }" | grep -q "$elem" + if [ $? -ne 0 ];then + local got=$(nft "get element inet filter test { $elem }") + err "post-fill: should have returned $elem but got $got" + return 1 + fi + done + done + + # random del and re-fetch + for i in $(seq 1 10); do + for j in $(seq 1 20) ; do + local rnd=$((RANDOM%10)) + local got="" + + elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))") + if [ $rnd -gt 0 ];then + continue + fi + + nft "delete element inet filter test { $elem }" + got=$(nft "get element inet filter test { $elem }" 2>/dev/null) + if [ $? -eq 0 ];then + err "post-delete: query for $elem returned $got instead of error." + return 1 + fi + done + done + + nft flush ruleset +} + +test_bug_avx2_mismatch() +{ + setup veth send_"${proto}" set || return ${ksft_skip} + + local a1="fe80:dead:01ff:0a02:0b03:6007:8009:a001" + local a2="fe80:dead:01fe:0a02:0b03:6007:8009:a001" + + nft "add element inet filter test { icmpv6 . $a1 }" + + dst_addr6="$a2" + send_icmp6 + + if [ "$(count_packets)" -gt "0" ]; then + err "False match for $a2" + return 1 + fi +} + +test_reported_issues() { + eval test_bug_"${subtest}" +} + +# Run everything in a separate network namespace +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } +tmp="$(mktemp)" +trap cleanup_exit EXIT + +# Entry point for test runs +passed=0 +for name in ${TESTS}; do + printf "TEST: %s\n" "$(echo "$name" | tr '_' ' ')" + if [ "${name}" = "reported_issues" ]; then + SUBTESTS="${BUGS}" + else + SUBTESTS="${TYPES}" + fi + + for subtest in ${SUBTESTS}; do + eval desc=\$TYPE_"${subtest}" + IFS=' +' + for __line in ${desc}; do + # shellcheck disable=SC2086 + eval ${__line%% *}=\"${__line##* }\"; + done + IFS=' +' + + if [ "${name}" = "concurrency" ] && \ + [ "${race_repeat}" = "0" ]; then + continue + fi + if [ "${name}" = "performance" ] && \ + [ "${perf_duration}" = "0" ]; then + continue + fi + + [ "$KSFT_MACHINE_SLOW" = "yes" ] && count=1 + + printf " %-32s " "${display}" + tthen=$(date +%s) + eval test_"${name}" + ret=$? + + tnow=$(date +%s) + printf "%5ds%-30s" $((tnow-tthen)) + + if [ $ret -eq 0 ]; then + printf "[ OK ]\n" + info_flush + passed=$((passed + 1)) + elif [ $ret -eq 1 ]; then + printf "[FAIL]\n" + err_flush + exit 1 + elif [ $ret -eq ${ksft_skip} ]; then + printf "[SKIP]\n" + err_flush + fi + done +done + +[ ${passed} -eq 0 ] && exit ${ksft_skip} || exit 0 diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh new file mode 100755 index 000000000000..5d276995a5c5 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# + +source lib.sh + +[ "$KSFT_MACHINE_SLOW" = yes ] && exit ${ksft_skip} + +NFT_CONCAT_RANGE_TESTS="performance" exec ./nft_concat_range.sh diff --git a/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh new file mode 100755 index 000000000000..abcaa7337197 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh @@ -0,0 +1,171 @@ +#!/bin/bash +# +# This tests connection tracking helper assignment: +# 1. can attach ftp helper to a connection from nft ruleset. +# 2. auto-assign still works. +# +# Kselftest framework requirement - SKIP code is 4. + +source lib.sh + +ret=0 + +testipv6=1 + +checktool "socat -h" "run test without socat" +checktool "conntrack --version" "run test without conntrack" +checktool "nft --version" "run test without nft" + +cleanup() +{ + ip netns pids "$ns1" | xargs kill 2>/dev/null + + ip netns del "$ns1" + ip netns del "$ns2" +} + +trap cleanup EXIT + +setup_ns ns1 ns2 + +if ! ip link add veth0 netns "$ns1" type veth peer name veth0 netns "$ns2" > /dev/null 2>&1;then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi + +ip -net "$ns1" link set veth0 up +ip -net "$ns2" link set veth0 up + +ip -net "$ns1" addr add 10.0.1.1/24 dev veth0 +ip -net "$ns1" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$ns2" addr add 10.0.1.2/24 dev veth0 +ip -net "$ns2" addr add dead:1::2/64 dev veth0 nodad + +load_ruleset_family() { + local family=$1 + local ns=$2 + +ip netns exec "$ns" nft -f - <<EOF +table $family raw { + ct helper ftp { + type "ftp" protocol tcp + } + chain pre { + type filter hook prerouting priority 0; policy accept; + tcp dport 2121 ct helper set "ftp" + } + chain output { + type filter hook output priority 0; policy accept; + tcp dport 2121 ct helper set "ftp" + } +} +EOF + return $? +} + +check_for_helper() +{ + local netns=$1 + local message=$2 + local port=$3 + + if echo "$message" |grep -q 'ipv6';then + local family="ipv6" + else + local family="ipv4" + fi + + if ! ip netns exec "$netns" conntrack -L -f $family -p tcp --dport "$port" 2> /dev/null |grep -q 'helper=ftp';then + if [ "$autoassign" -eq 0 ] ;then + echo "FAIL: ${netns} did not show attached helper $message" 1>&2 + ret=1 + else + echo "PASS: ${netns} did not show attached helper $message" 1>&2 + fi + else + if [ "$autoassign" -eq 0 ] ;then + echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2 + else + echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2 + ret=1 + fi + fi + + return 0 +} + +listener_ready() +{ + ns="$1" + port="$2" + proto="$3" + ss -N "$ns" -lnt -o "sport = :$port" | grep -q "$port" +} + +test_helper() +{ + local port=$1 + local autoassign=$2 + + if [ "$autoassign" -eq 0 ] ;then + msg="set via ruleset" + else + msg="auto-assign" + fi + + ip netns exec "$ns2" socat -t 3 -u -4 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null & + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" "$port" "-4" + + ip netns exec "$ns1" socat -u -4 STDIN TCP:10.0.1.2:"$port" < /dev/null > /dev/null + + check_for_helper "$ns1" "ip $msg" "$port" "$autoassign" + check_for_helper "$ns2" "ip $msg" "$port" "$autoassign" + + if [ $testipv6 -eq 0 ] ;then + return 0 + fi + + ip netns exec "$ns1" conntrack -F 2> /dev/null + ip netns exec "$ns2" conntrack -F 2> /dev/null + + ip netns exec "$ns2" socat -t 3 -u -6 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null & + busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" "$port" "-6" + + ip netns exec "$ns1" socat -t 3 -u -6 STDIN TCP:"[dead:1::2]":"$port" < /dev/null > /dev/null + + check_for_helper "$ns1" "ipv6 $msg" "$port" + check_for_helper "$ns2" "ipv6 $msg" "$port" +} + +if ! load_ruleset_family ip "$ns1"; then + echo "FAIL: ${ns1} cannot load ip ruleset" 1>&2 + exit 1 +fi + +if ! load_ruleset_family ip6 "$ns1"; then + echo "SKIP: ${ns1} cannot load ip6 ruleset" 1>&2 + testipv6=0 +fi + +if ! load_ruleset_family inet "${ns2}"; then + echo "SKIP: ${ns1} cannot load inet ruleset" 1>&2 + if ! load_ruleset_family ip "${ns2}"; then + echo "FAIL: ${ns2} cannot load ip ruleset" 1>&2 + exit 1 + fi + + if [ "$testipv6" -eq 1 ] ;then + if ! load_ruleset_family ip6 "$ns2"; then + echo "FAIL: ${ns2} cannot load ip6 ruleset" 1>&2 + exit 1 + fi + fi +fi + +test_helper 2121 0 +ip netns exec "$ns1" sysctl -qe 'net.netfilter.nf_conntrack_helper=1' +ip netns exec "$ns2" sysctl -qe 'net.netfilter.nf_conntrack_helper=1' +test_helper 21 1 + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh new file mode 100755 index 000000000000..9929a9ffef65 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_fib.sh @@ -0,0 +1,847 @@ +#!/bin/bash +# +# This tests the fib expression. +# +# Kselftest framework requirement - SKIP code is 4. +# +# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99 +# dead:1::99 dead:1::1 dead:2::1 dead:2::99 +# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2 + +source lib.sh + +ret=0 + +timeout=4 + +log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) + +cleanup() +{ + cleanup_all_ns + + [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns +} + +checktool "nft --version" "run test without nft" + +setup_ns nsrouter ns1 ns2 + +trap cleanup EXIT + +if dmesg | grep -q ' nft_rpfilter: ';then + dmesg -c | grep ' nft_rpfilter: ' + echo "WARN: a previous test run has failed" 1>&2 +fi + +sysctl -q net.netfilter.nf_log_all_netns=1 + +load_ruleset() { + local netns=$1 + +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority 0; policy accept; + fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop + } +} +EOF +} + +load_input_ruleset() { + local netns=$1 + +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table inet filter { + chain input { + type filter hook input priority 0; policy accept; + fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop + } +} +EOF +} + +load_pbr_ruleset() { + local netns=$1 + +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table inet filter { + chain forward { + type filter hook forward priority raw; + fib saddr . iif oif gt 0 accept + log drop + } +} +EOF +} + +load_type_ruleset() { + local netns=$1 + + for family in ip ip6;do +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table $family filter { + chain type_match_in { + fib daddr type local counter comment "daddr configured on other iface" + fib daddr . iif type local counter comment "daddr configured on iif" + fib daddr type unicast counter comment "daddr not local" + fib daddr . iif type unicast counter comment "daddr not configured on iif" + } + + chain type_match_out { + fib daddr type unicast counter + fib daddr . oif type unicast counter + fib daddr type local counter + fib daddr . oif type local counter + } + + chain prerouting { + type filter hook prerouting priority 0; + icmp type echo-request counter jump type_match_in + icmpv6 type echo-request counter jump type_match_in + } + + chain input { + type filter hook input priority 0; + icmp type echo-request counter jump type_match_in + icmpv6 type echo-request counter jump type_match_in + } + + chain forward { + type filter hook forward priority 0; + icmp type echo-request counter jump type_match_in + icmpv6 type echo-request counter jump type_match_in + } + + chain output { + type filter hook output priority 0; + icmp type echo-request counter jump type_match_out + icmpv6 type echo-request counter jump type_match_out + } + + chain postrouting { + type filter hook postrouting priority 0; + icmp type echo-request counter jump type_match_out + icmpv6 type echo-request counter jump type_match_out + } +} +EOF +done +} + +reload_type_ruleset() { + ip netns exec "$1" nft flush table ip filter + ip netns exec "$1" nft flush table ip6 filter + load_type_ruleset "$1" +} + +check_fib_type_counter_family() { + local family="$1" + local want="$2" + local ns="$3" + local chain="$4" + local what="$5" + local errmsg="$6" + + if ! ip netns exec "$ns" nft list chain "$family" filter "$chain" | grep "$what" | grep -q "packets $want";then + echo "Netns $ns $family fib type counter doesn't match expected packet count of $want for $what $errmsg" 1>&2 + ip netns exec "$ns" nft list chain "$family" filter "$chain" + ret=1 + return 1 + fi + + return 0 +} + +check_fib_type_counter() { + check_fib_type_counter_family "ip" "$@" || return 1 + check_fib_type_counter_family "ip6" "$@" || return 1 +} + +load_ruleset_count() { + local netns=$1 + +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority 0; policy accept; + ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop + ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop + } +} +EOF +} + +check_drops() { + if dmesg | grep -q ' nft_rpfilter: ';then + dmesg | grep ' nft_rpfilter: ' + echo "FAIL: rpfilter did drop packets" + ret=1 + return 1 + fi + + return 0 +} + +check_fib_counter() { + local want=$1 + local ns=$2 + local address=$3 + + if ! ip netns exec "$ns" nft list table inet filter | grep 'fib saddr . iif' | grep "$address" | grep -q "packets $want";then + echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2 + ip netns exec "$ns" nft list table inet filter + return 1 + fi + + if [ "$want" -gt 0 ]; then + echo "PASS: fib expression did drop packets for $address" + fi + + return 0 +} + +load_ruleset "$nsrouter" +load_ruleset "$ns1" +load_ruleset "$ns2" + +if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2" + +ip -net "$nsrouter" link set veth0 up +ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$nsrouter" link set veth1 up +ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1 +ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad + +ip -net "$ns1" link set eth0 up +ip -net "$ns2" link set eth0 up + +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" route add default via dead:1::1 + +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns2" route add default via dead:2::1 + +test_ping() { + local daddr4=$1 + local daddr6=$2 + + if ! ip netns exec "$ns1" ping -c 1 -q "$daddr4" > /dev/null; then + check_drops + echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2 + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q "$daddr6" > /dev/null; then + check_drops + echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2 + return 1 + fi + + return 0 +} + +test_ping_unreachable() { + local daddr4=$1 + local daddr6=$2 + + if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr4" > /dev/null; then + echo "FAIL: ${ns1} could reach $daddr4" 1>&2 + return 1 + fi + + if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr6" > /dev/null; then + echo "FAIL: ${ns1} could reach $daddr6" 1>&2 + return 1 + fi + + return 0 +} + +test_fib_type() { + local notice="$1" + local errmsg="addr-on-if" + local lret=0 + + if ! load_type_ruleset "$nsrouter";then + echo "SKIP: Could not load fib type ruleset" + [ $ret -eq 0 ] && ret=$ksft_skip + return + fi + + # makes router receive packet for addresses configured on incoming + # interface. + test_ping 10.0.1.1 dead:1::1 || return 1 + + # expectation: triggers all 'local' in prerouting/input. + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type local" "$errmsg" || lret=1 + + reload_type_ruleset "$nsrouter" + # makes router receive packet for address configured on a different (but local) + # interface. + test_ping 10.0.2.1 dead:2::1 || return 1 + + # expectation: triggers 'unicast' in prerouting/input for daddr . iif and local for 'daddr'. + errmsg="addr-on-host" + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1 + + reload_type_ruleset "$nsrouter" + test_ping 10.0.2.99 dead:2::99 || return 1 + errmsg="addr-on-otherhost" + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type unicast" "$errmsg" || lret=1 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1 + + if [ $lret -eq 0 ];then + echo "PASS: fib expression address types match ($notice)" + else + echo "FAIL: fib expression address types match ($notice)" + ret=1 + fi +} + +test_fib_vrf_dev_add_dummy() +{ + if ! ip -net "$nsrouter" link add dummy0 type dummy ;then + echo "SKIP: VRF tests: dummy device type not supported" + return 1 + fi + + if ! ip -net "$nsrouter" link add tvrf type vrf table 9876;then + echo "SKIP: VRF tests: vrf device type not supported" + return 1 + fi + + ip -net "$nsrouter" link set dummy0 master tvrf + ip -net "$nsrouter" link set dummy0 up + ip -net "$nsrouter" link set tvrf up +} + +load_ruleset_vrf() +{ +# Due to the many different possible combinations using named counters +# or one-rule-per-expected-result is complex. +# +# Instead, add dynamic sets for the fib modes +# (fib address type, fib output interface lookup .. ), +# and then add the obtained fib results to them. +# +# The test is successful if the sets contain the expected results +# and no unexpected extra entries existed. +ip netns exec "$nsrouter" nft -f - <<EOF +flush ruleset +table inet t { + set fibif4 { + typeof meta iif . ip daddr . fib daddr oif + flags dynamic + counter + } + + set fibif4iif { + typeof meta iif . ip daddr . fib daddr . iif oif + flags dynamic + counter + } + + set fibif6 { + typeof meta iif . ip6 daddr . fib daddr oif + flags dynamic + counter + } + + set fibif6iif { + typeof meta iif . ip6 daddr . fib daddr . iif oif + flags dynamic + counter + } + + set fibtype4 { + typeof meta iif . ip daddr . fib daddr type + flags dynamic + counter + } + + set fibtype4iif { + typeof meta iif . ip daddr . fib daddr . iif type + flags dynamic + counter + } + + set fibtype6 { + typeof meta iif . ip6 daddr . fib daddr type + flags dynamic + counter + } + + set fibtype6iif { + typeof meta iif . ip6 daddr . fib daddr . iif type + flags dynamic + counter + } + + chain fib_test { + meta nfproto ipv4 jump { + add @fibif4 { meta iif . ip daddr . fib daddr oif } + add @fibif4iif { meta iif . ip daddr . fib daddr . iif oif } + add @fibtype4 { meta iif . ip daddr . fib daddr type } + add @fibtype4iif { meta iif . ip daddr . fib daddr . iif type } + + add @fibif4 { meta iif . ip saddr . fib saddr oif } + add @fibif4iif { meta iif . ip saddr . fib saddr . iif oif } + } + + meta nfproto ipv6 jump { + add @fibif6 { meta iif . ip6 daddr . fib daddr oif } + add @fibif6iif { meta iif . ip6 daddr . fib daddr . iif oif } + add @fibtype6 { meta iif . ip6 daddr . fib daddr type } + add @fibtype6iif { meta iif . ip6 daddr . fib daddr . iif type } + + add @fibif6 { meta iif . ip6 saddr . fib saddr oif } + add @fibif6iif { meta iif . ip6 saddr . fib saddr . iif oif } + } + } + + chain prerouting { + type filter hook prerouting priority 0; + icmp type echo-request counter jump fib_test + + # neighbour discovery to be ignored. + icmpv6 type echo-request counter jump fib_test + } +} +EOF + +if [ $? -ne 0 ] ;then + echo "SKIP: Could not load ruleset for fib vrf test" + [ $ret -eq 0 ] && ret=$ksft_skip + return 1 +fi +} + +check_type() +{ + local setname="$1" + local iifname="$2" + local addr="$3" + local type="$4" + local count="$5" + + [ -z "$count" ] && count=1 + + if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -q "counter packets $count";then + echo "FAIL: did not find $iifname . $addr . $type in $setname" + ip netns exec "$nsrouter" nft list set inet t "$setname" + ret=1 + return 1 + fi + + # delete the entry, this allows to check if anything unexpected appeared + # at the end of the test run: all dynamic sets should be empty by then. + if ! ip netns exec "$nsrouter" nft delete element inet t "$setname" { "$iifname" . "$addr" . "$type" } ; then + echo "FAIL: can't delete $iifname . $addr . $type in $setname" + ip netns exec "$nsrouter" nft list set inet t "$setname" + ret=1 + return 1 + fi + + return 0 +} + +check_local() +{ + check_type $@ "local" 1 +} + +check_unicast() +{ + check_type $@ "unicast" 1 +} + +check_rpf() +{ + check_type $@ +} + +check_fib_vrf_sets_empty() +{ + local setname="" + local lret=0 + + # A non-empty set means that we have seen unexpected packets OR + # that a fib lookup provided unexpected results. + for setname in "fibif4" "fibif4iif" "fibif6" "fibif6iif" \ + "fibtype4" "fibtype4iif" "fibtype6" "fibtype6iif";do + if ip netns exec "$nsrouter" nft list set inet t "$setname" | grep -q elements;then + echo "FAIL: $setname not empty" + ip netns exec "$nsrouter" nft list set inet t "$setname" + ret=1 + lret=1 + fi + done + + return $lret +} + +check_fib_vrf_type() +{ + local msg="$1" + + local addr + # the incoming interface is always veth0. As its not linked to a VRF, + # the 'tvrf' device should NOT show up anywhere. + local ifname="veth0" + local lret=0 + + # local_veth0, local_veth1 + for addr in "10.0.1.1" "10.0.2.1"; do + check_local fibtype4 "$ifname" "$addr" || lret=1 + check_type fibif4 "$ifname" "$addr" "0" || lret=1 + done + for addr in "dead:1::1" "dead:2::1";do + check_local fibtype6 "$ifname" "$addr" || lret=1 + check_type fibif6 "$ifname" "$addr" "0" || lret=1 + done + + # when restricted to the incoming interface, 10.0.1.1 should + # be 'local', but 10.0.2.1 unicast. + check_local fibtype4iif "$ifname" "10.0.1.1" || lret=1 + check_unicast fibtype4iif "$ifname" "10.0.2.1" || lret=1 + + # same for the ipv6 addresses. + check_local fibtype6iif "$ifname" "dead:1::1" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:2::1" || lret=1 + + # None of these addresses should find a valid route when restricting + # to the incoming interface (we ask for daddr - 10.0.1.1/2.1 are + # reachable via 'lo'. + for addr in "10.0.1.1" "10.0.2.1" "10.9.9.1" "10.9.9.2";do + check_type fibif4iif "$ifname" "$addr" "0" || lret=1 + done + + # expect default route (veth1), dummy0 is part of VRF but iif isn't. + for addr in "10.9.9.1" "10.9.9.2";do + check_unicast fibtype4 "$ifname" "$addr" || lret=1 + check_unicast fibtype4iif "$ifname" "$addr" || lret=1 + check_type fibif4 "$ifname" "$addr" "veth1" || lret=1 + done + for addr in "dead:9::1" "dead:9::2";do + check_unicast fibtype6 "$ifname" "$addr" || lret=1 + check_unicast fibtype6iif "$ifname" "$addr" || lret=1 + check_type fibif6 "$ifname" "$addr" "veth1" || lret=1 + done + + # same for the IPv6 equivalent addresses. + for addr in "dead:1::1" "dead:2::1" "dead:9::1" "dead:9::2";do + check_type fibif6iif "$ifname" "$addr" "0" || lret=1 + done + + check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1 + check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1 + check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1 + + check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1 + check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1 + check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1 + check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1 + + check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 5 || lret=1 + check_rpf fibif4iif "$ifname" "10.0.1.99" "veth0" 5 || lret=1 + check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 5 || lret=1 + check_rpf fibif6iif "$ifname" "dead:1::99" "veth0" 5 || lret=1 + + check_fib_vrf_sets_empty || lret=1 + + if [ $lret -eq 0 ];then + echo "PASS: $msg" + else + echo "FAIL: $msg" + ret=1 + fi +} + +check_fib_veth_vrf_type() +{ + local msg="$1" + + local addr + local ifname + local setname + local lret=0 + + # as veth0 is now part of tvrf interface, packets will be seen + # twice, once with iif veth0, then with iif tvrf. + + for ifname in "veth0" "tvrf"; do + for addr in "10.0.1.1" "10.9.9.1"; do + check_local fibtype4 "$ifname" "$addr" || lret=1 + # addr local, but nft_fib doesn't return routes with RTN_LOCAL. + check_type fibif4 "$ifname" "$addr" 0 || lret=1 + check_type fibif4iif "$ifname" "$addr" 0 || lret=1 + done + + for addr in "dead:1::1" "dead:9::1"; do + check_local fibtype6 "$ifname" "$addr" || lret=1 + # same, address is local but no route is returned for lo. + check_type fibif6 "$ifname" "$addr" 0 || lret=1 + check_type fibif6iif "$ifname" "$addr" 0 || lret=1 + done + + for t in fibtype4 fibtype4iif; do + check_unicast "$t" "$ifname" 10.9.9.2 || lret=1 + done + for t in fibtype6 fibtype6iif; do + check_unicast "$t" "$ifname" dead:9::2 || lret=1 + done + + check_unicast fibtype4iif "$ifname" "10.9.9.1" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:9::1" || lret=1 + + check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1 + check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1 + + check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1 + + check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1 + check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1 + check_type fibif4 "$ifname" "10.9.9.2" "dummy0" || lret=1 + check_type fibif6 "$ifname" "dead:9::2" "dummy0" || lret=1 + + # restricted to iif -- MUST NOT provide result, its != $ifname. + check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1 + check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1 + + check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 4 || lret=1 + check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 4 || lret=1 + check_rpf fibif4iif "$ifname" "10.0.1.99" "$ifname" 4 || lret=1 + check_rpf fibif6iif "$ifname" "dead:1::99" "$ifname" 4 || lret=1 + done + + check_local fibtype4iif "veth0" "10.0.1.1" || lret=1 + check_local fibtype6iif "veth0" "dead:1::1" || lret=1 + + check_unicast fibtype4iif "tvrf" "10.0.1.1" || lret=1 + check_unicast fibtype6iif "tvrf" "dead:1::1" || lret=1 + + # 10.9.9.2 should not provide a result for iif veth, but + # should when iif is tvrf. + # This is because its reachable via dummy0 which is part of + # tvrf. iif veth0 MUST conceal the dummy0 result (i.e. return oif 0). + check_type fibif4iif "veth0" "10.9.9.2" 0 || lret=1 + check_type fibif6iif "veth0" "dead:9::2" 0 || lret=1 + + check_type fibif4iif "tvrf" "10.9.9.2" "tvrf" || lret=1 + check_type fibif6iif "tvrf" "dead:9::2" "tvrf" || lret=1 + + check_fib_vrf_sets_empty || lret=1 + + if [ $lret -eq 0 ];then + echo "PASS: $msg" + else + echo "FAIL: $msg" + ret=1 + fi +} + +# Extends nsrouter config by adding dummy0+vrf. +# +# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99 +# dead:1::99 dead:1::1 dead:2::1 dead:2::99 +# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2 +# [dummy0] +# 10.9.9.1 +# dead:9::1 +# [tvrf] +test_fib_vrf() +{ + local cntname="" + + if ! test_fib_vrf_dev_add_dummy; then + [ $ret -eq 0 ] && ret=$ksft_skip + return + fi + + ip -net "$nsrouter" addr add "10.9.9.1"/24 dev dummy0 + ip -net "$nsrouter" addr add "dead:9::1"/64 dev dummy0 nodad + + ip -net "$nsrouter" route add default via 10.0.2.99 + ip -net "$nsrouter" route add default via dead:2::99 + + load_ruleset_vrf || return + + # no echo reply for these addresses: The dummy interface is part of tvrf, + # but veth0 (incoming interface) isn't linked to it. + test_ping_unreachable "10.9.9.1" "dead:9::1" & + test_ping_unreachable "10.9.9.2" "dead:9::2" & + + # expect replies from these. + test_ping "10.0.1.1" "dead:1::1" + test_ping "10.0.2.1" "dead:2::1" + test_ping "10.0.2.99" "dead:2::99" + + wait + + check_fib_vrf_type "fib expression address types match (iif not in vrf)" + + # second round: this time, make veth0 (rx interface) part of the vrf. + # 10.9.9.1 / dead:9::1 become reachable from ns1, while ns2 + # becomes unreachable. + ip -net "$nsrouter" link set veth0 master tvrf + ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad + + # this reload should not be needed, but in case + # there is some error (missing or unexpected entry) this will prevent them + # from leaking into round 2. + load_ruleset_vrf || return + + test_ping "10.0.1.1" "dead:1::1" + test_ping "10.9.9.1" "dead:9::1" + + # ns2 should no longer be reachable (veth1 not in vrf) + test_ping_unreachable "10.0.2.99" "dead:2::99" & + + # vrf via dummy0, but host doesn't exist + test_ping_unreachable "10.9.9.2" "dead:9::2" & + + wait + + check_fib_veth_vrf_type "fib expression address types match (iif in vrf)" +} + +ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + +test_ping 10.0.2.1 dead:2::1 || exit 1 +check_drops + +test_ping 10.0.2.99 dead:2::99 || exit 1 +check_drops + +[ $ret -eq 0 ] && echo "PASS: fib expression did not cause unwanted packet drops" + +load_input_ruleset "$ns1" + +test_ping 127.0.0.1 ::1 +check_drops + +test_ping 10.0.1.99 dead:1::99 +check_drops + +[ $ret -eq 0 ] && echo "PASS: fib expression did not discard loopback packets" + +load_input_ruleset "$ns1" + +test_ping 127.0.0.1 ::1 || exit 1 +check_drops || exit 1 + +test_ping 10.0.1.99 dead:1::99 || exit 1 +check_drops || exit 1 + +echo "PASS: fib expression did not discard loopback packets" + +ip netns exec "$nsrouter" nft flush table inet filter + +ip -net "$ns1" route del default +ip -net "$ns1" -6 route del default + +ip -net "$ns1" addr del 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr del dead:1::99/64 dev eth0 + +ip -net "$ns1" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns1" addr add dead:2::99/64 dev eth0 nodad + +ip -net "$ns1" route add default via 10.0.2.1 +ip -net "$ns1" -6 route add default via dead:2::1 + +ip -net "$nsrouter" addr add dead:2::1/64 dev veth0 nodad + +# switch to ruleset that doesn't log, this time +# its expected that this does drop the packets. +load_ruleset_count "$nsrouter" + +# ns1 has a default route, but nsrouter does not. +# must not check return value, ping to 1.1.1.1 will +# fail. +check_fib_counter 0 "$nsrouter" 1.1.1.1 || exit 1 +check_fib_counter 0 "$nsrouter" 1c3::c01d || exit 1 + +ip netns exec "$ns1" ping -W 0.5 -c 1 -q 1.1.1.1 > /dev/null +check_fib_counter 1 "$nsrouter" 1.1.1.1 || exit 1 + +ip netns exec "$ns1" ping -W 0.5 -i 0.1 -c 3 -q 1c3::c01d > /dev/null +check_fib_counter 3 "$nsrouter" 1c3::c01d || exit 1 + +# delete all rules +ip netns exec "$ns1" nft flush ruleset +ip netns exec "$ns2" nft flush ruleset +ip netns exec "$nsrouter" nft flush ruleset + +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad + +ip -net "$ns1" addr del 10.0.2.99/24 dev eth0 +ip -net "$ns1" addr del dead:2::99/64 dev eth0 + +ip -net "$nsrouter" addr del dead:2::1/64 dev veth0 + +# ... pbr ruleset for the router, check iif+oif. +if ! load_pbr_ruleset "$nsrouter";then + echo "SKIP: Could not load fib forward ruleset" + [ "$ret" -eq 0 ] && ret=$ksft_skip +fi + +ip -net "$nsrouter" rule add from all table 128 +ip -net "$nsrouter" rule add from all iif veth0 table 129 +ip -net "$nsrouter" route add table 128 to 10.0.1.0/24 dev veth0 +ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1 + +# drop main ipv4 table +ip -net "$nsrouter" -4 rule delete table main + +if test_ping 10.0.2.99 dead:2::99;then + echo "PASS: fib expression forward check with policy based routing" +else + echo "FAIL: fib expression forward check with policy based routing" + ret=1 +fi + +test_fib_type "policy routing" +ip netns exec "$nsrouter" nft delete table ip filter +ip netns exec "$nsrouter" nft delete table ip6 filter + +# Un-do policy routing changes +ip -net "$nsrouter" rule del from all table 128 +ip -net "$nsrouter" rule del from all iif veth0 table 129 + +ip -net "$nsrouter" route del table 128 to 10.0.1.0/24 dev veth0 +ip -net "$nsrouter" route del table 129 to 10.0.2.0/24 dev veth1 + +ip -net "$ns1" -4 route del default +ip -net "$ns1" -6 route del default + +ip -net "$ns1" -4 route add default via 10.0.1.1 +ip -net "$ns1" -6 route add default via dead:1::1 + +ip -net "$nsrouter" -4 rule add from all table main priority 32766 + +test_fib_type "default table" +ip netns exec "$nsrouter" nft delete table ip filter +ip netns exec "$nsrouter" nft delete table ip6 filter + +test_fib_vrf + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh new file mode 100755 index 000000000000..a4ee5496f2a1 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -0,0 +1,674 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This tests basic flowtable functionality. +# Creates following default topology: +# +# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000) +# Router1 is the one doing flow offloading, Router2 has no special +# purpose other than having a link that is smaller than either Originator +# and responder, i.e. TCPMSS announced values are too large and will still +# result in fragmentation and/or PMTU discovery. +# +# You can check with different Orgininator/Link/Responder MTU eg: +# nft_flowtable.sh -o8000 -l1500 -r2000 +# + +source lib.sh + +ret=0 +SOCAT_TIMEOUT=60 + +nsin="" +ns1out="" +ns2out="" + +log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) + +checktool "nft --version" "run test without nft tool" +checktool "socat -h" "run test without socat" + +setup_ns ns1 ns2 nsr1 nsr2 + +cleanup() { + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + + cleanup_all_ns + + rm -f "$nsin" "$ns1out" "$ns2out" + + [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns" +} + +trap cleanup EXIT + +sysctl -q net.netfilter.nf_log_all_netns=1 + +ip link add veth0 netns "$nsr1" type veth peer name eth0 netns "$ns1" +ip link add veth1 netns "$nsr1" type veth peer name veth0 netns "$nsr2" + +ip link add veth1 netns "$nsr2" type veth peer name eth0 netns "$ns2" + +for dev in veth0 veth1; do + ip -net "$nsr1" link set "$dev" up + ip -net "$nsr2" link set "$dev" up +done + +ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$nsr2" addr add 10.0.2.1/24 dev veth1 +ip -net "$nsr2" addr add dead:2::1/64 dev veth1 nodad + +# set different MTUs so we need to push packets coming from ns1 (large MTU) +# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1), +# or to do PTMU discovery (send ICMP error back to originator). +# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers +# is NOT the lowest link mtu. + +omtu=9000 +lmtu=1500 +rmtu=2000 + +filesize=$((2 * 1024 * 1024)) + +usage(){ + echo "nft_flowtable.sh [OPTIONS]" + echo + echo "MTU options" + echo " -o originator" + echo " -l link" + echo " -r responder" + exit 1 +} + +while getopts "o:l:r:s:" o +do + case $o in + o) omtu=$OPTARG;; + l) lmtu=$OPTARG;; + r) rmtu=$OPTARG;; + s) filesize=$OPTARG;; + *) usage;; + esac +done + +if ! ip -net "$nsr1" link set veth0 mtu "$omtu"; then + exit 1 +fi + +ip -net "$ns1" link set eth0 mtu "$omtu" + +if ! ip -net "$nsr2" link set veth1 mtu "$rmtu"; then + exit 1 +fi + +if ! ip -net "$nsr1" link set veth1 mtu "$lmtu"; then + exit 1 +fi + +if ! ip -net "$nsr2" link set veth0 mtu "$lmtu"; then + exit 1 +fi + +ip -net "$ns2" link set eth0 mtu "$rmtu" + +# transfer-net between nsr1 and nsr2. +# these addresses are not used for connections. +ip -net "$nsr1" addr add 192.168.10.1/24 dev veth1 +ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad + +ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0 +ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad + +for i in 0 1; do + ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null + ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null +done + +for ns in "$ns1" "$ns2";do + ip -net "$ns" link set eth0 up + + if ! ip netns exec "$ns" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then + echo "ERROR: Check Originator/Responder values (problem during address addition)" + exit 1 + fi + # don't set ip DF bit for first two tests + ip netns exec "$ns" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null +done + +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad +ip -net "$ns1" route add default via dead:1::1 +ip -net "$ns2" route add default via dead:2::1 + +ip -net "$nsr1" route add default via 192.168.10.2 +ip -net "$nsr2" route add default via 192.168.10.1 + +ip netns exec "$nsr1" nft -f - <<EOF +table inet filter { + flowtable f1 { + hook ingress priority 0 + devices = { veth0, veth1 } + } + + counter routed_orig { } + counter routed_repl { } + + chain forward { + type filter hook forward priority 0; policy drop; + + # flow offloaded? Tag ct with mark 1, so we can detect when it fails. + meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept + + # count packets supposedly offloaded as per direction. + ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept + + ct state established,related accept + + meta nfproto ipv4 meta l4proto icmp accept + meta nfproto ipv6 meta l4proto icmpv6 accept + } +} +EOF + +if [ $? -ne 0 ]; then + echo "SKIP: Could not load nft ruleset" + exit $ksft_skip +fi + +ip netns exec "$ns2" nft -f - <<EOF +table inet filter { + counter ip4dscp0 { } + counter ip4dscp3 { } + + chain input { + type filter hook input priority 0; policy accept; + meta l4proto tcp goto { + ip dscp cs3 counter name ip4dscp3 accept + ip dscp 0 counter name ip4dscp0 accept + } + } +} +EOF + +if [ $? -ne 0 ]; then + echo -n "SKIP: Could not load ruleset: " + nft --version + exit $ksft_skip +fi + +# test basic connectivity +if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then + echo "ERROR: $ns1 cannot reach ns2" 1>&2 + exit 1 +fi + +if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then + echo "ERROR: $ns2 cannot reach $ns1" 1>&2 + exit 1 +fi + +nsin=$(mktemp) +ns1out=$(mktemp) +ns2out=$(mktemp) + +make_file() +{ + name="$1" + sz="$2" + + head -c "$sz" < /dev/urandom > "$name" +} + +check_counters() +{ + local what=$1 + local ok=1 + + local orig repl + orig=$(ip netns exec "$nsr1" nft reset counter inet filter routed_orig | grep packets) + repl=$(ip netns exec "$nsr1" nft reset counter inet filter routed_repl | grep packets) + + local orig_cnt=${orig#*bytes} + local repl_cnt=${repl#*bytes} + + local fs + fs=$(du -sb "$nsin") + local max_orig=${fs%%/*} + local max_repl=$((max_orig)) + + # flowtable fastpath should bypass normal routing one, i.e. the counters in forward hook + # should always be lower than the size of the transmitted file (max_orig). + if [ "$orig_cnt" -gt "$max_orig" ];then + echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig, reply counter $repl_cnt" 1>&2 + ret=1 + ok=0 + fi + + if [ "$repl_cnt" -gt $max_repl ];then + echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl, original counter $orig_cnt" 1>&2 + ret=1 + ok=0 + fi + + if [ $ok -eq 1 ]; then + echo "PASS: $what" + fi +} + +check_dscp() +{ + local what=$1 + local ok=1 + + local counter + counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp3 | grep packets) + + local pc4=${counter%*bytes*} + local pc4=${pc4#*packets} + + counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp0 | grep packets) + local pc4z=${counter%*bytes*} + local pc4z=${pc4z#*packets} + + case "$what" in + "dscp_none") + if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then + echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 + ret=1 + ok=0 + fi + ;; + "dscp_fwd") + if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then + echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 + ret=1 + ok=0 + fi + ;; + "dscp_ingress") + if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then + echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + ret=1 + ok=0 + fi + ;; + "dscp_egress") + if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then + echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + ret=1 + ok=0 + fi + ;; + *) + echo "FAIL: Unknown DSCP check" 1>&2 + ret=1 + ok=0 + esac + + if [ "$ok" -eq 1 ] ;then + echo "PASS: $what: dscp packet counters match" + fi +} + +check_transfer() +{ + in=$1 + out=$2 + what=$3 + + if ! cmp "$in" "$out" > /dev/null 2>&1; then + echo "FAIL: file mismatch for $what" 1>&2 + ls -l "$in" + ls -l "$out" + return 1 + fi + + return 0 +} + +listener_ready() +{ + ss -N "$nsb" -lnt -o "sport = :12345" | grep -q 12345 +} + +test_tcp_forwarding_ip() +{ + local nsa=$1 + local nsb=$2 + local dstip=$3 + local dstport=$4 + local lret=0 + + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" & + lpid=$! + + busywait 1000 listener_ready + + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out" + + wait $lpid + + if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then + lret=1 + ret=1 + fi + + if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then + lret=1 + ret=1 + fi + + return $lret +} + +test_tcp_forwarding() +{ + test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + + return $? +} + +test_tcp_forwarding_set_dscp() +{ + check_dscp "dscp_none" + +ip netns exec "$nsr1" nft -f - <<EOF +table netdev dscpmangle { + chain setdscp0 { + type filter hook ingress device "veth0" priority 0; policy accept + ip dscp set cs3 + } +} +EOF +if [ $? -eq 0 ]; then + test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + check_dscp "dscp_ingress" + + ip netns exec "$nsr1" nft delete table netdev dscpmangle +else + echo "SKIP: Could not load netdev:ingress for veth0" +fi + +ip netns exec "$nsr1" nft -f - <<EOF +table netdev dscpmangle { + chain setdscp0 { + type filter hook egress device "veth1" priority 0; policy accept + ip dscp set cs3 + } +} +EOF +if [ $? -eq 0 ]; then + test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + check_dscp "dscp_egress" + + ip netns exec "$nsr1" nft flush table netdev dscpmangle +else + echo "SKIP: Could not load netdev:egress for veth1" +fi + + # partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3 + # counters should have seen packets (before and after ft offload kicks in). + ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3 + test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + check_dscp "dscp_fwd" +} + +test_tcp_forwarding_nat() +{ + local lret + local pmtu + + test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + lret=$? + + pmtu=$3 + what=$4 + + if [ "$lret" -eq 0 ] ; then + if [ "$pmtu" -eq 1 ] ;then + check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what" + else + echo "PASS: flow offload for ns1/ns2 with masquerade $what" + fi + + test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666 + lret=$? + if [ "$pmtu" -eq 1 ] ;then + check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what" + elif [ "$lret" -eq 0 ] ; then + echo "PASS: flow offload for ns1/ns2 with dnat $what" + fi + fi + + return $lret +} + +make_file "$nsin" "$filesize" + +# First test: +# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. +# Due to MTU mismatch in both directions, all packets (except small packets like pure +# acks) have to be handled by normal forwarding path. Therefore, packet counters +# are not checked. +if test_tcp_forwarding "$ns1" "$ns2"; then + echo "PASS: flow offloaded for ns1/ns2" +else + echo "FAIL: flow offload for ns1/ns2:" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + +# delete default route, i.e. ns2 won't be able to reach ns1 and +# will depend on ns1 being masqueraded in nsr1. +# expect ns1 has nsr1 address. +ip -net "$ns2" route del default via 10.0.2.1 +ip -net "$ns2" route del default via dead:2::1 +ip -net "$ns2" route add 192.168.10.1 via 10.0.2.1 + +# Second test: +# Same, but with NAT enabled. Same as in first test: we expect normal forward path +# to handle most packets. +ip netns exec "$nsr1" nft -f - <<EOF +table ip nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345 + } + + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oifname "veth1" counter masquerade + } +} +EOF + +if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then + echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2 + exit 0 +fi + +if ! test_tcp_forwarding_nat "$ns1" "$ns2" 0 ""; then + echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + +# Third test: +# Same as second test, but with PMTU discovery enabled. This +# means that we expect the fastpath to handle packets as soon +# as the endpoints adjust the packet size. +ip netns exec "$ns1" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null +ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null + +# reset counters. +# With pmtu in-place we'll also check that nft counters +# are lower than file size and packets were forwarded via flowtable layer. +# For earlier tests (large mtus), packets cannot be handled via flowtable +# (except pure acks and other small packets). +ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null + +if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then + echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 + ip netns exec "$nsr1" nft list ruleset +fi + +# Another test: +# Add bridge interface br0 to Router1, with NAT enabled. +test_bridge() { +if ! ip -net "$nsr1" link add name br0 type bridge 2>/dev/null;then + echo "SKIP: could not add bridge br0" + [ "$ret" -eq 0 ] && ret=$ksft_skip + return +fi +ip -net "$nsr1" addr flush dev veth0 +ip -net "$nsr1" link set up dev veth0 +ip -net "$nsr1" link set veth0 master br0 +ip -net "$nsr1" addr add 10.0.1.1/24 dev br0 +ip -net "$nsr1" addr add dead:1::1/64 dev br0 nodad +ip -net "$nsr1" link set up dev br0 + +ip netns exec "$nsr1" sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null + +# br0 with NAT enabled. +ip netns exec "$nsr1" nft -f - <<EOF +flush table ip nat +table ip nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345 + } + + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oifname "veth1" counter masquerade + } +} +EOF + +if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "on bridge"; then + echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + + +# Another test: +# Add bridge interface br0 to Router1, with NAT and VLAN. +ip -net "$nsr1" link set veth0 nomaster +ip -net "$nsr1" link set down dev veth0 +ip -net "$nsr1" link add link veth0 name veth0.10 type vlan id 10 +ip -net "$nsr1" link set up dev veth0 +ip -net "$nsr1" link set up dev veth0.10 +ip -net "$nsr1" link set veth0.10 master br0 + +ip -net "$ns1" addr flush dev eth0 +ip -net "$ns1" link add link eth0 name eth0.10 type vlan id 10 +ip -net "$ns1" link set eth0 up +ip -net "$ns1" link set eth0.10 up +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0.10 +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" addr add dead:1::99/64 dev eth0.10 nodad + +if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "bridge and VLAN"; then + echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + +# restore test topology (remove bridge and VLAN) +ip -net "$nsr1" link set veth0 nomaster +ip -net "$nsr1" link set veth0 down +ip -net "$nsr1" link set veth0.10 down +ip -net "$nsr1" link delete veth0.10 type vlan +ip -net "$nsr1" link delete br0 type bridge +ip -net "$ns1" addr flush dev eth0.10 +ip -net "$ns1" link set eth0.10 down +ip -net "$ns1" link set eth0 down +ip -net "$ns1" link delete eth0.10 type vlan + +# restore address in ns1 and nsr1 +ip -net "$ns1" link set eth0 up +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" route add default via dead:1::1 +ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad +ip -net "$nsr1" link set up dev veth0 +} + +test_bridge + +KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1) +KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1) +SPI1=$RANDOM +SPI2=$RANDOM + +if [ $SPI1 -eq $SPI2 ]; then + SPI2=$((SPI2+1)) +fi + +do_esp() { + local ns=$1 + local me=$2 + local remote=$3 + local lnet=$4 + local rnet=$5 + local spi_out=$6 + local spi_in=$7 + + ip -net "$ns" xfrm state add src "$remote" dst "$me" proto esp spi "$spi_in" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$rnet" dst "$lnet" + ip -net "$ns" xfrm state add src "$me" dst "$remote" proto esp spi "$spi_out" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$lnet" dst "$rnet" + + # to encrypt packets as they go out (includes forwarded packets that need encapsulation) + ip -net "$ns" xfrm policy add src "$lnet" dst "$rnet" dir out tmpl src "$me" dst "$remote" proto esp mode tunnel priority 1 action allow + # to fwd decrypted packets after esp processing: + ip -net "$ns" xfrm policy add src "$rnet" dst "$lnet" dir fwd tmpl src "$remote" dst "$me" proto esp mode tunnel priority 1 action allow +} + +do_esp "$nsr1" 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2" + +do_esp "$nsr2" 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1" + +ip netns exec "$nsr1" nft delete table ip nat + +# restore default routes +ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1 +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns2" route add default via dead:2::1 + +if test_tcp_forwarding "$ns1" "$ns2"; then + check_counters "ipsec tunnel mode for ns1/ns2" +else + echo "FAIL: ipsec tunnel mode for ns1/ns2" + ip netns exec "$nsr1" nft list ruleset 1>&2 + ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2 +fi + +if [ "$1" = "" ]; then + low=1280 + mtu=$((65536 - low)) + o=$(((RANDOM%mtu) + low)) + l=$(((RANDOM%mtu) + low)) + r=$(((RANDOM%mtu) + low)) + + MINSIZE=$((2 * 1000 * 1000)) + MAXSIZE=$((64 * 1000 * 1000)) + + filesize=$(((RANDOM * RANDOM) % MAXSIZE)) + if [ "$filesize" -lt "$MINSIZE" ]; then + filesize=$((filesize+MINSIZE)) + fi + + echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize" + $0 -o "$o" -l "$l" -r "$r" -s "$filesize" +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh new file mode 100755 index 000000000000..5ff7be9daeee --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh @@ -0,0 +1,154 @@ +#!/bin/bash -e +# +# SPDX-License-Identifier: GPL-2.0 +# +# Torture nftables' netdevice notifier callbacks and related code by frequent +# renaming of interfaces which netdev-family chains and flowtables hook into. + +source lib.sh + +checktool "nft --version" "run test without nft tool" +checktool "iperf3 --version" "run test without iperf3 tool" + +# how many seconds to torture the kernel? +# default to 80% of max run time but don't exceed 48s +TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10)) +[[ $TEST_RUNTIME -gt 48 ]] && TEST_RUNTIME=48 + +trap "cleanup_all_ns" EXIT + +setup_ns nsc nsr nss + +ip -net $nsc link add cr0 type veth peer name rc0 netns $nsr +ip -net $nsc addr add 10.0.0.1/24 dev cr0 +ip -net $nsc link set cr0 up +ip -net $nsc route add default via 10.0.0.2 + +ip -net $nss link add sr0 type veth peer name rs0 netns $nsr +ip -net $nss addr add 10.1.0.1/24 dev sr0 +ip -net $nss link set sr0 up +ip -net $nss route add default via 10.1.0.2 + +ip -net $nsr addr add 10.0.0.2/24 dev rc0 +ip -net $nsr link set rc0 up +ip -net $nsr addr add 10.1.0.2/24 dev rs0 +ip -net $nsr link set rs0 up +ip netns exec $nsr sysctl -q net.ipv4.ip_forward=1 +ip netns exec $nsr sysctl -q net.ipv4.conf.all.forwarding=1 + +{ + echo "table netdev t {" + for ((i = 0; i < 10; i++)); do + cat <<-EOF + chain chain_rc$i { + type filter hook ingress device rc$i priority 0 + counter + } + chain chain_rs$i { + type filter hook ingress device rs$i priority 0 + counter + } + EOF + done + echo "}" + echo "table ip t {" + for ((i = 0; i < 10; i++)); do + cat <<-EOF + flowtable ft_${i} { + hook ingress priority 0 + devices = { rc$i, rs$i } + } + EOF + done + echo "chain c {" + echo "type filter hook forward priority 0" + for ((i = 0; i < 10; i++)); do + echo -n "iifname rc$i oifname rs$i " + echo "ip protocol tcp counter flow add @ft_${i}" + done + echo "counter" + echo "}" + echo "}" +} | ip netns exec $nsr nft -f - || { + echo "SKIP: Could not load nft ruleset" + exit $ksft_skip +} + +for ((o=0, n=1; ; o=n, n++, n %= 10)); do + ip -net $nsr link set rc$o name rc$n + ip -net $nsr link set rs$o name rs$n +done & +rename_loop_pid=$! + +while true; do ip netns exec $nsr nft list ruleset >/dev/null 2>&1; done & +nft_list_pid=$! + +ip netns exec $nsr nft monitor >/dev/null & +nft_monitor_pid=$! + +ip netns exec $nss iperf3 --server --daemon -1 +summary_expr='s,^\[SUM\] .* \([0-9\.]\+\) Kbits/sec .* receiver,\1,p' +rate=$(ip netns exec $nsc iperf3 \ + --format k -c 10.1.0.1 --time $TEST_RUNTIME \ + --length 56 --parallel 10 -i 0 | sed -n "$summary_expr") + +kill $nft_list_pid +kill $nft_monitor_pid +kill $rename_loop_pid +wait + +wildcard_prep() { + ip netns exec $nsr nft -f - <<EOF +table ip t { + flowtable ft_wild { + hook ingress priority 0 + devices = { wild* } + } +} +EOF +} + +if ! wildcard_prep; then + echo "SKIP wildcard tests: not supported by host's nft?" +else + for ((i = 0; i < 100; i++)); do + ip -net $nsr link add wild$i type dummy & + done + wait + for ((i = 80; i < 100; i++)); do + ip -net $nsr link del wild$i & + done + for ((i = 0; i < 80; i++)); do + ip -net $nsr link del wild$i & + done + wait + for ((i = 0; i < 100; i += 10)); do + ( + for ((j = 0; j < 10; j++)); do + ip -net $nsr link add wild$((i + j)) type dummy + done + for ((j = 0; j < 10; j++)); do + ip -net $nsr link del wild$((i + j)) + done + ) & + done + wait +fi + +[[ $(</proc/sys/kernel/tainted) -eq 0 ]] || { + echo "FAIL: Kernel is tainted!" + exit $ksft_fail +} + +[[ $rate -gt 0 ]] || { + echo "FAIL: Zero throughput in iperf3" + exit $ksft_fail +} + +[[ -f /sys/kernel/debug/kmemleak && \ + -n $(</sys/kernel/debug/kmemleak) ]] && { + echo "FAIL: non-empty kmemleak report" + exit $ksft_fail +} + +exit $ksft_pass diff --git a/tools/testing/selftests/net/netfilter/nft_meta.sh b/tools/testing/selftests/net/netfilter/nft_meta.sh new file mode 100755 index 000000000000..71505b6cb252 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_meta.sh @@ -0,0 +1,142 @@ +#!/bin/bash + +# check iif/iifname/oifgroup/iiftype match. + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +sfx=$(mktemp -u "XXXXXXXX") +ns0="ns0-$sfx" + +if ! nft --version > /dev/null 2>&1; then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +cleanup() +{ + ip netns del "$ns0" +} + +ip netns add "$ns0" +ip -net "$ns0" link set lo up +ip -net "$ns0" addr add 127.0.0.1 dev lo + +trap cleanup EXIT + +currentyear=$(date +%Y) +lastyear=$((currentyear-1)) +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table inet filter { + counter iifcount {} + counter iifnamecount {} + counter iifgroupcount {} + counter iiftypecount {} + counter infproto4count {} + counter il4protocounter {} + counter imarkcounter {} + counter icpu0counter {} + counter ilastyearcounter {} + counter icurrentyearcounter {} + + counter oifcount {} + counter oifnamecount {} + counter oifgroupcount {} + counter oiftypecount {} + counter onfproto4count {} + counter ol4protocounter {} + counter oskuidcounter {} + counter oskgidcounter {} + counter omarkcounter {} + + chain input { + type filter hook input priority 0; policy accept; + + meta iif lo counter name "iifcount" + meta iifname "lo" counter name "iifnamecount" + meta iifgroup "default" counter name "iifgroupcount" + meta iiftype "loopback" counter name "iiftypecount" + meta nfproto ipv4 counter name "infproto4count" + meta l4proto icmp counter name "il4protocounter" + meta mark 42 counter name "imarkcounter" + meta cpu 0 counter name "icpu0counter" + meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter + meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter + } + + chain output { + type filter hook output priority 0; policy accept; + meta oif lo counter name "oifcount" counter + meta oifname "lo" counter name "oifnamecount" + meta oifgroup "default" counter name "oifgroupcount" + meta oiftype "loopback" counter name "oiftypecount" + meta nfproto ipv4 counter name "onfproto4count" + meta l4proto icmp counter name "ol4protocounter" + meta skuid 0 counter name "oskuidcounter" + meta skgid 0 counter name "oskgidcounter" + meta mark 42 counter name "omarkcounter" + } +} +EOF + +if [ $? -ne 0 ]; then + echo "SKIP: Could not add test ruleset" + exit $ksft_skip +fi + +ret=0 + +check_one_counter() +{ + local cname="$1" + local want="packets $2" + local verbose="$3" + + if ! ip netns exec "$ns0" nft list counter inet filter "$cname" | grep -q "$want"; then + echo "FAIL: $cname, want \"$want\", got" + ret=1 + ip netns exec "$ns0" nft list counter inet filter "$cname" + fi +} + +check_lo_counters() +{ + local want="$1" + local verbose="$2" + local counter + + for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \ + oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \ + il4protocounter icurrentyearcounter ol4protocounter \ + ; do + check_one_counter "$counter" "$want" "$verbose" + done +} + +check_lo_counters "0" false +ip netns exec "$ns0" ping -q -c 1 127.0.0.1 -m 42 > /dev/null + +check_lo_counters "2" true + +check_one_counter oskuidcounter "1" true +check_one_counter oskgidcounter "1" true +check_one_counter imarkcounter "1" true +check_one_counter omarkcounter "1" true +check_one_counter ilastyearcounter "0" true + +if [ $ret -eq 0 ];then + echo "OK: nftables meta iif/oif counters at expected values" +else + exit $ret +fi + +#First CPU execution and counter +taskset -p 01 $$ > /dev/null +ip netns exec "$ns0" nft reset counters > /dev/null +ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null +check_one_counter icpu0counter "2" true + +if [ $ret -eq 0 ];then + echo "OK: nftables meta cpu counter at expected values" +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh new file mode 100755 index 000000000000..9e39de26455f --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_nat.sh @@ -0,0 +1,1156 @@ +#!/bin/bash +# +# This test is for basic NAT functionality: snat, dnat, redirect, masquerade. +# + +source lib.sh + +ret=0 +test_inet_nat=true + +checktool "nft --version" "run test without nft tool" +checktool "socat -h" "run test without socat" + +cleanup() +{ + ip netns pids "$ns0" | xargs kill 2>/dev/null + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + + rm -f "$INFILE" "$OUTFILE" + + cleanup_all_ns +} + +trap cleanup EXIT + +INFILE=$(mktemp) +OUTFILE=$(mktemp) + +setup_ns ns0 ns1 ns2 + +if ! ip link add veth0 netns "$ns0" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1;then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns2" + +ip -net "$ns0" link set veth0 up +ip -net "$ns0" addr add 10.0.1.1/24 dev veth0 +ip -net "$ns0" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$ns0" link set veth1 up +ip -net "$ns0" addr add 10.0.2.1/24 dev veth1 +ip -net "$ns0" addr add dead:2::1/64 dev veth1 nodad + +do_config() +{ + ns="$1" + subnet="$2" + + ip -net "$ns" link set eth0 up + ip -net "$ns" addr add "10.0.$subnet.99/24" dev eth0 + ip -net "$ns" route add default via "10.0.$subnet.1" + ip -net "$ns" addr add "dead:$subnet::99/64" dev eth0 nodad + ip -net "$ns" route add default via "dead:$subnet::1" +} + +do_config "$ns1" 1 +do_config "$ns2" 2 + +bad_counter() +{ + local ns=$1 + local counter=$2 + local expect=$3 + local tag=$4 + + echo "ERROR: $counter counter in $ns has unexpected value (expected $expect) at $tag" 1>&2 + ip netns exec "$ns" nft list counter inet filter "$counter" 1>&2 +} + +check_counters() +{ + ns=$1 + local lret=0 + + if ! ip netns exec "$ns" nft list counter inet filter ns0in | grep -q "packets 1 bytes 84";then + bad_counter "$ns" ns0in "packets 1 bytes 84" "check_counters 1" + lret=1 + fi + + if ! ip netns exec "$ns" nft list counter inet filter ns0out | grep -q "packets 1 bytes 84";then + bad_counter "$ns" ns0out "packets 1 bytes 84" "check_counters 2" + lret=1 + fi + + expect="packets 1 bytes 104" + if ! ip netns exec "$ns" nft list counter inet filter ns0in6 | grep -q "$expect";then + bad_counter "$ns" ns0in6 "$expect" "check_counters 3" + lret=1 + fi + if ! ip netns exec "$ns" nft list counter inet filter ns0out6 | grep -q "$expect";then + bad_counter "$ns" ns0out6 "$expect" "check_counters 4" + lret=1 + fi + + return $lret +} + +check_ns0_counters() +{ + local ns=$1 + local lret=0 + + if ! ip netns exec "$ns0" nft list counter inet filter ns0in | grep -q "packets 0 bytes 0";then + bad_counter "$ns0" ns0in "packets 0 bytes 0" "check_ns0_counters 1" + lret=1 + fi + + if ! ip netns exec "$ns0" nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0";then + bad_counter "$ns0" ns0in6 "packets 0 bytes 0" + lret=1 + fi + + if ! ip netns exec "$ns0" nft list counter inet filter ns0out | grep -q "packets 0 bytes 0";then + bad_counter "$ns0" ns0out "packets 0 bytes 0" "check_ns0_counters 2" + lret=1 + fi + if ! ip netns exec "$ns0" nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0";then + bad_counter "$ns0" ns0out6 "packets 0 bytes 0" "check_ns0_counters3 " + lret=1 + fi + + for dir in "in" "out" ; do + expect="packets 1 bytes 84" + if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}" | grep -q "$expect";then + bad_counter "$ns0" "$ns${dir}" "$expect" "check_ns0_counters 4" + lret=1 + fi + + expect="packets 1 bytes 104" + if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}6" | grep -q "$expect";then + bad_counter "$ns0" "$ns${dir}6" "$expect" "check_ns0_counters 5" + lret=1 + fi + done + + return $lret +} + +reset_counters() +{ + for i in "$ns0" "$ns1" "$ns2" ;do + ip netns exec "$i" nft reset counters inet > /dev/null + done +} + +test_local_dnat6() +{ + local family=$1 + local lret=0 + local IPF="" + + if [ "$family" = "inet" ];then + IPF="ip6" + fi + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family nat { + chain output { + type nat hook output priority 0; policy accept; + ip6 daddr dead:1::99 dnat $IPF to dead:2::99 + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add $family dnat hook" + return $ksft_skip + fi + + # ping netns1, expect rewrite to netns2 + if ! ip netns exec "$ns0" ping -q -c 1 dead:1::99 > /dev/null;then + lret=1 + echo "ERROR: ping6 failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat6 1" + lret=1 + fi + done + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat6 2" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then + bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat6 3" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then + bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat6 4" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ipv6 ping to $ns1 was $family NATted to $ns2" + ip netns exec "$ns0" nft flush chain ip6 nat output + + return $lret +} + +test_local_dnat() +{ + local family=$1 + local lret=0 + local IPF="" + + if [ "$family" = "inet" ];then + IPF="ip" + fi + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF 2>/dev/null +table $family nat { + chain output { + type nat hook output priority 0; policy accept; + ip daddr 10.0.1.99 dnat $IPF to 10.0.2.99 + } +} +EOF + if [ $? -ne 0 ]; then + if [ "$family" = "inet" ];then + echo "SKIP: inet nat tests" + test_inet_nat=false + return $ksft_skip + fi + echo "SKIP: Could not add add $family dnat hook" + return $ksft_skip + fi + + # ping netns1, expect rewrite to netns2 + if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns0" "ns1$dir" "$expect" "test_local_dnat 1" + lret=1 + fi + done + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns0" "ns2$dir" "$expect" "test_local_dnat 2" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + if ! ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect";then + bad_counter "$ns1" "ns0$dir" "$expect" "test_local_dnat 3" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + if ! ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect";then + bad_counter "$ns2" "ns0$dir" "$expect" "test_local_dnat 4" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to $ns1 was $family NATted to $ns2" + + ip netns exec "$ns0" nft flush chain "$family" nat output + + reset_counters + if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns1" ns1$dir "$expect" "test_local_dnat 5" + lret=1 + fi + done + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 6" + lret=1 + fi + done + + # expect 1 count in ns1 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then + bad_counter "$ns0" ns0$dir "$expect" "test_local_dnat 7" + lret=1 + fi + done + + # expect 0 packet in ns2 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then + bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 8" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to $ns1 OK after $family nat output chain flush" + + return $lret +} + +listener_ready() +{ + local ns="$1" + local port="$2" + local proto="$3" + ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port" +} + +test_local_dnat_portonly() +{ + local family=$1 + local daddr=$2 + local lret=0 + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family nat { + chain output { + type nat hook output priority 0; policy accept; + meta l4proto tcp dnat to :2000 + + } +} +EOF + if [ $? -ne 0 ]; then + if [ "$family" = "inet" ];then + echo "SKIP: inet port test" + test_inet_nat=false + return + fi + echo "SKIP: Could not add $family dnat hook" + return + fi + + echo "SERVER-$family" | ip netns exec "$ns1" timeout 3 socat -u STDIN TCP-LISTEN:2000 & + + busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 2000 "-t" + + result=$(ip netns exec "$ns0" timeout 1 socat -u TCP:"$daddr":2000 STDOUT) + + if [ "$result" = "SERVER-inet" ];then + echo "PASS: inet port rewrite without l3 address" + else + echo "ERROR: inet port rewrite without l3 address, got $result" + ret=1 + fi +} + +test_masquerade6() +{ + local family=$1 + local natflags=$2 + local lret=0 + + ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then + echo "ERROR: cannot ping $ns1 from $ns2 via ipv6" + return 1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade6 1" + lret=1 + fi + + if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade6 2" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade $natflags + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add $family masquerade hook" + return $ksft_skip + fi + + if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then + echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then + bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 3" + lret=1 + fi + + if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 4" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 5" + lret=1 + fi + + if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade6 6" + lret=1 + fi + done + + if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then + echo "ERROR: cannot ping $ns1 from $ns2 with active ipv6 masquerade $natflags (attempt 2)" + lret=1 + fi + + if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting;then + echo "ERROR: Could not flush $family nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: $family IPv6 masquerade $natflags for $ns2" + + return $lret +} + +test_masquerade() +{ + local family=$1 + local natflags=$2 + local lret=0 + + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then + echo "ERROR: cannot ping $ns1 from $ns2 $natflags" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade 1" + lret=1 + fi + + if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 2" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade $natflags + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add $family masquerade hook" + return $ksft_skip + fi + + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then + echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then + bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 3" + lret=1 + fi + + if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 4" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 5" + lret=1 + fi + + if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade 6" + lret=1 + fi + done + + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then + echo "ERROR: cannot ping $ns1 from $ns2 with active ip masquerade $natflags (attempt 2)" + lret=1 + fi + + if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting; then + echo "ERROR: Could not flush $family nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: $family IP masquerade $natflags for $ns2" + + return $lret +} + +test_redirect6() +{ + local family=$1 + local lret=0 + + ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then + echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6" + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns1" ns2$dir "$expect" "test_redirect6 1" + lret=1 + fi + + if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns2" ns1$dir "$expect" "test_redirect6 2" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add $family redirect hook" + return $ksft_skip + fi + + if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then + echo "ERROR: cannot ping $ns1 from $ns2 via ipv6 with active $family redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 3" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 4" + lret=1 + fi + done + + if ! ip netns exec "$ns0" nft delete table "$family" nat;then + echo "ERROR: Could not delete $family nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: $family IPv6 redirection for $ns2" + + return $lret +} + +test_redirect() +{ + local family=$1 + local lret=0 + + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then + echo "ERROR: cannot ping $ns1 from $ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns1" "$ns2$dir" "$expect" "test_redirect 1" + lret=1 + fi + + if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then + bad_counter "$ns2" ns1$dir "$expect" "test_redirect 2" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add $family redirect hook" + return $ksft_skip + fi + + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then + echo "ERROR: cannot ping $ns1 from $ns2 with active $family ip redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns1" ns0$dir "$expect" "test_redirect 3" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns0" ns0$dir "$expect" "test_redirect 4" + lret=1 + fi + done + + if ! ip netns exec "$ns0" nft delete table "$family" nat;then + echo "ERROR: Could not delete $family nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: $family IP redirection for $ns2" + + return $lret +} + +# test port shadowing. +# create two listening services, one on router (ns0), one +# on client (ns2), which is masqueraded from ns1 point of view. +# ns2 sends udp packet coming from service port to ns1, on a highport. +# Later, if n1 uses same highport to connect to ns0:service, packet +# might be port-forwarded to ns2 instead. + +# second argument tells if we expect the 'fake-entry' to take effect +# (CLIENT) or not (ROUTER). +test_port_shadow() +{ + local test=$1 + local expect=$2 + local daddrc="10.0.1.99" + local daddrs="10.0.1.1" + local result="" + local logmsg="" + + # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405. + echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405 + + echo ROUTER | ip netns exec "$ns0" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405 2>/dev/null & + local sc_r=$! + echo CLIENT | ip netns exec "$ns2" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405,reuseport 2>/dev/null & + local sc_c=$! + + busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1405 "-u" + busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" 1405 "-u" + + # ns1 tries to connect to ns0:1405. With default settings this should connect + # to client, it matches the conntrack entry created above. + + result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404) + + if [ "$result" = "$expect" ] ;then + echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}" + else + echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended" + ret=1 + fi + + kill $sc_r $sc_c 2>/dev/null + + # flush udp entries for next test round, if any + ip netns exec "$ns0" conntrack -F >/dev/null 2>&1 +} + +# This prevents port shadow of router service via packet filter, +# packets claiming to originate from service port from internal +# network are dropped. +test_port_shadow_filter() +{ + local family=$1 + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family filter { + chain forward { + type filter hook forward priority 0; policy accept; + meta iif veth1 udp sport 1405 drop + } +} +EOF + test_port_shadow "port-filter" "ROUTER" + + ip netns exec "$ns0" nft delete table "$family" filter +} + +# This prevents port shadow of router service via notrack. +test_port_shadow_notrack() +{ + local family=$1 + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family raw { + chain prerouting { + type filter hook prerouting priority -300; policy accept; + meta iif veth0 udp dport 1405 notrack + } + chain output { + type filter hook output priority -300; policy accept; + meta oif veth0 udp sport 1405 notrack + } +} +EOF + test_port_shadow "port-notrack" "ROUTER" + + ip netns exec "$ns0" nft delete table "$family" raw +} + +# This prevents port shadow of router service via sport remap. +test_port_shadow_pat() +{ + local family=$1 + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family pat { + chain postrouting { + type nat hook postrouting priority -1; policy accept; + meta iif veth1 udp sport <= 1405 masquerade to : 1406-65535 random + } +} +EOF + test_port_shadow "pat" "ROUTER" + + ip netns exec "$ns0" nft delete table "$family" pat +} + +test_port_shadowing() +{ + local family="ip" + + if ! conntrack -h >/dev/null 2>&1;then + echo "SKIP: Could not run nat port shadowing test without conntrack tool" + return + fi + + if ! socat -h > /dev/null 2>&1;then + echo "SKIP: Could not run nat port shadowing test without socat tool" + return + fi + + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add $family masquerade hook" + return $ksft_skip + fi + + # test default behaviour. Packet from ns1 to ns0 is redirected to ns2. + test_port_shadow "default" "CLIENT" + + # test packet filter based mitigation: prevent forwarding of + # packets claiming to come from the service port. + test_port_shadow_filter "$family" + + # test conntrack based mitigation: connections going or coming + # from router:service bypass connection tracking. + test_port_shadow_notrack "$family" + + # test nat based mitigation: fowarded packets coming from service port + # are masqueraded with random highport. + test_port_shadow_pat "$family" + + ip netns exec "$ns0" nft delete table $family nat +} + +test_stateless_nat_ip() +{ + local lret=0 + + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then + echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules" + return 1 + fi + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table ip stateless { + map xlate_in { + typeof meta iifname . ip saddr . ip daddr : ip daddr + elements = { + "veth1" . 10.0.2.99 . 10.0.1.99 : 10.0.2.2, + } + } + map xlate_out { + typeof meta iifname . ip saddr . ip daddr : ip daddr + elements = { + "veth0" . 10.0.1.99 . 10.0.2.2 : 10.0.2.99 + } + } + + chain prerouting { + type filter hook prerouting priority -400; policy accept; + ip saddr set meta iifname . ip saddr . ip daddr map @xlate_in + ip daddr set meta iifname . ip saddr . ip daddr map @xlate_out + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add ip statless rules" + return $ksft_skip + fi + + reset_counters + + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null; then + echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules" + lret=1 + fi + + # ns1 should have seen packets from .2.2, due to stateless rewrite. + expect="packets 1 bytes 84" + if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then + bad_counter "$ns1" ns0insl "$expect" "test_stateless 1" + lret=1 + fi + + for dir in "in" "out" ; do + if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then + bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + if ! ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect";then + bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3" + lret=1 + fi + + if ! ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect";then + bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4" + lret=1 + fi + done + + reset_counters + + if ! socat -h > /dev/null 2>&1;then + echo "SKIP: Could not run stateless nat frag test without socat tool" + if [ $lret -eq 0 ]; then + return $ksft_skip + fi + + ip netns exec "$ns0" nft delete table ip stateless + return $lret + fi + + dd if=/dev/urandom of="$INFILE" bs=4096 count=1 2>/dev/null + + ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:"$OUTFILE" < /dev/null 2>/dev/null & + + busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 4233 "-u" + + # re-do with large ping -> ip fragmentation + if ! ip netns exec "$ns2" timeout 3 socat -u STDIN UDP4-SENDTO:"10.0.1.99:4233" < "$INFILE" > /dev/null;then + echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2 + lret=1 + fi + + wait + + if ! cmp "$INFILE" "$OUTFILE";then + ls -l "$INFILE" "$OUTFILE" + echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2 + lret=1 + fi + + :> "$OUTFILE" + + # ns1 should have seen packets from 2.2, due to stateless rewrite. + expect="packets 3 bytes 4164" + if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then + bad_counter "$ns1" ns0insl "$expect" "test_stateless 5" + lret=1 + fi + + if ! ip netns exec "$ns0" nft delete table ip stateless; then + echo "ERROR: Could not delete table ip stateless" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP statless for $ns2" + + return $lret +} + +# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 +for i in "$ns0" "$ns1" "$ns2" ;do +ip netns exec "$i" nft -f /dev/stdin <<EOF +table inet filter { + counter ns0in {} + counter ns1in {} + counter ns2in {} + + counter ns0out {} + counter ns1out {} + counter ns2out {} + + counter ns0in6 {} + counter ns1in6 {} + counter ns2in6 {} + + counter ns0out6 {} + counter ns1out6 {} + counter ns2out6 {} + + map nsincounter { + type ipv4_addr : counter + elements = { 10.0.1.1 : "ns0in", + 10.0.2.1 : "ns0in", + 10.0.1.99 : "ns1in", + 10.0.2.99 : "ns2in" } + } + + map nsincounter6 { + type ipv6_addr : counter + elements = { dead:1::1 : "ns0in6", + dead:2::1 : "ns0in6", + dead:1::99 : "ns1in6", + dead:2::99 : "ns2in6" } + } + + map nsoutcounter { + type ipv4_addr : counter + elements = { 10.0.1.1 : "ns0out", + 10.0.2.1 : "ns0out", + 10.0.1.99: "ns1out", + 10.0.2.99: "ns2out" } + } + + map nsoutcounter6 { + type ipv6_addr : counter + elements = { dead:1::1 : "ns0out6", + dead:2::1 : "ns0out6", + dead:1::99 : "ns1out6", + dead:2::99 : "ns2out6" } + } + + chain input { + type filter hook input priority 0; policy accept; + counter name ip saddr map @nsincounter + icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6 + } + chain output { + type filter hook output priority 0; policy accept; + counter name ip daddr map @nsoutcounter + icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6 + } +} +EOF +done + +# special case for stateless nat check, counter needs to +# be done before (input) ip defragmentation +ip netns exec "$ns1" nft -f /dev/stdin <<EOF +table inet filter { + counter ns0insl {} + + chain pre { + type filter hook prerouting priority -400; policy accept; + ip saddr 10.0.2.2 counter name "ns0insl" + } +} +EOF + +ping_basic() +{ + i="$1" + if ! ip netns exec "$ns0" ping -c 1 -q 10.0."$i".99 > /dev/null;then + echo "ERROR: Could not reach other namespace(s)" 1>&2 + ret=1 + fi + + if ! ip netns exec "$ns0" ping -c 1 -q dead:"$i"::99 > /dev/null;then + echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2 + ret=1 + fi +} + +test_basic_conn() +{ + local nsexec + name="$1" + + nsexec=$(eval echo \$"$1") + + ping_basic 1 + ping_basic 2 + + if ! check_counters "$nsexec";then + return 1 + fi + + if ! check_ns0_counters "$name";then + return 1 + fi + + reset_counters + return 0 +} + +if ! test_basic_conn "ns1" ; then + echo "ERROR: basic test for ns1 failed" 1>&2 + exit 1 +fi +if ! test_basic_conn "ns2"; then + echo "ERROR: basic test for ns1 failed" 1>&2 +fi + +if [ $ret -eq 0 ];then + echo "PASS: netns routing/connectivity: $ns0 can reach $ns1 and $ns2" +fi + +reset_counters +test_local_dnat ip +test_local_dnat6 ip6 + +reset_counters +test_local_dnat_portonly inet 10.0.1.99 + +reset_counters +$test_inet_nat && test_local_dnat inet +$test_inet_nat && test_local_dnat6 inet + +for flags in "" "fully-random"; do +reset_counters +test_masquerade ip $flags +test_masquerade6 ip6 $flags +reset_counters +$test_inet_nat && test_masquerade inet $flags +$test_inet_nat && test_masquerade6 inet $flags +done + +reset_counters +test_redirect ip +test_redirect6 ip6 +reset_counters +$test_inet_nat && test_redirect inet +$test_inet_nat && test_redirect6 inet + +test_port_shadowing +test_stateless_nat_ip + +if [ $ret -ne 0 ];then + echo -n "FAIL: " + nft --version +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh new file mode 100755 index 000000000000..9f200f80253a --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh @@ -0,0 +1,265 @@ +#!/bin/bash +# +# Test connection tracking zone and NAT source port reallocation support. +# + +source lib.sh + +# Don't increase too much, 2000 clients should work +# just fine but script can then take several minutes with +# KASAN/debug builds. +maxclients=100 + +have_socat=0 +ret=0 + +[ "$KSFT_MACHINE_SLOW" = yes ] && maxclients=40 +# client1---. +# veth1-. +# | +# NAT Gateway --veth0--> Server +# | | +# veth2-' | +# client2---' | +# .... | +# clientX----vethX---' + +# All clients share identical IP address. +# NAT Gateway uses policy routing and conntrack zones to isolate client +# namespaces. Each client connects to Server, each with colliding tuples: +# clientsaddr:10000 -> serveraddr:dport +# NAT Gateway is supposed to do port reallocation for each of the +# connections. + +v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null) +v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null) +v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null) +v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null) +v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null) +v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null) + +cleanup() +{ + cleanup_all_ns + + sysctl -q net.ipv4.neigh.default.gc_thresh1="$v4gc1" 2>/dev/null + sysctl -q net.ipv4.neigh.default.gc_thresh2="$v4gc2" 2>/dev/null + sysctl -q net.ipv4.neigh.default.gc_thresh3="$v4gc3" 2>/dev/null + sysctl -q net.ipv6.neigh.default.gc_thresh1="$v6gc1" 2>/dev/null + sysctl -q net.ipv6.neigh.default.gc_thresh2="$v6gc2" 2>/dev/null + sysctl -q net.ipv6.neigh.default.gc_thresh3="$v6gc3" 2>/dev/null +} + +checktool "nft --version" echo "run test without nft tool" +checktool "conntrack -V" "run test without conntrack tool" + +if socat -h >/dev/null 2>&1; then + have_socat=1 +fi + +setup_ns gw srv + +trap cleanup EXIT + +ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv" +ip -net "$gw" link set veth0 up +ip -net "$srv" link set eth0 up + +sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null +sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null +sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null +sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null +sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null +sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null + +for i in $(seq 1 "$maxclients");do + setup_ns "cl$i" + + cl=$(eval echo \$cl"$i") + if ! ip link add veth"$i" netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1;then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip + fi +done + +for i in $(seq 1 "$maxclients");do + cl=$(eval echo \$cl"$i") + echo netns exec "$cl" ip link set eth0 up + echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2 + echo netns exec "$gw" ip link set "veth$i" up + echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2 + + # clients have same IP addresses. + echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0 + echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0 nodad + echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0 + echo netns exec "$cl" ip route add default via dead:1::2 dev eth0 + + # NB: same addresses on client-facing interfaces. + echo netns exec "$gw" ip addr add 10.1.0.2/24 dev "veth$i" + echo netns exec "$gw" ip addr add dead:1::2/64 dev "veth$i" nodad + + # gw: policy routing + echo netns exec "$gw" ip route add 10.1.0.0/24 dev "veth$i" table $((1000+i)) + echo netns exec "$gw" ip route add dead:1::0/64 dev "veth$i" table $((1000+i)) + echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i)) + echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i)) + echo netns exec "$gw" ip rule add fwmark "$i" lookup $((1000+i)) +done | ip -batch /dev/stdin + +ip -net "$gw" addr add 10.3.0.1/24 dev veth0 +ip -net "$gw" addr add dead:3::1/64 dev veth0 nodad + +ip -net "$srv" addr add 10.3.0.99/24 dev eth0 +ip -net "$srv" addr add dead:3::99/64 dev eth0 nodad + +ip netns exec "$gw" nft -f /dev/stdin<<EOF +table inet raw { + map iiftomark { + type ifname : mark + } + + map iiftozone { + typeof iifname : ct zone + } + + set inicmp { + flags dynamic + type ipv4_addr . ifname . ipv4_addr + } + set inflows { + flags dynamic + type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service + } + + set inflows6 { + flags dynamic + type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service + } + + chain prerouting { + type filter hook prerouting priority -64000; policy accept; + ct original zone set meta iifname map @iiftozone + meta mark set meta iifname map @iiftomark + + tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter } + add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter } + ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter } + } + + chain nat_postrouting { + type nat hook postrouting priority 0; policy accept; + ct mark set meta mark meta oifname veth0 masquerade + } + + chain mangle_prerouting { + type filter hook prerouting priority -100; policy accept; + ct direction reply meta mark set ct mark + } +} +EOF +if [ "$?" -ne 0 ];then + echo "SKIP: Could not add nftables rules" + exit $ksft_skip +fi + +( echo add element inet raw iiftomark \{ + for i in $(seq 1 $((maxclients-1))); do + echo \"veth"$i"\" : "$i", + done + echo \"veth"$maxclients"\" : "$maxclients" \} + echo add element inet raw iiftozone \{ + for i in $(seq 1 $((maxclients-1))); do + echo \"veth"$i"\" : "$i", + done + echo \"veth$maxclients\" : $maxclients \} +) | ip netns exec "$gw" nft -f /dev/stdin + +ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null +ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null + +# useful for debugging: allows to use 'ping' from clients to gateway. +ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null +ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null + +for i in $(seq 1 "$maxclients"); do + cl=$(eval echo \$cl"$i") + ip netns exec "$cl" ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 & +done + +wait || ret=1 + +[ "$ret" -ne 0 ] && "FAIL: Ping failure from $cl" 1>&2 + +for i in $(seq 1 "$maxclients"); do + if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"; then + ret=1 + echo "FAIL: counter icmp mismatch for veth$i" 1>&2 + ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2 + break + fi +done + +if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }"; then + ret=1 + echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }" + ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2 +fi + +if [ $ret -eq 0 ]; then + echo "PASS: ping test from all $maxclients namespaces" +fi + +if [ $have_socat -eq 0 ];then + echo "SKIP: socat not installed" + if [ $ret -ne 0 ];then + exit $ret + fi + exit $ksft_skip +fi + +listener_ready() +{ + ss -N "$1" -lnt -o "sport = :5201" | grep -q 5201 +} + +ip netns exec "$srv" socat -u TCP-LISTEN:5201,fork STDOUT > /dev/null 2>/dev/null & +socatpid=$! + +busywait 1000 listener_ready "$srv" + +for i in $(seq 1 "$maxclients"); do + if [ $ret -ne 0 ]; then + break + fi + cl=$(eval echo \$cl"$i") + if ! ip netns exec "$cl" socat -4 -u STDIN TCP:10.3.0.99:5201,sourceport=10000 < /dev/null > /dev/null; then + echo "FAIL: Failure to connect for $cl" 1>&2 + ip netns exec "$gw" conntrack -S 1>&2 + ret=1 + fi +done +if [ $ret -eq 0 ];then + echo "PASS: socat connections for all $maxclients net namespaces" +fi + +kill $socatpid +wait + +for i in $(seq 1 "$maxclients"); do + if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null;then + ret=1 + echo "FAIL: can't find expected tcp entry for veth$i" 1>&2 + break + fi +done +if [ $ret -eq 0 ];then + echo "PASS: Found client connection for all $maxclients net namespaces" +fi + +if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null;then + ret=1 + echo "FAIL: cannot find return entry on veth0" 1>&2 +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh new file mode 100755 index 000000000000..6136ceec45e0 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -0,0 +1,672 @@ +#!/bin/bash +# +# This tests nf_queue: +# 1. can process packets from all hooks +# 2. support running nfqueue from more than one base chain +# +# shellcheck disable=SC2162,SC2317 + +source lib.sh +ret=0 +timeout=5 + +SCTP_TEST_TIMEOUT=60 + +cleanup() +{ + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + ip netns pids "$nsrouter" | xargs kill 2>/dev/null + + cleanup_all_ns + + rm -f "$TMPINPUT" + rm -f "$TMPFILE0" + rm -f "$TMPFILE1" + rm -f "$TMPFILE2" "$TMPFILE3" +} + +checktool "nft --version" "test without nft tool" +checktool "socat -h" "run test without socat" + +modprobe -q sctp + +trap cleanup EXIT + +setup_ns ns1 ns2 ns3 nsrouter + +TMPFILE0=$(mktemp) +TMPFILE1=$(mktemp) +TMPFILE2=$(mktemp) +TMPFILE3=$(mktemp) + +TMPINPUT=$(mktemp) +COUNT=200 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=$((COUNT/8)) +dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT" + +if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2" +ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3" + +ip -net "$nsrouter" link set veth0 up +ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$nsrouter" link set veth1 up +ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1 +ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad + +ip -net "$nsrouter" link set veth2 up +ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2 +ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad + +ip -net "$ns1" link set eth0 up +ip -net "$ns2" link set eth0 up +ip -net "$ns3" link set eth0 up + +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" route add default via dead:1::1 + +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns2" route add default via dead:2::1 + +ip -net "$ns3" addr add 10.0.3.99/24 dev eth0 +ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad +ip -net "$ns3" route add default via 10.0.3.1 +ip -net "$ns3" route add default via dead:3::1 + +load_ruleset() { + local name=$1 + local prio=$2 + +ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +table inet $name { + chain nfq { + ip protocol icmp queue bypass + icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass + } + chain pre { + type filter hook prerouting priority $prio; policy accept; + jump nfq + } + chain input { + type filter hook input priority $prio; policy accept; + jump nfq + } + chain forward { + type filter hook forward priority $prio; policy accept; + tcp dport 12345 queue num 2 + jump nfq + } + chain output { + type filter hook output priority $prio; policy accept; + tcp dport 12345 queue num 3 + tcp sport 23456 queue num 3 + jump nfq + } + chain post { + type filter hook postrouting priority $prio; policy accept; + jump nfq + } +} +EOF +} + +load_counter_ruleset() { + local prio=$1 + +ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +table inet countrules { + chain pre { + type filter hook prerouting priority $prio; policy accept; + counter + } + chain input { + type filter hook input priority $prio; policy accept; + counter + } + chain forward { + type filter hook forward priority $prio; policy accept; + counter + } + chain output { + type filter hook output priority $prio; policy accept; + counter + } + chain post { + type filter hook postrouting priority $prio; policy accept; + counter + } +} +EOF +} + +test_ping() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then + return 2 + fi + + return 0 +} + +test_ping_router() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then + return 3 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then + return 4 + fi + + return 0 +} + +test_queue_blackhole() { + local proto=$1 + +ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +table $proto blackh { + chain forward { + type filter hook forward priority 0; policy accept; + queue num 600 + } +} +EOF + if [ "$proto" = "ip" ] ;then + ip netns exec "$ns1" ping -W 2 -c 1 -q 10.0.2.99 > /dev/null + lret=$? + elif [ "$proto" = "ip6" ]; then + ip netns exec "$ns1" ping -W 2 -c 1 -q dead:2::99 > /dev/null + lret=$? + else + lret=111 + fi + + # queue without bypass keyword should drop traffic if no listener exists. + if [ "$lret" -eq 0 ];then + echo "FAIL: $proto expected failure, got $lret" 1>&2 + exit 1 + fi + + if ! ip netns exec "$nsrouter" nft delete table "$proto" blackh; then + echo "FAIL: $proto: Could not delete blackh table" + exit 1 + fi + + echo "PASS: $proto: statement with no listener results in packet drop" +} + +nf_queue_wait() +{ + local procfile="/proc/self/net/netfilter/nfnetlink_queue" + local netns id + + netns="$1" + id="$2" + + # if this file doesn't exist, nfnetlink_module isn't loaded. + # rather than loading it ourselves, wait for kernel module autoload + # completion, nfnetlink should do so automatically because nf_queue + # helper program, spawned in the background, asked for this functionality. + test -f "$procfile" && + ip netns exec "$netns" cat "$procfile" | grep -q "^ *$id " +} + +test_queue() +{ + local expected="$1" + local last="" + + # spawn nf_queue listeners + ip netns exec "$nsrouter" ./nf_queue -c -q 0 -t $timeout > "$TMPFILE0" & + ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t $timeout > "$TMPFILE1" & + + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 0 + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1 + + if ! test_ping;then + echo "FAIL: netns routing/connectivity with active listener on queues 0 and 1: $ret" 1>&2 + exit $ret + fi + + if ! test_ping_router;then + echo "FAIL: netns router unreachable listener on queue 0 and 1: $ret" 1>&2 + exit $ret + fi + + wait + ret=$? + + for file in $TMPFILE0 $TMPFILE1; do + last=$(tail -n1 "$file") + if [ x"$last" != x"$expected packets total" ]; then + echo "FAIL: Expected $expected packets total, but got $last" 1>&2 + ip netns exec "$nsrouter" nft list ruleset + exit 1 + fi + done + + echo "PASS: Expected and received $last" +} + +listener_ready() +{ + ss -N "$1" -lnt -o "sport = :12345" | grep -q 12345 +} + +test_tcp_forward() +{ + ip netns exec "$nsrouter" ./nf_queue -q 2 & + local nfqpid=$! + + timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null & + local rpid=$! + + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2 + + local tthen=$(date +%s) + + ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null + + wait_and_check_retval "$rpid" "tcp and nfqueue in forward chain" "$tthen" + kill "$nfqpid" +} + +test_tcp_localhost() +{ + timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null & + local rpid=$! + + ip netns exec "$nsrouter" ./nf_queue -q 3 & + local nfqpid=$! + local tthen=$(date +%s) + + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3 + + ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null + + wait_and_check_retval "$rpid" "tcp via loopback" "$tthen" + kill "$nfqpid" +} + +test_tcp_localhost_connectclose() +{ + ip netns exec "$nsrouter" ./nf_queue -q 3 & + local nfqpid=$! + + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3 + + timeout 10 ip netns exec "$nsrouter" ./connect_close -p 23456 -t 3 + + kill "$nfqpid" + wait && echo "PASS: tcp via loopback with connect/close" +} + +test_tcp_localhost_requeue() +{ +ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet filter { + chain output { + type filter hook output priority 0; policy accept; + tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0 + } + chain post { + type filter hook postrouting priority 0; policy accept; + tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0 + } +} +EOF + timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null & + local rpid=$! + + ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t "$timeout" > "$TMPFILE2" & + + # nfqueue 1 will be called via output hook. But this time, + # re-queue the packet to nfqueue program on queue 2. + ip netns exec "$nsrouter" ./nf_queue -G -d 150 -c -q 0 -Q 1 -t "$timeout" > "$TMPFILE3" & + + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" + ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" > /dev/null + + wait + + if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then + echo "FAIL: lost packets during requeue?!" 1>&2 + return + fi + + echo "PASS: tcp via loopback and re-queueing" +} + +test_icmp_vrf() { + if ! ip -net "$ns1" link add tvrf type vrf table 9876;then + echo "SKIP: Could not add vrf device" + return + fi + + ip -net "$ns1" li set eth0 master tvrf + ip -net "$ns1" li set tvrf up + + ip -net "$ns1" route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876 +ip netns exec "$ns1" nft -f /dev/stdin <<EOF +flush ruleset +table inet filter { + chain output { + type filter hook output priority 0; policy accept; + meta oifname "tvrf" icmp type echo-request counter queue num 1 + meta oifname "eth0" icmp type echo-request counter queue num 1 + } + chain post { + type filter hook postrouting priority 0; policy accept; + meta oifname "tvrf" icmp type echo-request counter queue num 1 + meta oifname "eth0" icmp type echo-request counter queue num 1 + } +} +EOF + ip netns exec "$ns1" ./nf_queue -q 1 & + local nfqpid=$! + + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1 + + ip netns exec "$ns1" ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null + + for n in output post; do + for d in tvrf eth0; do + if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then + kill "$nfqpid" + echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2 + ip netns exec "$ns1" nft list ruleset + ret=1 + return + fi + done + done + + kill "$nfqpid" + echo "PASS: icmp+nfqueue via vrf" +} + +sctp_listener_ready() +{ + ss -S -N "$1" -ln -o "sport = :12345" | grep -q 12345 +} + +check_output_files() +{ + local f1="$1" + local f2="$2" + local err="$3" + + if ! cmp "$f1" "$f2" ; then + echo "FAIL: $err: input and output file differ" 1>&2 + echo -n " Input file" 1>&2 + ls -l "$f1" 1>&2 + echo -n "Output file" 1>&2 + ls -l "$f2" 1>&2 + ret=1 + fi +} + +wait_and_check_retval() +{ + local rpid="$1" + local msg="$2" + local tthen="$3" + local tnow=$(date +%s) + + if wait "$rpid";then + echo -n "PASS: " + else + echo -n "FAIL: " + ret=1 + fi + + printf "%s (duration: %ds)\n" "$msg" $((tnow-tthen)) +} + +test_sctp_forward() +{ + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet sctpq { + chain forward { + type filter hook forward priority 0; policy accept; + sctp dport 12345 queue num 10 + } +} +EOF + timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & + local rpid=$! + + busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" + + ip netns exec "$nsrouter" ./nf_queue -q 10 -G & + local nfqpid=$! + local tthen=$(date +%s) + + ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null + + if ! ip netns exec "$nsrouter" nft delete table inet sctpq; then + echo "FAIL: Could not delete sctpq table" + exit 1 + fi + + wait_and_check_retval "$rpid" "sctp and nfqueue in forward chain" "$tthen" + kill "$nfqpid" + + check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward" +} + +test_sctp_output() +{ + ip netns exec "$ns1" nft -f /dev/stdin <<EOF +table inet sctpq { + chain output { + type filter hook output priority 0; policy accept; + sctp dport 12345 queue num 11 + } +} +EOF + # reduce test file size, software segmentation causes sk wmem increase. + dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT" + + timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & + local rpid=$! + + busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" + + ip netns exec "$ns1" ./nf_queue -q 11 & + local nfqpid=$! + local tthen=$(date +%s) + + ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null + + if ! ip netns exec "$ns1" nft delete table inet sctpq; then + echo "FAIL: Could not delete sctpq table" + exit 1 + fi + + # must wait before checking completeness of output file. + wait_and_check_retval "$rpid" "sctp and nfqueue in output chain with GSO" "$tthen" + kill "$nfqpid" + + check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output" +} + +udp_listener_ready() +{ + ss -S -N "$1" -uln -o "sport = :12345" | grep -q 12345 +} + +output_files_written() +{ + test -s "$1" && test -s "$2" +} + +test_udp_ct_race() +{ + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet udpq { + chain prerouting { + type nat hook prerouting priority dstnat - 5; policy accept; + ip daddr 10.6.6.6 udp dport 12345 counter dnat to numgen inc mod 2 map { 0 : 10.0.2.99, 1 : 10.0.3.99 } + } + chain postrouting { + type filter hook postrouting priority srcnat - 5; policy accept; + udp dport 12345 counter queue num 12 + } +} +EOF + :> "$TMPFILE1" + :> "$TMPFILE2" + + timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12345,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc & + local rpid1=$! + + timeout 10 ip netns exec "$ns3" socat UDP-LISTEN:12345,fork,pf=ipv4 OPEN:"$TMPFILE2",trunc & + local rpid2=$! + + ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 & + local nfqpid=$! + + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12 + + # Send two packets, one should end up in ns1, other in ns2. + # This is because nfqueue will delay packet for long enough so that + # second packet will not find existing conntrack entry. + echo "Packet 1" | ip netns exec "$ns1" socat -u STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 + echo "Packet 2" | ip netns exec "$ns1" socat -u STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 + + busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2" + + kill "$nfqpid" + + if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^1"'; then + echo "FAIL: Expected One udp conntrack entry" + ip netns exec "$nsrouter" conntrack -L -p udp --dport 12345 + ret=1 + fi + + if ! ip netns exec "$nsrouter" nft delete table inet udpq; then + echo "FAIL: Could not delete udpq table" + ret=1 + return + fi + + NUMLINES1=$(wc -l < "$TMPFILE1") + NUMLINES2=$(wc -l < "$TMPFILE2") + + if [ "$NUMLINES1" -ne 1 ] || [ "$NUMLINES2" -ne 1 ]; then + ret=1 + echo "FAIL: uneven udp packet distribution: $NUMLINES1 $NUMLINES2" + echo -n "$TMPFILE1: ";cat "$TMPFILE1" + echo -n "$TMPFILE2: ";cat "$TMPFILE2" + return + fi + + echo "PASS: both udp receivers got one packet each" +} + +test_queue_removal() +{ + read tainted_then < /proc/sys/kernel/tainted + + ip netns exec "$ns1" nft -f - <<EOF +flush ruleset +table ip filter { + chain output { + type filter hook output priority 0; policy accept; + ip protocol icmp queue num 0 + } +} +EOF + ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 & + local nfqpid=$! + + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0 + + ip netns exec "$ns1" ping -w 2 -f -c 10 127.0.0.1 -q >/dev/null + kill $nfqpid + + ip netns exec "$ns1" nft flush ruleset + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + if [ "$tainted_now" -eq 0 ];then + echo "PASS: queue program exiting while packets queued" + else + echo "TAINT: queue program exiting while packets queued" + dmesg + ret=1 + fi +} + +ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null + +load_ruleset "filter" 0 + +if test_ping; then + # queue bypass works (rules were skipped, no listener) + echo "PASS: ${ns1} can reach ${ns2}" +else + echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2 + exit $ret +fi + +test_queue_blackhole ip +test_queue_blackhole ip6 + +# dummy ruleset to add base chains between the +# queueing rules. We don't want the second reinject +# to re-execute the old hooks. +load_counter_ruleset 10 + +# we are hooking all: prerouting/input/forward/output/postrouting. +# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so: +# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply). +# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply. +# so we expect that userspace program receives 10 packets. +test_queue 10 + +# same. We queue to a second program as well. +load_ruleset "filter2" 20 +test_queue 20 +ip netns exec "$ns1" nft flush ruleset + +test_tcp_forward +test_tcp_localhost +test_tcp_localhost_connectclose +test_tcp_localhost_requeue +test_sctp_forward +test_sctp_output +test_udp_ct_race + +# should be last, adds vrf device in ns1 and changes routes +test_icmp_vrf +test_queue_removal + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_synproxy.sh b/tools/testing/selftests/net/netfilter/nft_synproxy.sh new file mode 100755 index 000000000000..293f667a6aec --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_synproxy.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +ret=0 + +checktool "nft --version" "run test without nft tool" +checktool "iperf3 --version" "run test without iperf3" + +setup_ns nsr ns1 ns2 + +modprobe -q nf_conntrack + +cleanup() { + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + + cleanup_all_ns +} + +trap cleanup EXIT + +ip link add veth0 netns "$nsr" type veth peer name eth0 netns "$ns1" +ip link add veth1 netns "$nsr" type veth peer name eth0 netns "$ns2" + +for dev in veth0 veth1; do + ip -net "$nsr" link set "$dev" up +done + +ip -net "$nsr" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsr" addr add 10.0.2.1/24 dev veth1 + +ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth0.forwarding=1 +ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth1.forwarding=1 +ip netns exec "$nsr" sysctl -q net.netfilter.nf_conntrack_tcp_loose=0 + +for n in $ns1 $ns2; do + ip -net "$n" link set eth0 up +done +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns2" route add default via 10.0.2.1 + +# test basic connectivity +if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then + echo "ERROR: $ns1 cannot reach $ns2" 1>&2 + exit 1 +fi + +if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then + echo "ERROR: $ns2 cannot reach $ns1" 1>&2 + exit 1 +fi + +ip netns exec "$ns2" iperf3 -s > /dev/null 2>&1 & +# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 & + +sleep 1 + +ip netns exec "$nsr" nft -f - <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority -300; policy accept; + meta iif veth0 tcp flags syn counter notrack + } + + chain forward { + type filter hook forward priority 0; policy accept; + + ct state new,established counter accept + + meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp + + ct state invalid counter drop + + # make ns2 unreachable w.o. tcp synproxy + tcp flags syn counter drop + } +} +EOF +if [ $? -ne 0 ]; then + echo "SKIP: Cannot add nft synproxy" + exit $ksft_skip +fi + +if ! ip netns exec "$ns1" timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null; then + echo "FAIL: iperf3 returned an error" 1>&2 + ret=1 + ip netns exec "$nsr" nft list ruleset +else + echo "PASS: synproxy connection successful" +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh b/tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh new file mode 100755 index 000000000000..e208fb03eeb7 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh @@ -0,0 +1,358 @@ +#!/bin/bash +# +# This tests tproxy on the following scenario: +# +# +------------+ +# +-------+ | nsrouter | +-------+ +# |ns1 |.99 .1| |.1 .99| ns2| +# | eth0|---------------|veth0 veth1|------------------|eth0 | +# | | 10.0.1.0/24 | | 10.0.2.0/24 | | +# +-------+ dead:1::/64 | veth2 | dead:2::/64 +-------+ +# +------------+ +# |.1 +# | +# | +# | +-------+ +# | .99| ns3| +# +------------------------|eth0 | +# 10.0.3.0/24 | | +# dead:3::/64 +-------+ +# +# The tproxy implementation acts as an echo server so the client +# must receive the same message it sent if it has been proxied. +# If is not proxied the servers return PONG_NS# with the number +# of the namespace the server is running. +# +# shellcheck disable=SC2162,SC2317 + +source lib.sh +ret=0 +timeout=5 + +cleanup() +{ + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + ip netns pids "$ns3" | xargs kill 2>/dev/null + ip netns pids "$nsrouter" | xargs kill 2>/dev/null + + cleanup_all_ns +} + +checktool "nft --version" "test without nft tool" +checktool "socat -h" "run test without socat" + +trap cleanup EXIT +setup_ns ns1 ns2 ns3 nsrouter + +if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2" +ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3" + +ip -net "$nsrouter" link set veth0 up +ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$nsrouter" link set veth1 up +ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1 +ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad + +ip -net "$nsrouter" link set veth2 up +ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2 +ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad + +ip -net "$ns1" link set eth0 up +ip -net "$ns2" link set eth0 up +ip -net "$ns3" link set eth0 up + +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" route add default via dead:1::1 + +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns2" route add default via dead:2::1 + +ip -net "$ns3" addr add 10.0.3.99/24 dev eth0 +ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad +ip -net "$ns3" route add default via 10.0.3.1 +ip -net "$ns3" route add default via dead:3::1 + +ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null + +test_ping() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then + return 2 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.3.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:3::99 > /dev/null; then + return 2 + fi + + return 0 +} + +test_ping_router() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then + return 3 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then + return 4 + fi + + return 0 +} + + +listener_ready() +{ + local ns="$1" + local port="$2" + local proto="$3" + ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port" +} + +test_tproxy() +{ + local traffic_origin="$1" + local ip_proto="$2" + local expect_ns1_ns2="$3" + local expect_ns1_ns3="$4" + local expect_nsrouter_ns2="$5" + local expect_nsrouter_ns3="$6" + + # derived variables + local testname="test_${ip_proto}_tcp_${traffic_origin}" + local socat_ipproto + local ns1_ip + local ns2_ip + local ns3_ip + local ns2_target + local ns3_target + local nftables_subject + local ip_command + + # socat 1.8.0 has a bug that requires to specify the IP family to bind (fixed in 1.8.0.1) + case $ip_proto in + "ip") + socat_ipproto="-4" + ns1_ip=10.0.1.99 + ns2_ip=10.0.2.99 + ns3_ip=10.0.3.99 + ns2_target="tcp:$ns2_ip:8080" + ns3_target="tcp:$ns3_ip:8080" + nftables_subject="ip daddr $ns2_ip tcp dport 8080" + ip_command="ip" + ;; + "ip6") + socat_ipproto="-6" + ns1_ip=dead:1::99 + ns2_ip=dead:2::99 + ns3_ip=dead:3::99 + ns2_target="tcp:[$ns2_ip]:8080" + ns3_target="tcp:[$ns3_ip]:8080" + nftables_subject="ip6 daddr $ns2_ip tcp dport 8080" + ip_command="ip -6" + ;; + *) + echo "FAIL: unsupported protocol" + exit 255 + ;; + esac + + case $traffic_origin in + # to capture the local originated traffic we need to mark the outgoing + # traffic so the policy based routing rule redirects it and can be processed + # in the prerouting chain. + "local") + nftables_rules=" +flush ruleset +table inet filter { + chain divert { + type filter hook prerouting priority 0; policy accept; + $nftables_subject tproxy $ip_proto to :12345 meta mark set 1 accept + } + chain output { + type route hook output priority 0; policy accept; + $nftables_subject meta mark set 1 accept + } +}" + ;; + "forward") + nftables_rules=" +flush ruleset +table inet filter { + chain divert { + type filter hook prerouting priority 0; policy accept; + $nftables_subject tproxy $ip_proto to :12345 meta mark set 1 accept + } +}" + ;; + *) + echo "FAIL: unsupported parameter for traffic origin" + exit 255 + ;; + esac + + # shellcheck disable=SC2046 # Intended splitting of ip_command + ip netns exec "$nsrouter" $ip_command rule add fwmark 1 table 100 + ip netns exec "$nsrouter" $ip_command route add local "${ns2_ip}" dev lo table 100 + echo "$nftables_rules" | ip netns exec "$nsrouter" nft -f /dev/stdin + + timeout "$timeout" ip netns exec "$nsrouter" socat "$socat_ipproto" tcp-listen:12345,fork,ip-transparent SYSTEM:"cat" 2>/dev/null & + local tproxy_pid=$! + + timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" tcp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null & + local server2_pid=$! + + timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" tcp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null & + local server3_pid=$! + + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-t" + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" 8080 "-t" + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns3" 8080 "-t" + + local result + # request from ns1 to ns2 (forwarded traffic) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO "$ns2_target") + if [ "$result" == "$expect_ns1_ns2" ] ;then + echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2" + else + echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2, not \"${expect_ns1_ns2}\" as intended" + ret=1 + fi + + # request from ns1 to ns3(forwarded traffic) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO "$ns3_target") + if [ "$result" = "$expect_ns1_ns3" ] ;then + echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3" + else + echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3, not \"$expect_ns1_ns3\" as intended" + ret=1 + fi + + # request from nsrouter to ns2 (localy originated traffic) + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO "$ns2_target") + if [ "$result" == "$expect_nsrouter_ns2" ] ;then + echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2" + else + echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2, not \"$expect_nsrouter_ns2\" as intended" + ret=1 + fi + + # request from nsrouter to ns3 (localy originated traffic) + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO "$ns3_target") + if [ "$result" = "$expect_nsrouter_ns3" ] ;then + echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3" + else + echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3, not \"$expect_nsrouter_ns3\" as intended" + ret=1 + fi + + # cleanup + kill "$tproxy_pid" "$server2_pid" "$server3_pid" 2>/dev/null + # shellcheck disable=SC2046 # Intended splitting of ip_command + ip netns exec "$nsrouter" $ip_command rule del fwmark 1 table 100 + ip netns exec "$nsrouter" $ip_command route flush table 100 +} + + +test_ipv4_tcp_forward() +{ + local traffic_origin="forward" + local ip_proto="ip" + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="PONG_NS2" + local expect_nsrouter_ns3="PONG_NS3" + + test_tproxy "$traffic_origin" \ + "$ip_proto" \ + "$expect_ns1_ns2" \ + "$expect_ns1_ns3" \ + "$expect_nsrouter_ns2" \ + "$expect_nsrouter_ns3" +} + +test_ipv4_tcp_local() +{ + local traffic_origin="local" + local ip_proto="ip" + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="I_M_PROXIED" + local expect_nsrouter_ns3="PONG_NS3" + + test_tproxy "$traffic_origin" \ + "$ip_proto" \ + "$expect_ns1_ns2" \ + "$expect_ns1_ns3" \ + "$expect_nsrouter_ns2" \ + "$expect_nsrouter_ns3" +} + +test_ipv6_tcp_forward() +{ + local traffic_origin="forward" + local ip_proto="ip6" + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="PONG_NS2" + local expect_nsrouter_ns3="PONG_NS3" + + test_tproxy "$traffic_origin" \ + "$ip_proto" \ + "$expect_ns1_ns2" \ + "$expect_ns1_ns3" \ + "$expect_nsrouter_ns2" \ + "$expect_nsrouter_ns3" +} + +test_ipv6_tcp_local() +{ + local traffic_origin="local" + local ip_proto="ip6" + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="I_M_PROXIED" + local expect_nsrouter_ns3="PONG_NS3" + + test_tproxy "$traffic_origin" \ + "$ip_proto" \ + "$expect_ns1_ns2" \ + "$expect_ns1_ns3" \ + "$expect_nsrouter_ns2" \ + "$expect_nsrouter_ns3" +} + +if test_ping; then + # queue bypass works (rules were skipped, no listener) + echo "PASS: ${ns1} can reach ${ns2}" +else + echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2 + exit $ret +fi + +test_ipv4_tcp_forward +test_ipv4_tcp_local +test_ipv6_tcp_forward +test_ipv6_tcp_local + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh new file mode 100755 index 000000000000..d16de13fe5a7 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh @@ -0,0 +1,262 @@ +#!/bin/bash +# +# This tests tproxy on the following scenario: +# +# +------------+ +# +-------+ | nsrouter | +-------+ +# |ns1 |.99 .1| |.1 .99| ns2| +# | eth0|---------------|veth0 veth1|------------------|eth0 | +# | | 10.0.1.0/24 | | 10.0.2.0/24 | | +# +-------+ dead:1::/64 | veth2 | dead:2::/64 +-------+ +# +------------+ +# |.1 +# | +# | +# | +-------+ +# | .99| ns3| +# +------------------------|eth0 | +# 10.0.3.0/24 | | +# dead:3::/64 +-------+ +# +# The tproxy implementation acts as an echo server so the client +# must receive the same message it sent if it has been proxied. +# If is not proxied the servers return PONG_NS# with the number +# of the namespace the server is running. +# shellcheck disable=SC2162,SC2317 + +source lib.sh +ret=0 +# UDP is slow +timeout=15 + +cleanup() +{ + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + ip netns pids "$ns3" | xargs kill 2>/dev/null + ip netns pids "$nsrouter" | xargs kill 2>/dev/null + + cleanup_all_ns +} + +checktool "nft --version" "test without nft tool" +checktool "socat -h" "run test without socat" + +trap cleanup EXIT +setup_ns ns1 ns2 ns3 nsrouter + +if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2" +ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3" + +ip -net "$nsrouter" link set veth0 up +ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$nsrouter" link set veth1 up +ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1 +ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad + +ip -net "$nsrouter" link set veth2 up +ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2 +ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad + +ip -net "$ns1" link set eth0 up +ip -net "$ns2" link set eth0 up +ip -net "$ns3" link set eth0 up + +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" route add default via dead:1::1 + +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns2" route add default via dead:2::1 + +ip -net "$ns3" addr add 10.0.3.99/24 dev eth0 +ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad +ip -net "$ns3" route add default via 10.0.3.1 +ip -net "$ns3" route add default via dead:3::1 + +ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null + +test_ping() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then + return 2 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.3.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:3::99 > /dev/null; then + return 2 + fi + + return 0 +} + +test_ping_router() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then + return 3 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then + return 4 + fi + + return 0 +} + + +listener_ready() +{ + local ns="$1" + local port="$2" + local proto="$3" + ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port" +} + +test_tproxy_udp_forward() +{ + local ip_proto="$1" + + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="PONG_NS2" + local expect_nsrouter_ns3="PONG_NS3" + + # derived variables + local testname="test_${ip_proto}_udp_forward" + local socat_ipproto + local ns1_ip + local ns2_ip + local ns3_ip + local ns1_ip_port + local ns2_ip_port + local ns3_ip_port + local ip_command + + # socat 1.8.0 has a bug that requires to specify the IP family to bind (fixed in 1.8.0.1) + case $ip_proto in + "ip") + socat_ipproto="-4" + ns1_ip=10.0.1.99 + ns2_ip=10.0.2.99 + ns3_ip=10.0.3.99 + ns1_ip_port="$ns1_ip:18888" + ns2_ip_port="$ns2_ip:8080" + ns3_ip_port="$ns3_ip:8080" + ip_command="ip" + ;; + "ip6") + socat_ipproto="-6" + ns1_ip=dead:1::99 + ns2_ip=dead:2::99 + ns3_ip=dead:3::99 + ns1_ip_port="[$ns1_ip]:18888" + ns2_ip_port="[$ns2_ip]:8080" + ns3_ip_port="[$ns3_ip]:8080" + ip_command="ip -6" + ;; + *) + echo "FAIL: unsupported protocol" + exit 255 + ;; + esac + + # shellcheck disable=SC2046 # Intended splitting of ip_command + ip netns exec "$nsrouter" $ip_command rule add fwmark 1 table 100 + ip netns exec "$nsrouter" $ip_command route add local "$ns2_ip" dev lo table 100 + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet filter { + chain divert { + type filter hook prerouting priority 0; policy accept; + $ip_proto daddr $ns2_ip udp dport 8080 tproxy $ip_proto to :12345 meta mark set 1 accept + } +} +EOF + + timeout "$timeout" ip netns exec "$nsrouter" socat -u "$socat_ipproto" udp-listen:12345,fork,ip-transparent,reuseport udp:"$ns1_ip_port",ip-transparent,reuseport,bind="$ns2_ip_port" 2>/dev/null & + local tproxy_pid=$! + + timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null & + local server2_pid=$! + + timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null & + local server3_pid=$! + + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-u" + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" 8080 "-u" + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns3" 8080 "-u" + + local result + # request from ns1 to ns2 (forwarded traffic) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",sourceport=18888) + if [ "$result" == "$expect_ns1_ns2" ] ;then + echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2" + else + echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2, not \"${expect_ns1_ns2}\" as intended" + ret=1 + fi + + # request from ns1 to ns3 (forwarded traffic) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port") + if [ "$result" = "$expect_ns1_ns3" ] ;then + echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3" + else + echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3, not \"$expect_ns1_ns3\" as intended" + ret=1 + fi + + # request from nsrouter to ns2 (localy originated traffic) + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port") + if [ "$result" == "$expect_nsrouter_ns2" ] ;then + echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2" + else + echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2, not \"$expect_nsrouter_ns2\" as intended" + ret=1 + fi + + # request from nsrouter to ns3 (localy originated traffic) + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port") + if [ "$result" = "$expect_nsrouter_ns3" ] ;then + echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3" + else + echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3, not \"$expect_nsrouter_ns3\" as intended" + ret=1 + fi + + # cleanup + kill "$tproxy_pid" "$server2_pid" "$server3_pid" 2>/dev/null + # shellcheck disable=SC2046 # Intended splitting of ip_command + ip netns exec "$nsrouter" $ip_command rule del fwmark 1 table 100 + ip netns exec "$nsrouter" $ip_command route flush table 100 +} + + +if test_ping; then + # queue bypass works (rules were skipped, no listener) + echo "PASS: ${ns1} can reach ${ns2}" +else + echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2 + exit $ret +fi + +test_tproxy_udp_forward "ip" +test_tproxy_udp_forward "ip6" + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_zones_many.sh b/tools/testing/selftests/net/netfilter/nft_zones_many.sh new file mode 100755 index 000000000000..7db9982ba5a6 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_zones_many.sh @@ -0,0 +1,164 @@ +#!/bin/bash + +# Test insertion speed for packets with identical addresses/ports +# that are all placed in distinct conntrack zones. + +source lib.sh + +zones=2000 +[ "$KSFT_MACHINE_SLOW" = yes ] && zones=500 + +have_ct_tool=0 +ret=0 + +cleanup() +{ + cleanup_all_ns +} + +checktool "nft --version" "run test without nft tool" +checktool "socat -V" "run test without socat tool" + +setup_ns ns1 + +trap cleanup EXIT + +if conntrack -V > /dev/null 2>&1; then + have_ct_tool=1 +fi + +test_zones() { + local max_zones=$1 + +ip netns exec "$ns1" nft -f /dev/stdin<<EOF +flush ruleset +table inet raw { + map rndzone { + typeof numgen inc mod $max_zones : ct zone + } + + chain output { + type filter hook output priority -64000; policy accept; + udp dport 12345 ct zone set numgen inc mod 65536 map @rndzone + } +} +EOF +if [ "$?" -ne 0 ];then + echo "SKIP: Cannot add nftables rules" + exit $ksft_skip +fi + + ip netns exec "$ns1" sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600 + + ( + echo "add element inet raw rndzone {" + for i in $(seq 1 "$max_zones");do + echo -n "$i : $i" + if [ "$i" -lt "$max_zones" ]; then + echo "," + else + echo "}" + fi + done + ) | ip netns exec "$ns1" nft -f /dev/stdin + + local i=0 + local j=0 + local outerstart + local stop + outerstart=$(date +%s%3N) + stop=$outerstart + + while [ "$i" -lt "$max_zones" ]; do + local start + start=$(date +%s%3N) + i=$((i + 1000)) + j=$((j + 1)) + # nft rule in output places each packet in a different zone. + dd if=/dev/zero bs=8k count=1000 2>/dev/null | ip netns exec "$ns1" socat -u STDIN UDP:127.0.0.1:12345,sourceport=12345 + if [ $? -ne 0 ] ;then + ret=1 + break + fi + + stop=$(date +%s%3N) + local duration=$((stop-start)) + echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)" + done + + if [ "$have_ct_tool" -eq 1 ]; then + local count duration + count=$(ip netns exec "$ns1" conntrack -C) + duration=$((stop-outerstart)) + + if [ "$count" -ge "$max_zones" ]; then + echo "PASS: inserted $count entries from packet path in $duration ms total" + else + ip netns exec "$ns1" conntrack -S 1>&2 + echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries" + ret=1 + fi + fi + + if [ $ret -ne 0 ];then + echo "FAIL: insert $max_zones entries from packet path" 1>&2 + fi +} + +test_conntrack_tool() { + local max_zones=$1 + + ip netns exec "$ns1" conntrack -F >/dev/null 2>/dev/null + + local outerstart start stop i + outerstart=$(date +%s%3N) + start=$(date +%s%3N) + stop="$start" + i=0 + while [ "$i" -lt "$max_zones" ]; do + i=$((i + 1)) + ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \ + --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1 + if [ $? -ne 0 ];then + ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \ + --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null + echo "FAIL: conntrack -I returned an error" + ret=1 + break + fi + + if [ $((i%1000)) -eq 0 ];then + stop=$(date +%s%3N) + + local duration=$((stop-start)) + echo "PASS: added 1000 entries in $duration ms (now $i total)" + start=$stop + fi + done + + local count + local duration + count=$(ip netns exec "$ns1" conntrack -C) + duration=$((stop-outerstart)) + + if [ "$count" -eq "$max_zones" ]; then + echo "PASS: inserted $count entries via ctnetlink in $duration ms" + else + ip netns exec "$ns1" conntrack -S 1>&2 + echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)" + ret=1 + fi +} + +test_zones $zones + +if [ "$have_ct_tool" -eq 1 ];then + test_conntrack_tool $zones +else + echo "SKIP: Could not run ctnetlink insertion test without conntrack tool" + if [ $ret -eq 0 ];then + exit $ksft_skip + fi +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/packetdrill/common.sh b/tools/testing/selftests/net/netfilter/packetdrill/common.sh new file mode 100755 index 000000000000..ed36d535196d --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/common.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# for debugging set net.netfilter.nf_log_all_netns=1 in init_net +# or do not use net namespaces. +modprobe -q nf_conntrack +sysctl -q net.netfilter.nf_conntrack_log_invalid=6 + +# Flush old cached data (fastopen cookies). +ip tcp_metrics flush all > /dev/null 2>&1 + +# TCP min, default, and max receive and send buffer sizes. +sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))" +sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304" + +# TCP congestion control. +sysctl -q net.ipv4.tcp_congestion_control=cubic + +# TCP slow start after idle. +sysctl -q net.ipv4.tcp_slow_start_after_idle=0 + +# TCP Explicit Congestion Notification (ECN) +sysctl -q net.ipv4.tcp_ecn=0 + +sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1 + +# Override the default qdisc on the tun device. +# Many tests fail with timing errors if the default +# is FQ and that paces their flows. +tc qdisc add dev tun0 root pfifo + +# Enable conntrack +$xtables -A INPUT -m conntrack --ctstate NEW -p tcp --syn diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt new file mode 100644 index 000000000000..d755bd64c54f --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt @@ -0,0 +1,118 @@ +// check that already-acked (retransmitted) packet is let through rather +// than tagged as INVALID. + +`packetdrill/common.sh` + +// should set -P DROP but it disconnects VM w.o. extra netns ++0 `$xtables -A INPUT -m conntrack --ctstate INVALID -j DROP` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 10) = 0 + ++0 < S 0:0(0) win 32792 <mss 1000> ++0 > S. 0:0(0) ack 1 <mss 1460> ++.01 < . 1:1(0) ack 1 win 65535 ++0 accept(3, ..., ...) = 4 + ++0.0001 < P. 1:1461(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 1461 win 65535 ++0.0001 < P. 1461:2921(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 2921 win 65535 ++0.0001 < P. 2921:4381(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 4381 win 65535 ++0.0001 < P. 4381:5841(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 5841 win 65535 ++0.0001 < P. 5841:7301(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 7301 win 65535 ++0.0001 < P. 7301:8761(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 8761 win 65535 ++0.0001 < P. 8761:10221(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 10221 win 65535 ++0.0001 < P. 10221:11681(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 11681 win 65535 ++0.0001 < P. 11681:13141(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 13141 win 65535 ++0.0001 < P. 13141:14601(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 14601 win 65535 ++0.0001 < P. 14601:16061(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 16061 win 65535 ++0.0001 < P. 16061:17521(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 17521 win 65535 ++0.0001 < P. 17521:18981(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 18981 win 65535 ++0.0001 < P. 18981:20441(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 20441 win 65535 ++0.0001 < P. 20441:21901(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 21901 win 65535 ++0.0001 < P. 21901:23361(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 23361 win 65535 ++0.0001 < P. 23361:24821(1460) ack 1 win 257 +0.055 > . 1:1(0) ack 24821 win 65535 ++0.0001 < P. 24821:26281(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 26281 win 65535 ++0.0001 < P. 26281:27741(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 27741 win 65535 ++0.0001 < P. 27741:29201(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 29201 win 65535 ++0.0001 < P. 29201:30661(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 30661 win 65535 ++0.0001 < P. 30661:32121(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 32121 win 65535 ++0.0001 < P. 32121:33581(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 33581 win 65535 ++0.0001 < P. 33581:35041(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 35041 win 65535 ++0.0001 < P. 35041:36501(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 36501 win 65535 ++0.0001 < P. 36501:37961(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 37961 win 65535 ++0.0001 < P. 37961:39421(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 39421 win 65535 ++0.0001 < P. 39421:40881(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 40881 win 65535 ++0.0001 < P. 40881:42341(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 42341 win 65535 ++0.0001 < P. 42341:43801(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 43801 win 65535 ++0.0001 < P. 43801:45261(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 45261 win 65535 ++0.0001 < P. 45261:46721(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 46721 win 65535 ++0.0001 < P. 46721:48181(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 48181 win 65535 ++0.0001 < P. 48181:49641(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 49641 win 65535 ++0.0001 < P. 49641:51101(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 51101 win 65535 ++0.0001 < P. 51101:52561(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 52561 win 65535 ++0.0001 < P. 52561:54021(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 54021 win 65535 ++0.0001 < P. 54021:55481(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 55481 win 65535 ++0.0001 < P. 55481:56941(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 56941 win 65535 ++0.0001 < P. 56941:58401(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 58401 win 65535 ++0.0001 < P. 58401:59861(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 59861 win 65535 ++0.0001 < P. 59861:61321(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 61321 win 65535 ++0.0001 < P. 61321:62781(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 62781 win 65535 ++0.0001 < P. 62781:64241(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 64241 win 65535 ++0.0001 < P. 64241:65701(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 65701 win 65535 ++0.0001 < P. 65701:67161(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 67161 win 65535 + +// nf_ct_proto_6: SEQ is under the lower bound (already ACKed data retransmitted) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.24.72 LEN=1500 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=34375 DPT=8080 SEQ=1 ACK=4162510439 WINDOW=257 RES=0x00 ACK PSH URGP=0 ++0.0001 < P. 1:1461(1460) ack 1 win 257 + +// only sent if above packet isn't flagged as invalid ++.0 > . 1:1(0) ack 67161 win 65535 + ++0 `$xtables -D INPUT -m conntrack --ctstate INVALID -j DROP` diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt new file mode 100644 index 000000000000..dccdd4c009c6 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt @@ -0,0 +1,62 @@ +// check RST packet that doesn't exactly match expected next sequence +// number still transitions conntrack state to CLOSE iff its already in +// FIN/CLOSE_WAIT. + +`packetdrill/common.sh` + +// 5.771921 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture] +// 5.771994 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture] +// 5.772212 client_ip > server_ip TCP 66 45020 > 443 [ACK] Seq=1905874048 Ack=781810658 Win=36352 Len=0 TSval=3317842872 TSecr=675936334 +// 5.787924 server_ip > client_ip TLSv1.2 1300 [Packet size limited during capture] +// 5.788126 server_ip > client_ip TLSv1.2 90 Application Data +// 5.788207 server_ip > client_ip TCP 66 443 > 45020 [FIN, ACK] Seq=781811916 Ack=1905874048 Win=31104 Len=0 TSval=675936350 TSecr=3317842872 +// 5.788447 client_ip > server_ip TLSv1.2 90 Application Data +// 5.788479 client_ip > server_ip TCP 66 45020 > 443 [RST, ACK] Seq=1905874072 Ack=781811917 Win=39040 Len=0 TSval=3317842889 TSecr=675936350 +// 5.788581 server_ip > client_ip TCP 54 8443 > 45020 [RST] Seq=781811892 Win=0 Len=0 + ++0 `iptables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP` ++0 `iptables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8> + ++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460> + ++0 > . 1:1(0) ack 1 win 65535 ++0 < . 1:1001(1000) ack 1 win 65535 ++0 < . 1001:2001(1000) ack 1 win 65535 ++0 < . 2001:3001(1000) ack 1 win 65535 + ++0 > . 1:1(0) ack 1001 win 65535 ++0 > . 1:1(0) ack 2001 win 65535 ++0 > . 1:1(0) ack 3001 win 65535 + ++0 write(3, ..., 1000) = 1000 + ++0.0 > P. 1:1001(1000) ack 3001 win 65535 + ++0.1 read(3, ..., 1000) = 1000 + +// Conntrack should move to FIN_WAIT, then CLOSE_WAIT. ++0 < F. 3001:3001(0) ack 1001 win 65535 ++0 > . 1001:1001(0) ack 3002 win 65535 + ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE_WAIT` + ++1 close(3) = 0 +// RST: unread data. FIN was seen, hence ack + 1 ++0 > R. 1001:1001(0) ack 3002 win 65535 +// ... and then, CLOSE. ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ ` + +// Spurious RST from peer -- no sk state. Should NOT get +// marked INVALID, because conntrack is already closing. ++0.1 < R 2001:2001(0) win 0 + +// No packets should have been marked INVALID ++0 `iptables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"` ++0 `iptables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"` diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt new file mode 100644 index 000000000000..686f18a3d9ef --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt @@ -0,0 +1,59 @@ +// check that out of window resets are marked as INVALID and conntrack remains +// in ESTABLISHED state. + +`packetdrill/common.sh` + ++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP` ++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8> + ++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460> + ++0 > . 1:1(0) ack 1 win 65535 ++0 < . 1:1001(1000) ack 1 win 65535 ++0 < . 1001:2001(1000) ack 1 win 65535 ++0 < . 2001:3001(1000) ack 1 win 65535 + ++0 > . 1:1(0) ack 1001 win 65535 ++0 > . 1:1(0) ack 2001 win 65535 ++0 > . 1:1(0) ack 3001 win 65535 + ++0 write(3, ..., 1000) = 1000 + +// out of window ++0.0 < R 0:0(0) win 0 ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED` + +// out of window ++0.0 < R 1000000:1000000(0) win 0 ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED` + +// in-window but not exact match ++0.0 < R 42:42(0) win 0 ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED` + ++0.0 > P. 1:1001(1000) ack 3001 win 65535 + ++0.1 read(3, ..., 1000) = 1000 ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED` + ++0 < . 3001:3001(0) ack 1001 win 65535 + ++0.0 < R. 3000:3000(0) ack 1001 win 0 ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED` + +// exact next sequence ++0.0 < R. 3001:3001(0) ack 1001 win 0 +// Conntrack should move to CLOSE + +// Expect four invalid RSTs ++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 4 "` ++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"` + ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ ` diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt new file mode 100644 index 000000000000..3442cd29bc93 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt @@ -0,0 +1,44 @@ +// Check connection re-use, i.e. peer that receives the SYN answers with +// a challenge-ACK. +// Check that conntrack lets all packets pass, including the challenge ack, +// and that a new connection is established. + +`packetdrill/common.sh` + +// S > +// . < (challnge-ack) +// R. > +// S > +// S. < +// Expected outcome: established connection. + ++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP` ++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) +0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8> + +// Challenge ACK, old incarnation. +0.1 < . 145824453:145824453(0) ack 643160523 win 240 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0> + ++0.01 > R 643160523:643160523(0) win 0 + ++0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT` + +// Must go through. ++0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8> + +// correct synack ++0.1 < S. 0:0(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0> + +// 3whs completes. ++0.01 > . 1:1(0) ack 1 win 256 <nop,nop,TS val 1 ecr 1> + ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED` + +// No packets should have been marked INVALID ++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"` ++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"` diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt new file mode 100644 index 000000000000..3047160c4bf3 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt @@ -0,0 +1,51 @@ +// Check conntrack copes with syn/ack reply for a previous, old incarnation. + +// tcpdump with buggy sequence +// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083107423 ecr 0,nop,wscale 7], length 0 +// OLD synack, for old/previous S +// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 145824453, ack 643160523, win 65535, options [mss 8952,nop,wscale 5,TS val 3215437785 ecr 2082921663,nop,nop], length 0 +// This reset never makes it to the endpoint, elided in the packetdrill script +// 10.192.171.30.2049 > 10.176.25.8.829: Flags [R.], seq 1, ack 1, win 65535, options [mss 8952,nop,wscale 5,TS val 3215443451 ecr 2082921663,nop,nop], length 0 +// Syn retransmit, no change +// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083115583 ecr 0,nop,wscale 7], length 0 +// CORRECT synack, should be accepted, but conntrack classified this as INVALID: +// SEQ is over the upper bound (over the window of the receiver) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.37.78 LEN=40 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=8080 DPT=34500 SEQ=162602411 ACK=2124350315 .. +// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 162602410, ack 2375731742, win 65535, options [mss 8952,nop,wscale 5,TS val 3215445754 ecr 2083115583,nop,nop], length 0 + +`packetdrill/common.sh` + ++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP` ++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) +0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8> + +// bogus/outdated synack, invalid ack value +0.1 < S. 145824453:145824453(0) ack 643160523 win 240 <mss 1440,nop,nop,TS val 1 ecr 1,nop,wscale 0> + +// syn retransmitted +1.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8> ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT` + +// correct synack ++0 < S. 145758918:145758918(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0> ++0 write(3, ..., 1) = 1 + +// with buggy conntrack above packet is dropped, so SYN rtx is seen: +// script packet: 1.054007 . 1:1(0) ack 16777958 win 256 <nop,nop,TS val 1033 ecr 1> +// actual packet: 3.010000 S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8> ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED` + ++0 > P. 1:2(1) ack 4294901762 win 256 <nop,nop,TS val 1067 ecr 1> + ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ASSURED | grep -q ESTABLISHED` + +// No packets should have been marked INVALID in OUTPUT direction, 1 in INPUT ++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"` ++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 1 "` + ++0 `$xtables -D INPUT -p tcp -m conntrack --ctstate INVALID -j DROP` ++0 `$xtables -D OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP` diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt new file mode 100644 index 000000000000..842242f8ccf7 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt @@ -0,0 +1,34 @@ +// Check reception of another SYN while we have an established conntrack state. +// Challenge ACK is supposed to pass through, RST reply should clear conntrack +// state and SYN retransmit should give us new 'SYN_RECV' connection state. + +`packetdrill/common.sh` + +// should show a match if bug is present: ++0 `iptables -A INPUT -m conntrack --ctstate INVALID -p tcp --tcp-flags SYN,ACK SYN,ACK` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 10) = 0 + ++0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7, TS val 1 ecr 0,nop,nop> ++0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,TS val 100 ecr 1,nop,wscale 8> ++.01 < . 1:1(0) ack 1 win 257 <TS val 1 ecr 100,nop,nop> ++0 accept(3, ..., ...) = 4 + ++0 < P. 1:101(100) ack 1 win 257 <TS val 2 ecr 100,nop,nop> ++.001 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 110 ecr 2> ++0 read(4, ..., 101) = 100 + +1.0 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 7, TS val 233 ecr 0,nop,nop> +// Won't expect this: challenge ack. + ++0 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 112 ecr 2> ++0 < R. 101:101(0) ack 1 win 257 ++0 close(4) = 0 + +1.5 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 0, TS val 233 ecr 0,nop,nop> + ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep -q SYN_RECV` ++0 `iptables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"` diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh new file mode 100755 index 000000000000..24ad41d526d9 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/rpath.sh @@ -0,0 +1,181 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +# search for legacy iptables (it uses the xtables extensions +if iptables-legacy --version >/dev/null 2>&1; then + iptables='iptables-legacy' +elif iptables --version >/dev/null 2>&1; then + iptables='iptables' +else + iptables='' +fi + +if ip6tables-legacy --version >/dev/null 2>&1; then + ip6tables='ip6tables-legacy' +elif ip6tables --version >/dev/null 2>&1; then + ip6tables='ip6tables' +else + ip6tables='' +fi + +if nft --version >/dev/null 2>&1; then + nft='nft' +else + nft='' +fi + +if [ -z "$iptables$ip6tables$nft" ]; then + echo "SKIP: Test needs iptables, ip6tables or nft" + exit $ksft_skip +fi + +trap cleanup_all_ns EXIT + +# create two netns, keep IPv6 address when moving into VRF +setup_ns ns1 ns2 +ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1 + +# a standard connection between the netns, should not trigger rp filter +ip -net "$ns1" link add v0 type veth peer name v0 netns "$ns2" +ip -net "$ns1" link set v0 up; ip -net "$ns2" link set v0 up +ip -net "$ns1" a a 192.168.23.2/24 dev v0 +ip -net "$ns2" a a 192.168.23.1/24 dev v0 +ip -net "$ns1" a a fec0:23::2/64 dev v0 nodad +ip -net "$ns2" a a fec0:23::1/64 dev v0 nodad + +# rp filter testing: ns1 sends packets via v0 which ns2 would route back via d0 +ip -net "$ns2" link add d0 type dummy +ip -net "$ns2" link set d0 up +ip -net "$ns1" a a 192.168.42.2/24 dev v0 +ip -net "$ns2" a a 192.168.42.1/24 dev d0 +ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad +ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad + +# avoid neighbor lookups and enable martian IPv6 pings +ns2_hwaddr=$(ip -net "$ns2" link show dev v0 | \ + sed -n 's, *link/ether \([^ ]*\) .*,\1,p') +ns1_hwaddr=$(ip -net "$ns1" link show dev v0 | \ + sed -n 's, *link/ether \([^ ]*\) .*,\1,p') +ip -net "$ns1" neigh add fec0:42::1 lladdr "$ns2_hwaddr" nud permanent dev v0 +ip -net "$ns1" neigh add fec0:23::1 lladdr "$ns2_hwaddr" nud permanent dev v0 +ip -net "$ns2" neigh add fec0:42::2 lladdr "$ns1_hwaddr" nud permanent dev d0 +ip -net "$ns2" neigh add fec0:23::2 lladdr "$ns1_hwaddr" nud permanent dev v0 + +# firewall matches to test +[ -n "$iptables" ] && { + common='-t raw -A PREROUTING -s 192.168.0.0/16' + common+=' -p icmp --icmp-type echo-request' + if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then + echo "Cannot add rpfilter rule" + exit $ksft_skip + fi + ip netns exec "$ns2" "$iptables" $common -m rpfilter --invert +} +[ -n "$ip6tables" ] && { + common='-t raw -A PREROUTING -s fec0::/16' + common+=' -p icmpv6 --icmpv6-type echo-request' + if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then + echo "Cannot add rpfilter rule" + exit $ksft_skip + fi + ip netns exec "$ns2" "$ip6tables" $common -m rpfilter --invert +} +[ -n "$nft" ] && ip netns exec "$ns2" $nft -f - <<EOF +table inet t { + chain c { + type filter hook prerouting priority raw; + ip saddr 192.168.0.0/16 icmp type echo-request \ + fib saddr . iif oif exists counter + ip6 saddr fec0::/16 icmpv6 type echo-request \ + fib saddr . iif oif exists counter + } +} +EOF + +die() { + echo "FAIL: $*" + #ip netns exec "$ns2" "$iptables" -t raw -vS + #ip netns exec "$ns2" "$ip6tables" -t raw -vS + #ip netns exec "$ns2" nft list ruleset + exit 1 +} + +# check rule counters, return true if rule did not match +ipt_zero_rule() { # (command) + [ -n "$1" ] || return 0 + ip netns exec "$ns2" "$1" -t raw -vS | grep -q -- "-m rpfilter -c 0 0" +} +ipt_zero_reverse_rule() { # (command) + [ -n "$1" ] || return 0 + ip netns exec "$ns2" "$1" -t raw -vS | \ + grep -q -- "-m rpfilter --invert -c 0 0" +} +nft_zero_rule() { # (family) + [ -n "$nft" ] || return 0 + ip netns exec "$ns2" "$nft" list chain inet t c | \ + grep -q "$1 saddr .* counter packets 0 bytes 0" +} + +netns_ping() { # (netns, args...) + local netns="$1" + shift + ip netns exec "$netns" ping -q -c 1 -W 1 "$@" >/dev/null +} + +clear_counters() { + [ -n "$iptables" ] && ip netns exec "$ns2" "$iptables" -t raw -Z + [ -n "$ip6tables" ] && ip netns exec "$ns2" "$ip6tables" -t raw -Z + if [ -n "$nft" ]; then + ( + echo "delete table inet t"; + ip netns exec "$ns2" $nft -s list table inet t; + ) | ip netns exec "$ns2" $nft -f - + fi +} + +testrun() { + clear_counters + + # test 1: martian traffic should fail rpfilter matches + netns_ping "$ns1" -I v0 192.168.42.1 && \ + die "martian ping 192.168.42.1 succeeded" + netns_ping "$ns1" -I v0 fec0:42::1 && \ + die "martian ping fec0:42::1 succeeded" + + ipt_zero_rule "$iptables" || die "iptables matched martian" + ipt_zero_rule "$ip6tables" || die "ip6tables matched martian" + ipt_zero_reverse_rule "$iptables" && die "iptables not matched martian" + ipt_zero_reverse_rule "$ip6tables" && die "ip6tables not matched martian" + nft_zero_rule ip || die "nft IPv4 matched martian" + nft_zero_rule ip6 || die "nft IPv6 matched martian" + + clear_counters + + # test 2: rpfilter match should pass for regular traffic + netns_ping "$ns1" 192.168.23.1 || \ + die "regular ping 192.168.23.1 failed" + netns_ping "$ns1" fec0:23::1 || \ + die "regular ping fec0:23::1 failed" + + ipt_zero_rule "$iptables" && die "iptables match not effective" + ipt_zero_rule "$ip6tables" && die "ip6tables match not effective" + ipt_zero_reverse_rule "$iptables" || die "iptables match over-effective" + ipt_zero_reverse_rule "$ip6tables" || die "ip6tables match over-effective" + nft_zero_rule ip && die "nft IPv4 match not effective" + nft_zero_rule ip6 && die "nft IPv6 match not effective" + +} + +testrun + +# repeat test with vrf device in $ns2 +ip -net "$ns2" link add vrf0 type vrf table 10 +ip -net "$ns2" link set vrf0 up +ip -net "$ns2" link set v0 master vrf0 + +testrun + +echo "PASS: netfilter reverse path match works as intended" +exit 0 diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c new file mode 100644 index 000000000000..21bb1cfd8a85 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/sctp_collision.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> + +int main(int argc, char *argv[]) +{ + struct sockaddr_in saddr = {}, daddr = {}; + int sd, ret, len = sizeof(daddr); + struct timeval tv = {25, 0}; + char buf[] = "hello"; + + if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { + printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n", + argv[0]); + return -1; + } + + sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP); + if (sd < 0) { + printf("Failed to create sd\n"); + return -1; + } + + saddr.sin_family = AF_INET; + saddr.sin_addr.s_addr = inet_addr(argv[2]); + saddr.sin_port = htons(atoi(argv[3])); + + ret = bind(sd, (struct sockaddr *)&saddr, sizeof(saddr)); + if (ret < 0) { + printf("Failed to bind to address\n"); + goto out; + } + + ret = listen(sd, 5); + if (ret < 0) { + printf("Failed to listen on port\n"); + goto out; + } + + daddr.sin_family = AF_INET; + daddr.sin_addr.s_addr = inet_addr(argv[4]); + daddr.sin_port = htons(atoi(argv[5])); + + /* make test shorter than 25s */ + ret = setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + if (ret < 0) { + printf("Failed to setsockopt SO_RCVTIMEO\n"); + goto out; + } + + if (!strcmp(argv[1], "server")) { + sleep(1); /* wait a bit for client's INIT */ + ret = connect(sd, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to connect to peer\n"); + goto out; + } + ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len); + if (ret < 0) { + printf("Failed to recv msg %d\n", ret); + goto out; + } + ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to send msg %d\n", ret); + goto out; + } + printf("Server: sent! %d\n", ret); + } + + if (!strcmp(argv[1], "client")) { + usleep(300000); /* wait a bit for server's listening */ + ret = connect(sd, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to connect to peer\n"); + goto out; + } + sleep(1); /* wait a bit for server's delayed INIT_ACK to reproduce the issue */ + ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to send msg %d\n", ret); + goto out; + } + ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len); + if (ret < 0) { + printf("Failed to recv msg %d\n", ret); + goto out; + } + printf("Client: rcvd! %d\n", ret); + } + ret = 0; +out: + close(sd); + return ret; +} diff --git a/tools/testing/selftests/net/netfilter/settings b/tools/testing/selftests/net/netfilter/settings new file mode 100644 index 000000000000..abc5648b59ab --- /dev/null +++ b/tools/testing/selftests/net/netfilter/settings @@ -0,0 +1 @@ +timeout=1800 diff --git a/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh b/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh new file mode 100755 index 000000000000..912cb9583af1 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +if ! modprobe -q -n br_netfilter 2>&1; then + echo "SKIP: Test needs br_netfilter kernel module" + exit $ksft_skip +fi + +cleanup() +{ + cleanup_all_ns +} + +trap cleanup EXIT + +setup_ns host vtep router + +create_topology() +{ + ip link add host-eth0 netns "$host" type veth peer name vtep-host netns "$vtep" + ip link add vtep-router netns "$vtep" type veth peer name router-vtep netns "$router" +} + +setup_host() +{ + # bring ports up + ip -n "$host" addr add 10.0.0.1/24 dev host-eth0 + ip -n "$host" link set host-eth0 up + + # Add VLAN 10,20 + for vid in 10 20; do + ip -n "$host" link add link host-eth0 name host-eth0.$vid type vlan id $vid + ip -n "$host" addr add 10.0.$vid.1/24 dev host-eth0.$vid + ip -n "$host" link set host-eth0.$vid up + done +} + +setup_vtep() +{ + # create bridge on vtep + ip -n "$vtep" link add name br0 type bridge + ip -n "$vtep" link set br0 type bridge vlan_filtering 1 + + # VLAN 10 is untagged PVID + ip -n "$vtep" link set dev vtep-host master br0 + bridge -n "$vtep" vlan add dev vtep-host vid 10 pvid untagged + + # VLAN 20 as other VID + ip -n "$vtep" link set dev vtep-host master br0 + bridge -n "$vtep" vlan add dev vtep-host vid 20 + + # single-vxlan device on vtep + ip -n "$vtep" address add dev vtep-router 60.0.0.1/24 + ip -n "$vtep" link add dev vxd type vxlan external \ + vnifilter local 60.0.0.1 remote 60.0.0.2 dstport 4789 ttl 64 + ip -n "$vtep" link set vxd master br0 + + # Add VLAN-VNI 1-1 mappings + bridge -n "$vtep" link set dev vxd vlan_tunnel on + for vid in 10 20; do + bridge -n "$vtep" vlan add dev vxd vid $vid + bridge -n "$vtep" vlan add dev vxd vid $vid tunnel_info id $vid + bridge -n "$vtep" vni add dev vxd vni $vid + done + + # bring ports up + ip -n "$vtep" link set vxd up + ip -n "$vtep" link set vtep-router up + ip -n "$vtep" link set vtep-host up + ip -n "$vtep" link set dev br0 up +} + +setup_router() +{ + # bring ports up + ip -n "$router" link set router-vtep up +} + +setup() +{ + modprobe -q br_netfilter + create_topology + setup_host + setup_vtep + setup_router +} + +test_large_mtu_untagged_traffic() +{ + ip -n "$vtep" link set vxd mtu 1000 + ip -n "$host" neigh add 10.0.0.2 lladdr ca:fe:ba:be:00:01 dev host-eth0 + ip netns exec "$host" \ + ping -q 10.0.0.2 -I host-eth0 -c 1 -W 0.5 -s2000 > /dev/null 2>&1 + return 0 +} + +test_large_mtu_tagged_traffic() +{ + for vid in 10 20; do + ip -n "$vtep" link set vxd mtu 1000 + ip -n "$host" neigh add 10.0.$vid.2 lladdr ca:fe:ba:be:00:01 dev host-eth0.$vid + ip netns exec "$host" \ + ping -q 10.0.$vid.2 -I host-eth0.$vid -c 1 -W 0.5 -s2000 > /dev/null 2>&1 + done + return 0 +} + +do_test() +{ + # Frames will be dropped so ping will not succeed + # If it doesn't panic, it passes + test_large_mtu_tagged_traffic + test_large_mtu_untagged_traffic +} + +setup && \ +echo "Test for VxLAN fragmentation with large MTU in br_netfilter:" && \ +do_test && echo "PASS!" +exit $? diff --git a/tools/testing/selftests/net/netfilter/xt_string.sh b/tools/testing/selftests/net/netfilter/xt_string.sh new file mode 100755 index 000000000000..8d401c69e317 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/xt_string.sh @@ -0,0 +1,133 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# return code to signal skipped test +ksft_skip=4 +rc=0 + +source lib.sh + +checktool "socat -h" "run test without socat" +checktool "iptables --version" "test needs iptables" + +infile=$(mktemp) + +cleanup() +{ + ip netns del "$netns" + rm -f "$infile" +} + +trap cleanup EXIT + +setup_ns netns + +ip -net "$netns" link add d0 type dummy +ip -net "$netns" link set d0 up +ip -net "$netns" addr add 10.1.2.1/24 dev d0 + +pattern="foo bar baz" +patlen=11 +hdrlen=$((20 + 8)) # IPv4 + UDP + +#ip netns exec "$netns" tcpdump -npXi d0 & +#tcpdump_pid=$! +#trap 'kill $tcpdump_pid; ip netns del $netns' EXIT + +add_rule() { # (alg, from, to) + ip netns exec "$netns" \ + iptables -A OUTPUT -o d0 -m string \ + --string "$pattern" --algo "$1" --from "$2" --to "$3" +} +showrules() { # () + ip netns exec "$netns" iptables -v -S OUTPUT | grep '^-A' +} +zerorules() { + ip netns exec "$netns" iptables -Z OUTPUT +} +countrule() { # (pattern) + showrules | grep -c -- "$*" +} +send() { # (offset) + ( for ((i = 0; i < $1 - hdrlen; i++)); do + echo -n " " + done + echo -n "$pattern" + ) > "$infile" + + ip netns exec "$netns" socat -t 1 -u STDIN UDP-SENDTO:10.1.2.2:27374 < "$infile" +} + +add_rule bm 1000 1500 +add_rule bm 1400 1600 +add_rule kmp 1000 1500 +add_rule kmp 1400 1600 + +zerorules +send 0 +send $((1000 - patlen)) +if [ "$(countrule -c 0 0)" -ne 4 ]; then + echo "FAIL: rules match data before --from" + showrules + ((rc--)) +fi + +zerorules +send 1000 +send $((1400 - patlen)) +if [ "$(countrule -c 2)" -ne 2 ]; then + echo "FAIL: only two rules should match at low offset" + showrules + ((rc--)) +fi + +zerorules +send $((1500 - patlen)) +if [ "$(countrule -c 1)" -ne 4 ]; then + echo "FAIL: all rules should match at end of packet" + showrules + ((rc--)) +fi + +zerorules +send 1495 +if [ "$(countrule -c 1)" -ne 1 ]; then + echo "FAIL: only kmp with proper --to should match pattern spanning fragments" + showrules + ((rc--)) +fi + +zerorules +send 1500 +if [ "$(countrule -c 1)" -ne 2 ]; then + echo "FAIL: two rules should match pattern at start of second fragment" + showrules + ((rc--)) +fi + +zerorules +send $((1600 - patlen)) +if [ "$(countrule -c 1)" -ne 2 ]; then + echo "FAIL: two rules should match pattern at end of largest --to" + showrules + ((rc--)) +fi + +zerorules +send $((1600 - patlen + 1)) +if [ "$(countrule -c 1)" -ne 0 ]; then + echo "FAIL: no rules should match pattern extending largest --to" + showrules + ((rc--)) +fi + +zerorules +send 1600 +if [ "$(countrule -c 1)" -ne 0 ]; then + echo "FAIL: no rule should match pattern past largest --to" + showrules + ((rc--)) +fi + +[ $rc -eq 0 ] && echo "PASS: string match tests" +exit $rc diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c new file mode 100644 index 000000000000..07423f256f96 --- /dev/null +++ b/tools/testing/selftests/net/netlink-dumps.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <linux/genetlink.h> +#include <linux/neighbour.h> +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <linux/mqueue.h> +#include <linux/rtnetlink.h> + +#include "../kselftest_harness.h" + +#include <ynl.h> + +struct ext_ack { + int err; + + __u32 attr_offs; + __u32 miss_type; + __u32 miss_nest; + const char *str; +}; + +/* 0: no done, 1: done found, 2: extack found, -1: error */ +static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) +{ + const struct nlmsghdr *nlh; + const struct nlattr *attr; + ssize_t rem; + + for (rem = n; rem > 0; NLMSG_NEXT(nlh, rem)) { + nlh = (struct nlmsghdr *)&buf[n - rem]; + if (!NLMSG_OK(nlh, rem)) + return -1; + + if (nlh->nlmsg_type != NLMSG_DONE) + continue; + + ea->err = -*(int *)NLMSG_DATA(nlh); + + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return 1; + + ynl_attr_for_each(attr, nlh, sizeof(int)) { + switch (ynl_attr_type(attr)) { + case NLMSGERR_ATTR_OFFS: + ea->attr_offs = ynl_attr_get_u32(attr); + break; + case NLMSGERR_ATTR_MISS_TYPE: + ea->miss_type = ynl_attr_get_u32(attr); + break; + case NLMSGERR_ATTR_MISS_NEST: + ea->miss_nest = ynl_attr_get_u32(attr); + break; + case NLMSGERR_ATTR_MSG: + ea->str = ynl_attr_get_str(attr); + break; + } + } + + return 2; + } + + return 0; +} + +static const struct { + struct nlmsghdr nlhdr; + struct ndmsg ndm; + struct nlattr ahdr; + __u32 val; +} dump_neigh_bad = { + .nlhdr = { + .nlmsg_len = sizeof(dump_neigh_bad), + .nlmsg_type = RTM_GETNEIGH, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, + .nlmsg_seq = 1, + }, + .ndm = { + .ndm_family = 123, + }, + .ahdr = { + .nla_len = 4 + 4, + .nla_type = NDA_FLAGS_EXT, + }, + .val = -1, // should fail MASK validation +}; + +TEST(dump_extack) +{ + int netlink_sock; + char buf[8192]; + int one = 1; + int i, cnt; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + ASSERT_GE(netlink_sock, 0); + + n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_CAP_ACK, + &one, sizeof(one)); + ASSERT_EQ(n, 0); + n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)); + ASSERT_EQ(n, 0); + n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_GET_STRICT_CHK, + &one, sizeof(one)); + ASSERT_EQ(n, 0); + + /* Dump so many times we fill up the buffer */ + cnt = 64; + for (i = 0; i < cnt; i++) { + n = send(netlink_sock, &dump_neigh_bad, + sizeof(dump_neigh_bad), 0); + ASSERT_EQ(n, sizeof(dump_neigh_bad)); + } + + /* Read out the ENOBUFS */ + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + EXPECT_EQ(n, -1); + EXPECT_EQ(errno, ENOBUFS); + + for (i = 0; i < cnt; i++) { + struct ext_ack ea = {}; + + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + if (n < 0) { + ASSERT_GE(i, 10); + break; + } + ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); + + EXPECT_EQ(nl_get_extack(buf, n, &ea), 2); + EXPECT_EQ(ea.attr_offs, + sizeof(struct nlmsghdr) + sizeof(struct ndmsg)); + } +} + +static const struct { + struct nlmsghdr nlhdr; + struct genlmsghdr genlhdr; + struct nlattr ahdr; + __u16 val; + __u16 pad; +} dump_policies = { + .nlhdr = { + .nlmsg_len = sizeof(dump_policies), + .nlmsg_type = GENL_ID_CTRL, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, + .nlmsg_seq = 1, + }, + .genlhdr = { + .cmd = CTRL_CMD_GETPOLICY, + .version = 2, + }, + .ahdr = { + .nla_len = 6, + .nla_type = CTRL_ATTR_FAMILY_ID, + }, + .val = GENL_ID_CTRL, + .pad = 0, +}; + +// Sanity check for the test itself, make sure the dump doesn't fit in one msg +TEST(test_sanity) +{ + int netlink_sock; + char buf[8192]; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + ASSERT_GE(netlink_sock, 0); + + n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0); + ASSERT_EQ(n, sizeof(dump_policies)); + + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); + + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); + + close(netlink_sock); +} + +TEST(close_in_progress) +{ + int netlink_sock; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + ASSERT_GE(netlink_sock, 0); + + n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0); + ASSERT_EQ(n, sizeof(dump_policies)); + + close(netlink_sock); +} + +TEST(close_with_ref) +{ + char cookie[NOTIFY_COOKIE_LEN] = {}; + int netlink_sock, mq_fd; + struct sigevent sigev; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + ASSERT_GE(netlink_sock, 0); + + n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0); + ASSERT_EQ(n, sizeof(dump_policies)); + + mq_fd = syscall(__NR_mq_open, "sed", O_CREAT | O_WRONLY, 0600, 0); + ASSERT_GE(mq_fd, 0); + + memset(&sigev, 0, sizeof(sigev)); + sigev.sigev_notify = SIGEV_THREAD; + sigev.sigev_value.sival_ptr = cookie; + sigev.sigev_signo = netlink_sock; + + syscall(__NR_mq_notify, mq_fd, &sigev); + + close(netlink_sock); + + // give mqueue time to fire + usleep(100 * 1000); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh index 6974474c26f3..38871bdef67f 100755 --- a/tools/testing/selftests/net/netns-name.sh +++ b/tools/testing/selftests/net/netns-name.sh @@ -7,10 +7,12 @@ set -o pipefail DEV=dummy-dev0 DEV2=dummy-dev1 ALT_NAME=some-alt-name +NSIM_ADDR=2025 RET_CODE=0 cleanup() { + cleanup_netdevsim $NSIM_ADDR cleanup_ns $NS $test_ns } @@ -25,12 +27,15 @@ setup_ns NS test_ns # # Test basic move without a rename +# Use netdevsim because it has extra asserts for notifiers. # -ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns $test_ns || + +nsim=$(create_netdevsim $NSIM_ADDR $NS) +ip -netns $NS link set dev $nsim netns $test_ns || fail "Can't perform a netns move" -ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" -ip -netns $test_ns link del $DEV || fail +ip -netns $test_ns link show dev $nsim >> /dev/null || + fail "Device not found after move" +cleanup_netdevsim $NSIM_ADDR # # Test move with a conflict @@ -78,6 +83,16 @@ ip -netns $NS link show dev $ALT_NAME 2> /dev/null && fail "Can still find alt-name after move" ip -netns $test_ns link del $DEV || fail +# +# Test no conflict of the same name/ifindex in different netns +# +ip -netns $NS link add name $DEV index 100 type dummy || fail +ip -netns $NS link add netns $test_ns name $DEV index 100 type dummy || + fail "Can create in netns without moving" +ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found" +ip -netns $NS link del $DEV || fail +ip -netns $test_ns link del $DEV || fail + echo -ne "$(basename $0) \t\t\t\t" if [ $RET_CODE -eq 0 ]; then echo "[ OK ]" diff --git a/tools/testing/selftests/net/netns-sysctl.sh b/tools/testing/selftests/net/netns-sysctl.sh new file mode 100755 index 000000000000..45c34a3b9aae --- /dev/null +++ b/tools/testing/selftests/net/netns-sysctl.sh @@ -0,0 +1,40 @@ +#!/bin/bash -e +# SPDX-License-Identifier: GPL-2.0 +# +# This test checks that the network buffer sysctls are present +# in a network namespaces, and that they are readonly. + +source lib.sh + +cleanup() { + cleanup_ns $test_ns +} + +trap cleanup EXIT + +fail() { + echo "ERROR: $*" >&2 + exit 1 +} + +setup_ns test_ns + +for sc in {r,w}mem_{default,max}; do + # check that this is writable in a netns + [ -w "/proc/sys/net/core/$sc" ] || + fail "$sc isn't writable in the init netns!" + + # change the value in the host netns + sysctl -qw "net.core.$sc=300000" || + fail "Can't write $sc in init netns!" + + # check that the value is read from the init netns + [ "$(ip netns exec $test_ns sysctl -n "net.core.$sc")" -eq 300000 ] || + fail "Value for $sc mismatch!" + + # check that this isn't writable in a netns + ip netns exec $test_ns [ -w "/proc/sys/net/core/$sc" ] && + fail "$sc is writable in a netns!" +done + +echo 'Test passed OK' diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py new file mode 100755 index 000000000000..beaee5e4e2aa --- /dev/null +++ b/tools/testing/selftests/net/nl_netdev.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import time +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_eq, ksft_ge, ksft_busy_wait +from lib.py import NetdevFamily, NetdevSimDev, ip + + +def empty_check(nf) -> None: + devs = nf.dev_get({}, dump=True) + ksft_ge(len(devs), 1) + + +def lo_check(nf) -> None: + lo_info = nf.dev_get({"ifindex": 1}) + ksft_eq(len(lo_info['xdp-features']), 0) + ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0) + + +def napi_list_check(nf) -> None: + with NetdevSimDev(queue_count=100) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 100) + + for q in [50, 0, 99]: + for i in range(4): + nsim.dfs_write("queue_reset", f"{q} {i}") + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 100, + comment=f"queue count after reset queue {q} mode {i}") + + +def nsim_rxq_reset_down(nf) -> None: + """ + Test that the queue API supports resetting a queue + while the interface is down. We should convert this + test to testing real HW once more devices support + queue API. + """ + with NetdevSimDev(queue_count=4) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} down") + for i in [0, 2, 3]: + nsim.dfs_write("queue_reset", f"1 {i}") + + +def page_pool_check(nf) -> None: + with NetdevSimDev() as nsimdev: + nsim = nsimdev.nsims[0] + + def up(): + ip(f"link set dev {nsim.ifname} up") + + def down(): + ip(f"link set dev {nsim.ifname} down") + + def get_pp(): + pp_list = nf.page_pool_get({}, dump=True) + return [pp for pp in pp_list if pp.get("ifindex") == nsim.ifindex] + + # No page pools when down + down() + ksft_eq(len(get_pp()), 0) + + # Up, empty page pool appears + up() + pp_list = get_pp() + ksft_ge(len(pp_list), 0) + refs = sum([pp["inflight"] for pp in pp_list]) + ksft_eq(refs, 0) + + # Down, it disappears, again + down() + pp_list = get_pp() + ksft_eq(len(pp_list), 0) + + # Up, allocate a page + up() + nsim.dfs_write("pp_hold", "y") + pp_list = nf.page_pool_get({}, dump=True) + refs = sum([pp["inflight"] for pp in pp_list if pp.get("ifindex") == nsim.ifindex]) + ksft_ge(refs, 1) + + # Now let's leak a page + down() + pp_list = get_pp() + ksft_eq(len(pp_list), 1) + refs = sum([pp["inflight"] for pp in pp_list]) + ksft_eq(refs, 1) + attached = [pp for pp in pp_list if "detach-time" not in pp] + ksft_eq(len(attached), 0) + + # New pp can get created, and we'll have two + up() + pp_list = get_pp() + attached = [pp for pp in pp_list if "detach-time" not in pp] + detached = [pp for pp in pp_list if "detach-time" in pp] + ksft_eq(len(attached), 1) + ksft_eq(len(detached), 1) + + # Free the old page and the old pp is gone + nsim.dfs_write("pp_hold", "n") + # Freeing check is once a second so we may need to retry + ksft_busy_wait(lambda: len(get_pp()) == 1, deadline=2) + + # And down... + down() + ksft_eq(len(get_pp()), 0) + + # Last, leave the page hanging for destroy, nothing to check + # we're trying to exercise the orphaning path in the kernel + up() + nsim.dfs_write("pp_hold", "y") + + +def main() -> None: + nf = NetdevFamily() + ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, + nsim_rxq_reset_down], + args=(nf, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile index 2f1508abc826..3fd1da2ec07d 100644 --- a/tools/testing/selftests/net/openvswitch/Makefile +++ b/tools/testing/selftests/net/openvswitch/Makefile @@ -2,7 +2,7 @@ top_srcdir = ../../../../.. -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := openvswitch.sh diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 5cae53543849..3c8d3455d8e7 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # OVS kernel module self tests @@ -11,6 +11,11 @@ ksft_skip=4 PAUSE_ON_FAIL=no VERBOSE=0 TRACING=0 +WAIT_TIMEOUT=5 + +if test "X$KSFT_MACHINE_SLOW" == "Xyes"; then + WAIT_TIMEOUT=10 +fi tests=" arp_ping eth-arp: Basic arp ping between two NS @@ -20,10 +25,37 @@ tests=" nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT netlink_checks ovsnl: validate netlink attrs and settings upcall_interfaces ovs: test the upcall interfaces - drop_reason drop: test drop reasons are emitted" + drop_reason drop: test drop reasons are emitted + psample psample: Sampling packets with psample" info() { - [ $VERBOSE = 0 ] || echo $* + [ "${ovs_dir}" != "" ] && + echo "`date +"[%m-%d %H:%M:%S]"` $*" >> ${ovs_dir}/debug.log + [ $VERBOSE = 0 ] || echo $* +} + +ovs_wait() { + info "waiting $WAIT_TIMEOUT s for: $@" + + if "$@" ; then + info "wait succeeded immediately" + return 0 + fi + + # A quick re-check helps speed up small races in fast systems. + # However, fractional sleeps might not necessarily work. + local start=0 + sleep 0.1 || { sleep 1; start=1; } + + for (( i=start; i<WAIT_TIMEOUT; i++ )); do + if "$@" ; then + info "wait succeeded after $i seconds" + return 0 + fi + sleep 1 + done + info "wait failed after $i seconds" + return 1 } ovs_base=`pwd` @@ -65,7 +97,8 @@ ovs_setenv() { ovs_sbx() { if test "X$2" != X; then - (ovs_setenv $1; shift; "$@" >> ${ovs_dir}/debug.log) + (ovs_setenv $1; shift; + info "run cmd: $@"; "$@" >> ${ovs_dir}/debug.log) else ovs_setenv $1 fi @@ -102,12 +135,21 @@ ovs_netns_spawn_daemon() { shift netns=$1 shift - info "spawning cmd: $*" - ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr & + if [ "$netns" == "_default" ]; then + $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr & + else + ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr & + fi pid=$! ovs_sbx "$sbx" on_exit "kill -TERM $pid 2>/dev/null" } +ovs_spawn_daemon() { + sbx=$1 + shift + ovs_netns_spawn_daemon $sbx "_default" $* +} + ovs_add_netns_and_veths () { info "Adding netns attached: sbx:$1 dp:$2 {$3, $4, $5}" ovs_sbx "$1" ip netns add "$3" || return 1 @@ -129,8 +171,10 @@ ovs_add_netns_and_veths () { ovs_add_if "$1" "$2" "$4" -u || return 1 fi - [ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \ - tcpdump -i any -s 65535 + if [ $TRACING -eq 1 ]; then + ovs_netns_spawn_daemon "$1" "$3" tcpdump -l -i any -s 6553 + ovs_wait grep -q "listening on any" ${ovs_dir}/stderr + fi return 0 } @@ -139,7 +183,7 @@ ovs_add_flow () { info "Adding flow to DP: sbx:$1 br:$2 flow:$3 act:$4" ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-flow "$2" "$3" "$4" if [ $? -ne 0 ]; then - echo "Flow [ $3 : $4 ] failed" >> ${ovs_dir}/debug.log + info "Flow [ $3 : $4 ] failed" return 1 fi return 0 @@ -170,6 +214,19 @@ ovs_drop_reason_count() return `echo "$perf_output" | grep "$pattern" | wc -l` } +ovs_test_flow_fails () { + ERR_MSG="Flow actions may not be safe on all matching packets" + + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}") + ovs_add_flow $@ &> /dev/null $@ && return 1 + POST_TEST=$(dmesg | grep -c "${ERR_MSG}") + + if [ "$PRE_TEST" == "$POST_TEST" ]; then + return 1 + fi + return 0 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." @@ -184,10 +241,100 @@ usage() { exit 1 } + +# psample test +# - use psample to observe packets +test_psample() { + sbx_add "test_psample" || return $? + + # Add a datapath with per-vport dispatching. + ovs_add_dp "test_psample" psample -V 2:1 || return 1 + + info "create namespaces" + ovs_add_netns_and_veths "test_psample" "psample" \ + client c0 c1 172.31.110.10/24 -u || return 1 + ovs_add_netns_and_veths "test_psample" "psample" \ + server s0 s1 172.31.110.20/24 -u || return 1 + + # Check if psample actions can be configured. + ovs_add_flow "test_psample" psample \ + 'in_port(1),eth(),eth_type(0x0806),arp()' 'psample(group=1)' &> /dev/null + if [ $? == 1 ]; then + info "no support for psample - skipping" + ovs_exit_sig + return $ksft_skip + fi + + ovs_del_flows "test_psample" psample + + # Test action verification. + OLDIFS=$IFS + IFS='*' + min_key='in_port(1),eth(),eth_type(0x0800),ipv4()' + for testcase in \ + "cookie to large"*"psample(group=1,cookie=1615141312111009080706050403020100)" \ + "no group with cookie"*"psample(cookie=abcd)" \ + "no group"*"psample()"; + do + set -- $testcase; + ovs_test_flow_fails "test_psample" psample $min_key $2 + if [ $? == 1 ]; then + info "failed - $1" + return 1 + fi + done + IFS=$OLDIFS + + ovs_del_flows "test_psample" psample + # Allow ARP + ovs_add_flow "test_psample" psample \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "test_psample" psample \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + + # Sample first 14 bytes of all traffic. + ovs_add_flow "test_psample" psample \ + "in_port(1),eth(),eth_type(0x0800),ipv4()" \ + "trunc(14),psample(group=1,cookie=c0ffee),2" + + # Sample all traffic. In this case, use a sample() action with both + # psample and an upcall emulating simultaneous local sampling and + # sFlow / IPFIX. + nlpid=$(grep -E "listening on upcall packet handler" \ + $ovs_dir/s0.out | cut -d ":" -f 2 | tr -d ' ') + + ovs_add_flow "test_psample" psample \ + "in_port(2),eth(),eth_type(0x0800),ipv4()" \ + "sample(sample=100%,actions(psample(group=2,cookie=eeff0c),userspace(pid=${nlpid},userdata=eeff0c))),1" + + # Record psample data. + ovs_spawn_daemon "test_psample" python3 $ovs_base/ovs-dpctl.py psample-events + ovs_wait grep -q "listening for psample events" ${ovs_dir}/stdout + + # Send a single ping. + ovs_sbx "test_psample" ip netns exec client ping -I c1 172.31.110.20 -c 1 || return 1 + + # We should have received one userspace action upcall and 2 psample packets. + ovs_wait grep -q "userspace action command" $ovs_dir/s0.out || return 1 + + # client -> server samples should only contain the first 14 bytes of the packet. + ovs_wait grep -qE "rate:4294967295,group:1,cookie:c0ffee data:[0-9a-f]{28}$" \ + $ovs_dir/stdout || return 1 + + ovs_wait grep -q "rate:4294967295,group:2,cookie:eeff0c" $ovs_dir/stdout || return 1 + + return 0 +} + # drop_reason test # - drop packets and verify the right drop reason is reported test_drop_reason() { which perf >/dev/null 2>&1 || return $ksft_skip + which pahole >/dev/null 2>&1 || return $ksft_skip + + ovs_drop_subsys=$(pahole -C skb_drop_reason_subsys | + awk '/OPENVSWITCH/ { print $3; }' | + tr -d ,) sbx_add "test_drop_reason" || return $? @@ -231,7 +378,7 @@ test_drop_reason() { "in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop' ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20 - ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION + ovs_drop_reason_count 0x${ovs_drop_subsys}0001 # OVS_DROP_FLOW_ACTION if [[ "$?" -ne "2" ]]; then info "Did not detect expected drops: $?" return 1 @@ -248,7 +395,7 @@ test_drop_reason() { ovs_drop_record_and_run \ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000 - ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR + ovs_drop_reason_count 0x${ovs_drop_subsys}0004 # OVS_DROP_EXPLICIT_ACTION_ERROR if [[ "$?" -ne "1" ]]; then info "Did not detect expected explicit error drops: $?" return 1 @@ -256,7 +403,7 @@ test_drop_reason() { ovs_drop_record_and_run \ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000 - ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION + ovs_drop_reason_count 0x${ovs_drop_subsys}0003 # OVS_DROP_EXPLICIT_ACTION if [[ "$?" -ne "1" ]]; then info "Did not detect expected explicit drops: $?" return 1 @@ -599,7 +746,8 @@ test_upcall_interfaces() { ovs_add_netns_and_veths "test_upcall_interfaces" ui0 upc left0 l0 \ 172.31.110.1/24 -u || return 1 - sleep 1 + ovs_wait grep -q "listening on upcall packet handler" ${ovs_dir}/left0.out + info "sending arping" ip netns exec upc arping -I l0 172.31.110.20 -c 1 \ >$ovs_dir/arping.stdout 2>$ovs_dir/arping.stderr @@ -613,16 +761,20 @@ run_test() { tname="$1" tdesc="$2" - if ! lsmod | grep openvswitch >/dev/null 2>&1; then - stdbuf -o0 printf "TEST: %-60s [NOMOD]\n" "${tdesc}" - return $ksft_skip - fi - if python3 ovs-dpctl.py -h 2>&1 | \ grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}" return $ksft_skip fi + + python3 ovs-dpctl.py show >/dev/null 2>&1 || \ + echo "[DPCTL] show exception." + + if ! lsmod | grep openvswitch >/dev/null 2>&1; then + stdbuf -o0 printf "TEST: %-60s [NOMOD]\n" "${tdesc}" + return $ksft_skip + fi + printf "TEST: %-60s [START]\n" "${tname}" unset IFS diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 5e0e539a323d..8a0396bfaf99 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -8,8 +8,10 @@ import argparse import errno import ipaddress import logging +import math import multiprocessing import re +import socket import struct import sys import time @@ -26,13 +28,16 @@ try: from pyroute2.netlink import genlmsg from pyroute2.netlink import nla from pyroute2.netlink import nlmsg_atoms + from pyroute2.netlink.event import EventSocket from pyroute2.netlink.exceptions import NetlinkError from pyroute2.netlink.generic import GenericNetlinkSocket + from pyroute2.netlink.nlsocket import Marshal import pyroute2 + import pyroute2.iproute except ModuleNotFoundError: print("Need to install the python pyroute2 package >= 0.6.") - sys.exit(0) + sys.exit(1) OVS_DATAPATH_FAMILY = "ovs_datapath" @@ -58,6 +63,7 @@ OVS_FLOW_CMD_DEL = 2 OVS_FLOW_CMD_GET = 3 OVS_FLOW_CMD_SET = 4 +UINT32_MAX = 0xFFFFFFFF def macstr(mac): outstr = ":".join(["%02X" % i for i in mac]) @@ -198,6 +204,18 @@ def convert_ipv4(data): return int(ipaddress.IPv4Address(ip)), int(ipaddress.IPv4Address(mask)) +def convert_ipv6(data): + ip, _, mask = data.partition('/') + + if not ip: + ip = mask = 0 + elif not mask: + mask = 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' + elif mask.isdigit(): + mask = ipaddress.IPv6Network("::/" + mask).hostmask + + return ipaddress.IPv6Address(ip).packed, ipaddress.IPv6Address(mask).packed + def convert_int(size): def convert_int_sized(data): value, _, mask = data.partition('/') @@ -267,6 +285,75 @@ def parse_extract_field( return str_skipped, data +def parse_attrs(actstr, attr_desc): + """Parses the given action string and returns a list of netlink + attributes based on a list of attribute descriptions. + + Each element in the attribute description list is a tuple such as: + (name, attr_name, parse_func) + where: + name: is the string representing the attribute + attr_name: is the name of the attribute as defined in the uAPI. + parse_func: is a callable accepting a string and returning either + a single object (the parsed attribute value) or a tuple of + two values (the parsed attribute value and the remaining string) + + Returns a list of attributes and the remaining string. + """ + def parse_attr(actstr, key, func): + actstr = actstr[len(key) :] + + if not func: + return None, actstr + + delim = actstr[0] + actstr = actstr[1:] + + if delim == "=": + pos = strcspn(actstr, ",)") + ret = func(actstr[:pos]) + else: + ret = func(actstr) + + if isinstance(ret, tuple): + (datum, actstr) = ret + else: + datum = ret + actstr = actstr[strcspn(actstr, ",)"):] + + if delim == "(": + if not actstr or actstr[0] != ")": + raise ValueError("Action contains unbalanced parentheses") + + actstr = actstr[1:] + + actstr = actstr[strspn(actstr, ", ") :] + + return datum, actstr + + attrs = [] + attr_desc = list(attr_desc) + while actstr and actstr[0] != ")" and attr_desc: + found = False + for i, (key, attr, func) in enumerate(attr_desc): + if actstr.startswith(key): + datum, actstr = parse_attr(actstr, key, func) + attrs.append([attr, datum]) + found = True + del attr_desc[i] + + if not found: + raise ValueError("Unknown attribute: '%s'" % actstr) + + actstr = actstr[strspn(actstr, ", ") :] + + if actstr[0] != ")": + raise ValueError("Action string contains extra garbage or has " + "unbalanced parenthesis: '%s'" % actstr) + + return attrs, actstr[1:] + + class ovs_dp_msg(genlmsg): # include the OVS version # We need a custom header rather than just being able to rely on @@ -282,15 +369,15 @@ class ovsactions(nla): ("OVS_ACTION_ATTR_UNSPEC", "none"), ("OVS_ACTION_ATTR_OUTPUT", "uint32"), ("OVS_ACTION_ATTR_USERSPACE", "userspace"), - ("OVS_ACTION_ATTR_SET", "none"), + ("OVS_ACTION_ATTR_SET", "ovskey"), ("OVS_ACTION_ATTR_PUSH_VLAN", "none"), ("OVS_ACTION_ATTR_POP_VLAN", "flag"), - ("OVS_ACTION_ATTR_SAMPLE", "none"), + ("OVS_ACTION_ATTR_SAMPLE", "sample"), ("OVS_ACTION_ATTR_RECIRC", "uint32"), ("OVS_ACTION_ATTR_HASH", "none"), ("OVS_ACTION_ATTR_PUSH_MPLS", "none"), ("OVS_ACTION_ATTR_POP_MPLS", "flag"), - ("OVS_ACTION_ATTR_SET_MASKED", "none"), + ("OVS_ACTION_ATTR_SET_MASKED", "ovskey"), ("OVS_ACTION_ATTR_CT", "ctact"), ("OVS_ACTION_ATTR_TRUNC", "uint32"), ("OVS_ACTION_ATTR_PUSH_ETH", "none"), @@ -304,8 +391,85 @@ class ovsactions(nla): ("OVS_ACTION_ATTR_ADD_MPLS", "none"), ("OVS_ACTION_ATTR_DEC_TTL", "none"), ("OVS_ACTION_ATTR_DROP", "uint32"), + ("OVS_ACTION_ATTR_PSAMPLE", "psample"), ) + class psample(nla): + nla_flags = NLA_F_NESTED + + nla_map = ( + ("OVS_PSAMPLE_ATTR_UNSPEC", "none"), + ("OVS_PSAMPLE_ATTR_GROUP", "uint32"), + ("OVS_PSAMPLE_ATTR_COOKIE", "array(uint8)"), + ) + + def dpstr(self, more=False): + args = "group=%d" % self.get_attr("OVS_PSAMPLE_ATTR_GROUP") + + cookie = self.get_attr("OVS_PSAMPLE_ATTR_COOKIE") + if cookie: + args += ",cookie(%s)" % \ + "".join(format(x, "02x") for x in cookie) + + return "psample(%s)" % args + + def parse(self, actstr): + desc = ( + ("group", "OVS_PSAMPLE_ATTR_GROUP", int), + ("cookie", "OVS_PSAMPLE_ATTR_COOKIE", + lambda x: list(bytearray.fromhex(x))) + ) + + attrs, actstr = parse_attrs(actstr, desc) + + for attr in attrs: + self["attrs"].append(attr) + + return actstr + + class sample(nla): + nla_flags = NLA_F_NESTED + + nla_map = ( + ("OVS_SAMPLE_ATTR_UNSPEC", "none"), + ("OVS_SAMPLE_ATTR_PROBABILITY", "uint32"), + ("OVS_SAMPLE_ATTR_ACTIONS", "ovsactions"), + ) + + def dpstr(self, more=False): + args = [] + + args.append("sample={:.2f}%".format( + 100 * self.get_attr("OVS_SAMPLE_ATTR_PROBABILITY") / + UINT32_MAX)) + + actions = self.get_attr("OVS_SAMPLE_ATTR_ACTIONS") + if actions: + args.append("actions(%s)" % actions.dpstr(more)) + + return "sample(%s)" % ",".join(args) + + def parse(self, actstr): + def parse_nested_actions(actstr): + subacts = ovsactions() + parsed_len = subacts.parse(actstr) + return subacts, actstr[parsed_len :] + + def percent_to_rate(percent): + percent = float(percent.strip('%')) + return int(math.floor(UINT32_MAX * (percent / 100.0) + .5)) + + desc = ( + ("sample", "OVS_SAMPLE_ATTR_PROBABILITY", percent_to_rate), + ("actions", "OVS_SAMPLE_ATTR_ACTIONS", parse_nested_actions), + ) + attrs, actstr = parse_attrs(actstr, desc) + + for attr in attrs: + self["attrs"].append(attr) + + return actstr + class ctact(nla): nla_flags = NLA_F_NESTED @@ -427,50 +591,77 @@ class ovsactions(nla): print_str += "userdata=" for f in self.get_attr("OVS_USERSPACE_ATTR_USERDATA"): print_str += "%x." % f - if self.get_attr("OVS_USERSPACE_ATTR_TUN_PORT") is not None: + if self.get_attr("OVS_USERSPACE_ATTR_EGRESS_TUN_PORT") is not None: print_str += "egress_tun_port=%d" % self.get_attr( - "OVS_USERSPACE_ATTR_TUN_PORT" + "OVS_USERSPACE_ATTR_EGRESS_TUN_PORT" ) print_str += ")" return print_str + def parse(self, actstr): + attrs_desc = ( + ("pid", "OVS_USERSPACE_ATTR_PID", int), + ("userdata", "OVS_USERSPACE_ATTR_USERDATA", + lambda x: list(bytearray.fromhex(x))), + ("egress_tun_port", "OVS_USERSPACE_ATTR_EGRESS_TUN_PORT", int) + ) + + attrs, actstr = parse_attrs(actstr, attrs_desc) + for attr in attrs: + self["attrs"].append(attr) + + return actstr + def dpstr(self, more=False): print_str = "" - for field in self.nla_map: + for field in self["attrs"]: if field[1] == "none" or self.get_attr(field[0]) is None: continue if print_str != "": print_str += "," - if field[1] == "uint32": - if field[0] == "OVS_ACTION_ATTR_OUTPUT": - print_str += "%d" % int(self.get_attr(field[0])) - elif field[0] == "OVS_ACTION_ATTR_RECIRC": - print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) - elif field[0] == "OVS_ACTION_ATTR_TRUNC": - print_str += "trunc(%d)" % int(self.get_attr(field[0])) - elif field[0] == "OVS_ACTION_ATTR_DROP": - print_str += "drop(%d)" % int(self.get_attr(field[0])) - elif field[1] == "flag": - if field[0] == "OVS_ACTION_ATTR_CT_CLEAR": - print_str += "ct_clear" - elif field[0] == "OVS_ACTION_ATTR_POP_VLAN": - print_str += "pop_vlan" - elif field[0] == "OVS_ACTION_ATTR_POP_ETH": - print_str += "pop_eth" - elif field[0] == "OVS_ACTION_ATTR_POP_NSH": - print_str += "pop_nsh" - elif field[0] == "OVS_ACTION_ATTR_POP_MPLS": - print_str += "pop_mpls" + if field[0] == "OVS_ACTION_ATTR_OUTPUT": + print_str += "%d" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_RECIRC": + print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_TRUNC": + print_str += "trunc(%d)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_DROP": + print_str += "drop(%d)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_CT_CLEAR": + print_str += "ct_clear" + elif field[0] == "OVS_ACTION_ATTR_POP_VLAN": + print_str += "pop_vlan" + elif field[0] == "OVS_ACTION_ATTR_POP_ETH": + print_str += "pop_eth" + elif field[0] == "OVS_ACTION_ATTR_POP_NSH": + print_str += "pop_nsh" + elif field[0] == "OVS_ACTION_ATTR_POP_MPLS": + print_str += "pop_mpls" else: datum = self.get_attr(field[0]) if field[0] == "OVS_ACTION_ATTR_CLONE": print_str += "clone(" print_str += datum.dpstr(more) print_str += ")" + elif field[0] == "OVS_ACTION_ATTR_SET" or \ + field[0] == "OVS_ACTION_ATTR_SET_MASKED": + print_str += "set" + field = datum + mask = None + if field[0] == "OVS_ACTION_ATTR_SET_MASKED": + print_str += "_masked" + field = datum[0] + mask = datum[1] + print_str += "(" + print_str += field.dpstr(mask, more) + print_str += ")" else: - print_str += datum.dpstr(more) + try: + print_str += datum.dpstr(more) + except: + print_str += "{ATTR: %s not decoded}" % field[0] return print_str @@ -489,7 +680,7 @@ class ovsactions(nla): actstr, reason = parse_extract_field( actstr, "drop(", - "([0-9]+)", + r"([0-9]+)", lambda x: int(x, 0), False, None, @@ -502,9 +693,9 @@ class ovsactions(nla): actstr = actstr[len("drop"): ] return (totallen - len(actstr)) - elif parse_starts_block(actstr, "^(\d+)", False, True): + elif parse_starts_block(actstr, r"^(\d+)", False, True): actstr, output = parse_extract_field( - actstr, None, "(\d+)", lambda x: int(x), False, "0" + actstr, None, r"(\d+)", lambda x: int(x), False, "0" ) self["attrs"].append(["OVS_ACTION_ATTR_OUTPUT", output]) parsed = True @@ -512,7 +703,7 @@ class ovsactions(nla): actstr, recircid = parse_extract_field( actstr, "recirc(", - "([0-9a-fA-Fx]+)", + r"([0-9a-fA-Fx]+)", lambda x: int(x, 0), False, 0, @@ -531,7 +722,7 @@ class ovsactions(nla): for flat_act in parse_flat_map: if parse_starts_block(actstr, flat_act[0], False): actstr = actstr[len(flat_act[0]):] - self["attrs"].append([flat_act[1]]) + self["attrs"].append([flat_act[1], True]) actstr = actstr[strspn(actstr, ", ") :] parsed = True @@ -544,6 +735,25 @@ class ovsactions(nla): self["attrs"].append(("OVS_ACTION_ATTR_CLONE", subacts)) actstr = actstr[parsedLen:] parsed = True + elif parse_starts_block(actstr, "set(", False): + parencount += 1 + k = ovskey() + actstr = actstr[len("set("):] + actstr = k.parse(actstr, None) + self["attrs"].append(("OVS_ACTION_ATTR_SET", k)) + if not actstr.startswith(")"): + actstr = ")" + actstr + parsed = True + elif parse_starts_block(actstr, "set_masked(", False): + parencount += 1 + k = ovskey() + m = ovskey() + actstr = actstr[len("set_masked("):] + actstr = k.parse(actstr, m) + self["attrs"].append(("OVS_ACTION_ATTR_SET_MASKED", [k, m])) + if not actstr.startswith(")"): + actstr = ")" + actstr + parsed = True elif parse_starts_block(actstr, "ct(", False): parencount += 1 actstr = actstr[len("ct(") :] @@ -588,17 +798,17 @@ class ovsactions(nla): actstr = actstr[3:] actstr, ip_block_min = parse_extract_field( - actstr, "=", "([0-9a-fA-F\.]+)", str, False + actstr, "=", r"([0-9a-fA-F\.]+)", str, False ) actstr, ip_block_max = parse_extract_field( - actstr, "-", "([0-9a-fA-F\.]+)", str, False + actstr, "-", r"([0-9a-fA-F\.]+)", str, False ) actstr, proto_min = parse_extract_field( - actstr, ":", "(\d+)", int, False + actstr, ":", r"(\d+)", int, False ) actstr, proto_max = parse_extract_field( - actstr, "-", "(\d+)", int, False + actstr, "-", r"(\d+)", int, False ) if t is not None: @@ -637,6 +847,37 @@ class ovsactions(nla): self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact]) parsed = True + elif parse_starts_block(actstr, "sample(", False): + sampleact = self.sample() + actstr = sampleact.parse(actstr[len("sample(") : ]) + self["attrs"].append(["OVS_ACTION_ATTR_SAMPLE", sampleact]) + parsed = True + + elif parse_starts_block(actstr, "psample(", False): + psampleact = self.psample() + actstr = psampleact.parse(actstr[len("psample(") : ]) + self["attrs"].append(["OVS_ACTION_ATTR_PSAMPLE", psampleact]) + parsed = True + + elif parse_starts_block(actstr, "userspace(", False): + uact = self.userspace() + actstr = uact.parse(actstr[len("userspace(") : ]) + self["attrs"].append(["OVS_ACTION_ATTR_USERSPACE", uact]) + parsed = True + + elif parse_starts_block(actstr, "trunc(", False): + parencount += 1 + actstr, val = parse_extract_field( + actstr, + "trunc(", + r"([0-9]+)", + int, + False, + None, + ) + self["attrs"].append(["OVS_ACTION_ATTR_TRUNC", val]) + parsed = True + actstr = actstr[strspn(actstr, ", ") :] while parencount > 0: parencount -= 1 @@ -675,7 +916,7 @@ class ovskey(nla): ("OVS_KEY_ATTR_ARP", "ovs_key_arp"), ("OVS_KEY_ATTR_ND", "ovs_key_nd"), ("OVS_KEY_ATTR_SKB_MARK", "uint32"), - ("OVS_KEY_ATTR_TUNNEL", "none"), + ("OVS_KEY_ATTR_TUNNEL", "ovs_key_tunnel"), ("OVS_KEY_ATTR_SCTP", "ovs_key_sctp"), ("OVS_KEY_ATTR_TCP_FLAGS", "be16"), ("OVS_KEY_ATTR_DP_HASH", "uint32"), @@ -907,21 +1148,21 @@ class ovskey(nla): "src", "src", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big"), - lambda x: ipaddress.IPv6Address(x), + lambda x: ipaddress.IPv6Address(x).packed if x else 0, + convert_ipv6, ), ( "dst", "dst", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big"), - lambda x: ipaddress.IPv6Address(x), + lambda x: ipaddress.IPv6Address(x).packed if x else 0, + convert_ipv6, ), - ("label", "label", "%d", int), - ("proto", "proto", "%d", int), - ("tclass", "tclass", "%d", int), - ("hlimit", "hlimit", "%d", int), - ("frag", "frag", "%d", int), + ("label", "label", "%d", lambda x: int(x) if x else 0), + ("proto", "proto", "%d", lambda x: int(x) if x else 0), + ("tclass", "tclass", "%d", lambda x: int(x) if x else 0), + ("hlimit", "hlimit", "%d", lambda x: int(x) if x else 0), + ("frag", "frag", "%d", lambda x: int(x) if x else 0), ) def __init__( @@ -1119,7 +1360,7 @@ class ovskey(nla): "target", "target", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big"), + convert_ipv6, ), ("sll", "sll", macstr, lambda x: int.from_bytes(x, "big")), ("tll", "tll", macstr, lambda x: int.from_bytes(x, "big")), @@ -1204,13 +1445,13 @@ class ovskey(nla): "src", "src", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big", convertmac), + convert_ipv6, ), ( "dst", "dst", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big"), + convert_ipv6, ), ("tp_src", "tp_src", "%d", int), ("tp_dst", "tp_dst", "%d", int), @@ -1235,6 +1476,163 @@ class ovskey(nla): init=init, ) + class ovs_key_tunnel(nla): + nla_flags = NLA_F_NESTED + + nla_map = ( + ("OVS_TUNNEL_KEY_ATTR_ID", "be64"), + ("OVS_TUNNEL_KEY_ATTR_IPV4_SRC", "ipaddr"), + ("OVS_TUNNEL_KEY_ATTR_IPV4_DST", "ipaddr"), + ("OVS_TUNNEL_KEY_ATTR_TOS", "uint8"), + ("OVS_TUNNEL_KEY_ATTR_TTL", "uint8"), + ("OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT", "flag"), + ("OVS_TUNNEL_KEY_ATTR_CSUM", "flag"), + ("OVS_TUNNEL_KEY_ATTR_OAM", "flag"), + ("OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS", "array(uint32)"), + ("OVS_TUNNEL_KEY_ATTR_TP_SRC", "be16"), + ("OVS_TUNNEL_KEY_ATTR_TP_DST", "be16"), + ("OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS", "none"), + ("OVS_TUNNEL_KEY_ATTR_IPV6_SRC", "ipaddr"), + ("OVS_TUNNEL_KEY_ATTR_IPV6_DST", "ipaddr"), + ("OVS_TUNNEL_KEY_ATTR_PAD", "none"), + ("OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS", "none"), + ("OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE", "flag"), + ) + + def parse(self, flowstr, mask=None): + if not flowstr.startswith("tunnel("): + return None, None + + k = ovskey.ovs_key_tunnel() + if mask is not None: + mask = ovskey.ovs_key_tunnel() + + flowstr = flowstr[len("tunnel("):] + + v6_address = None + + fields = [ + ("tun_id=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_ID", + 0xffffffffffffffff, None, None), + + ("src=", r"([0-9a-fA-F\.]+)", str, + "OVS_TUNNEL_KEY_ATTR_IPV4_SRC", "255.255.255.255", "0.0.0.0", + False), + ("dst=", r"([0-9a-fA-F\.]+)", str, + "OVS_TUNNEL_KEY_ATTR_IPV4_DST", "255.255.255.255", "0.0.0.0", + False), + + ("ipv6_src=", r"([0-9a-fA-F:]+)", str, + "OVS_TUNNEL_KEY_ATTR_IPV6_SRC", + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "::", True), + ("ipv6_dst=", r"([0-9a-fA-F:]+)", str, + "OVS_TUNNEL_KEY_ATTR_IPV6_DST", + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "::", True), + + ("tos=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TOS", 255, 0, + None), + ("ttl=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TTL", 255, 0, + None), + + ("tp_src=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TP_SRC", + 65535, 0, None), + ("tp_dst=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TP_DST", + 65535, 0, None), + ] + + forced_include = ["OVS_TUNNEL_KEY_ATTR_TTL"] + + for prefix, regex, typ, attr_name, mask_val, default_val, v46_flag in fields: + flowstr, value = parse_extract_field(flowstr, prefix, regex, typ, False) + if not attr_name: + raise Exception("Bad list value in tunnel fields") + + if value is None and attr_name in forced_include: + value = default_val + mask_val = default_val + + if value is not None: + if v46_flag is not None: + if v6_address is None: + v6_address = v46_flag + if v46_flag != v6_address: + raise ValueError("Cannot mix v6 and v4 addresses") + k["attrs"].append([attr_name, value]) + if mask is not None: + mask["attrs"].append([attr_name, mask_val]) + else: + if v46_flag is not None: + if v6_address is None or v46_flag != v6_address: + continue + if mask is not None: + mask["attrs"].append([attr_name, default_val]) + + if k["attrs"][0][0] != "OVS_TUNNEL_KEY_ATTR_ID": + raise ValueError("Needs a tunid set") + + if flowstr.startswith("flags("): + flowstr = flowstr[len("flags("):] + flagspos = flowstr.find(")") + flags = flowstr[:flagspos] + flowstr = flowstr[flagspos + 1:] + + flag_attrs = { + "df": "OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT", + "csum": "OVS_TUNNEL_KEY_ATTR_CSUM", + "oam": "OVS_TUNNEL_KEY_ATTR_OAM" + } + + for flag in flags.split("|"): + if flag in flag_attrs: + k["attrs"].append([flag_attrs[flag], True]) + if mask is not None: + mask["attrs"].append([flag_attrs[flag], True]) + + flowstr = flowstr[strspn(flowstr, ", ") :] + return flowstr, k, mask + + def dpstr(self, mask=None, more=False): + print_str = "tunnel(" + + flagsattrs = [] + for k in self["attrs"]: + noprint = False + if k[0] == "OVS_TUNNEL_KEY_ATTR_ID": + print_str += "tun_id=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV4_SRC": + print_str += "src=%s" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV4_DST": + print_str += "dst=%s" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV6_SRC": + print_str += "ipv6_src=%s" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV6_DST": + print_str += "ipv6_dst=%s" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_TOS": + print_str += "tos=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_TTL": + print_str += "ttl=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_TP_SRC": + print_str += "tp_src=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_TP_DST": + print_str += "tp_dst=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT": + noprint = True + flagsattrs.append("df") + elif k[0] == "OVS_TUNNEL_KEY_ATTR_CSUM": + noprint = True + flagsattrs.append("csum") + elif k[0] == "OVS_TUNNEL_KEY_ATTR_OAM": + noprint = True + flagsattrs.append("oam") + + if not noprint: + print_str += "," + + if len(flagsattrs): + print_str += "flags(" + "|".join(flagsattrs) + ")" + print_str += ")" + return print_str + class ovs_key_mpls(nla): fields = (("lse", ">I"),) @@ -1243,6 +1641,7 @@ class ovskey(nla): ("OVS_KEY_ATTR_PRIORITY", "skb_priority", intparse), ("OVS_KEY_ATTR_SKB_MARK", "skb_mark", intparse), ("OVS_KEY_ATTR_RECIRC_ID", "recirc_id", intparse), + ("OVS_KEY_ATTR_TUNNEL", "tunnel", ovskey.ovs_key_tunnel), ("OVS_KEY_ATTR_DP_HASH", "dp_hash", intparse), ("OVS_KEY_ATTR_CT_STATE", "ct_state", parse_ct_state), ("OVS_KEY_ATTR_CT_ZONE", "ct_zone", intparse), @@ -1309,7 +1708,7 @@ class ovskey(nla): mask["attrs"].append([field[0], m]) self["attrs"].append([field[0], k]) - flowstr = flowstr[strspn(flowstr, "),") :] + flowstr = flowstr[strspn(flowstr, "), ") :] return flowstr @@ -1346,6 +1745,13 @@ class ovskey(nla): True, ), ( + "OVS_KEY_ATTR_TUNNEL", + "tunnel", + None, + False, + False, + ), + ( "OVS_KEY_ATTR_CT_STATE", "ct_state", "0x%04x", @@ -1617,7 +2023,7 @@ class OvsVport(GenericNetlinkSocket): ("OVS_VPORT_ATTR_PORT_NO", "uint32"), ("OVS_VPORT_ATTR_TYPE", "uint32"), ("OVS_VPORT_ATTR_NAME", "asciiz"), - ("OVS_VPORT_ATTR_OPTIONS", "none"), + ("OVS_VPORT_ATTR_OPTIONS", "vportopts"), ("OVS_VPORT_ATTR_UPCALL_PID", "array(uint32)"), ("OVS_VPORT_ATTR_STATS", "vportstats"), ("OVS_VPORT_ATTR_PAD", "none"), @@ -1625,6 +2031,13 @@ class OvsVport(GenericNetlinkSocket): ("OVS_VPORT_ATTR_NETNSID", "uint32"), ) + class vportopts(nla): + nla_map = ( + ("OVS_TUNNEL_ATTR_UNSPEC", "none"), + ("OVS_TUNNEL_ATTR_DST_PORT", "uint16"), + ("OVS_TUNNEL_ATTR_EXTENSION", "none"), + ) + class vportstats(nla): fields = ( ("rx_packets", "=Q"), @@ -1693,7 +2106,7 @@ class OvsVport(GenericNetlinkSocket): raise ne return reply - def attach(self, dpindex, vport_ifname, ptype): + def attach(self, dpindex, vport_ifname, ptype, dport, lwt): msg = OvsVport.ovs_vport_msg() msg["cmd"] = OVS_VPORT_CMD_NEW @@ -1702,12 +2115,43 @@ class OvsVport(GenericNetlinkSocket): msg["dpifindex"] = dpindex port_type = OvsVport.str_to_type(ptype) - msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type]) msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname]) msg["attrs"].append( ["OVS_VPORT_ATTR_UPCALL_PID", [self.upcall_packet.epid]] ) + TUNNEL_DEFAULTS = [("geneve", 6081), + ("vxlan", 4789)] + + for tnl in TUNNEL_DEFAULTS: + if ptype == tnl[0]: + if not dport: + dport = tnl[1] + + if not lwt: + vportopt = OvsVport.ovs_vport_msg.vportopts() + vportopt["attrs"].append( + ["OVS_TUNNEL_ATTR_DST_PORT", socket.htons(dport)] + ) + msg["attrs"].append( + ["OVS_VPORT_ATTR_OPTIONS", vportopt] + ) + else: + port_type = OvsVport.OVS_VPORT_TYPE_NETDEV + ipr = pyroute2.iproute.IPRoute() + + if tnl[0] == "geneve": + ipr.link("add", ifname=vport_ifname, kind=tnl[0], + geneve_port=dport, + geneve_collect_metadata=True, + geneve_udp_zero_csum6_rx=1) + elif tnl[0] == "vxlan": + ipr.link("add", ifname=vport_ifname, kind=tnl[0], + vxlan_learning=0, vxlan_collect_metadata=1, + vxlan_udp_zero_csum6_rx=1, vxlan_port=dport) + break + msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type]) + try: reply = self.nlm_request( msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK @@ -2018,10 +2462,71 @@ class OvsFlow(GenericNetlinkSocket): print("MISS upcall[%d/%s]: %s" % (seq, pktpres, keystr), flush=True) def execute(self, packetmsg): - print("userspace execute command") + print("userspace execute command", flush=True) def action(self, packetmsg): - print("userspace action command") + print("userspace action command", flush=True) + + +class psample_sample(genlmsg): + nla_map = ( + ("PSAMPLE_ATTR_IIFINDEX", "none"), + ("PSAMPLE_ATTR_OIFINDEX", "none"), + ("PSAMPLE_ATTR_ORIGSIZE", "none"), + ("PSAMPLE_ATTR_SAMPLE_GROUP", "uint32"), + ("PSAMPLE_ATTR_GROUP_SEQ", "none"), + ("PSAMPLE_ATTR_SAMPLE_RATE", "uint32"), + ("PSAMPLE_ATTR_DATA", "array(uint8)"), + ("PSAMPLE_ATTR_GROUP_REFCOUNT", "none"), + ("PSAMPLE_ATTR_TUNNEL", "none"), + ("PSAMPLE_ATTR_PAD", "none"), + ("PSAMPLE_ATTR_OUT_TC", "none"), + ("PSAMPLE_ATTR_OUT_TC_OCC", "none"), + ("PSAMPLE_ATTR_LATENCY", "none"), + ("PSAMPLE_ATTR_TIMESTAMP", "none"), + ("PSAMPLE_ATTR_PROTO", "none"), + ("PSAMPLE_ATTR_USER_COOKIE", "array(uint8)"), + ) + + def dpstr(self): + fields = [] + data = "" + for (attr, value) in self["attrs"]: + if attr == "PSAMPLE_ATTR_SAMPLE_GROUP": + fields.append("group:%d" % value) + if attr == "PSAMPLE_ATTR_SAMPLE_RATE": + fields.append("rate:%d" % value) + if attr == "PSAMPLE_ATTR_USER_COOKIE": + value = "".join(format(x, "02x") for x in value) + fields.append("cookie:%s" % value) + if attr == "PSAMPLE_ATTR_DATA" and len(value) > 0: + data = "data:%s" % "".join(format(x, "02x") for x in value) + + return ("%s %s" % (",".join(fields), data)).strip() + + +class psample_msg(Marshal): + PSAMPLE_CMD_SAMPLE = 0 + PSAMPLE_CMD_GET_GROUP = 1 + PSAMPLE_CMD_NEW_GROUP = 2 + PSAMPLE_CMD_DEL_GROUP = 3 + PSAMPLE_CMD_SET_FILTER = 4 + msg_map = {PSAMPLE_CMD_SAMPLE: psample_sample} + + +class PsampleEvent(EventSocket): + genl_family = "psample" + mcast_groups = ["packets"] + marshal_class = psample_msg + + def read_samples(self): + print("listening for psample events", flush=True) + while True: + try: + for msg in self.get(): + print(msg.dpstr(), flush=True) + except NetlinkError as ne: + raise ne def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()): @@ -2053,12 +2558,19 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()): for iface in ndb.interfaces: rep = vpl.info(iface.ifname, ifindex) if rep is not None: + opts = "" + vpo = rep.get_attr("OVS_VPORT_ATTR_OPTIONS") + if vpo: + dpo = vpo.get_attr("OVS_TUNNEL_ATTR_DST_PORT") + if dpo: + opts += " tnl-dport:%s" % socket.ntohs(dpo) print( - " port %d: %s (%s)" + " port %d: %s (%s%s)" % ( rep.get_attr("OVS_VPORT_ATTR_PORT_NO"), rep.get_attr("OVS_VPORT_ATTR_NAME"), OvsVport.type_to_str(rep.get_attr("OVS_VPORT_ATTR_TYPE")), + opts, ) ) @@ -2081,7 +2593,7 @@ def main(argv): help="Increment 'verbose' output counter.", default=0, ) - subparsers = parser.add_subparsers() + subparsers = parser.add_subparsers(dest="subcommand") showdpcmd = subparsers.add_parser("show") showdpcmd.add_argument( @@ -2120,12 +2632,30 @@ def main(argv): "--ptype", type=str, default="netdev", - choices=["netdev", "internal"], + choices=["netdev", "internal", "geneve", "vxlan"], help="Interface type (default netdev)", ) + addifcmd.add_argument( + "-p", + "--dport", + type=int, + default=0, + help="Destination port (0 for default)" + ) + addifcmd.add_argument( + "-l", + "--lwt", + type=bool, + default=True, + help="Use LWT infrastructure instead of vport (default true)." + ) delifcmd = subparsers.add_parser("del-if") delifcmd.add_argument("dpname", help="Datapath Name") delifcmd.add_argument("delif", help="Interface name for adding") + delifcmd.add_argument("-d", + "--dellink", + type=bool, default=False, + help="Delete the link as well.") dumpflcmd = subparsers.add_parser("dump-flows") dumpflcmd.add_argument("dumpdp", help="Datapath Name") @@ -2138,6 +2668,8 @@ def main(argv): delfscmd = subparsers.add_parser("del-flows") delfscmd.add_argument("flsbr", help="Datapath name") + subparsers.add_parser("psample-events") + args = parser.parse_args() if args.verbose > 0: @@ -2152,6 +2684,9 @@ def main(argv): sys.setrecursionlimit(100000) + if args.subcommand == "psample-events": + PsampleEvent().read_samples() + if hasattr(args, "showdp"): found = False for iface in ndb.interfaces: @@ -2186,7 +2721,8 @@ def main(argv): print("DP '%s' not found." % args.dpname) return 1 dpindex = rep["dpifindex"] - rep = ovsvp.attach(rep["dpifindex"], args.addif, args.ptype) + rep = ovsvp.attach(rep["dpifindex"], args.addif, args.ptype, + args.dport, args.lwt) msg = "vport '%s'" % args.addif if rep and rep["header"]["error"] is None: msg += " added." @@ -2207,6 +2743,9 @@ def main(argv): msg += " removed." else: msg += " failed to remove." + if args.dellink: + ipr = pyroute2.iproute.IPRoute() + ipr.link("del", index=ipr.link_lookup(ifname=args.delif)[0]) elif hasattr(args, "dumpdp"): rep = ovsdp.info(args.dumpdp, 0) if rep is None: diff --git a/tools/testing/selftests/net/openvswitch/settings b/tools/testing/selftests/net/openvswitch/settings new file mode 100644 index 000000000000..e2206265f67c --- /dev/null +++ b/tools/testing/selftests/net/openvswitch/settings @@ -0,0 +1 @@ +timeout=900 diff --git a/tools/testing/selftests/net/ovpn/.gitignore b/tools/testing/selftests/net/ovpn/.gitignore new file mode 100644 index 000000000000..ee44c081ca7c --- /dev/null +++ b/tools/testing/selftests/net/ovpn/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0+ +ovpn-cli diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile new file mode 100644 index 000000000000..e0926d76b4c8 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES) +VAR_CFLAGS = $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null) +ifeq ($(VAR_CFLAGS),) +VAR_CFLAGS = -I/usr/include/libnl3 +endif +CFLAGS += $(VAR_CFLAGS) + + +LDLIBS = -lmbedtls -lmbedcrypto +VAR_LDLIBS = $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null) +ifeq ($(VAR_LDLIBS),) +VAR_LDLIBS = -lnl-genl-3 -lnl-3 +endif +LDLIBS += $(VAR_LDLIBS) + + +TEST_FILES = common.sh + +TEST_PROGS = test.sh \ + test-large-mtu.sh \ + test-chachapoly.sh \ + test-tcp.sh \ + test-float.sh \ + test-close-socket.sh \ + test-close-socket-tcp.sh + +TEST_GEN_FILES := ovpn-cli + +include ../../lib.mk diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh new file mode 100644 index 000000000000..88869c675d03 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/common.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt} +TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt} +OVPN_CLI=${OVPN_CLI:-./ovpn-cli} +ALG=${ALG:-aes} +PROTO=${PROTO:-UDP} +FLOAT=${FLOAT:-0} + +LAN_IP="11.11.11.11" + +create_ns() { + ip netns add peer${1} +} + +setup_ns() { + MODE="P2P" + + if [ ${1} -eq 0 ]; then + MODE="MP" + for p in $(seq 1 ${NUM_PEERS}); do + ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p} + + ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p} + ip -n peer0 addr add fd00:0:0:${p}::1/64 dev veth${p} + ip -n peer0 link set veth${p} up + + ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p} + ip -n peer${p} addr add fd00:0:0:${p}::2/64 dev veth${p} + ip -n peer${p} link set veth${p} up + done + fi + + ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE + ip -n peer${1} addr add ${2} dev tun${1} + # add a secondary IP to peer 1, to test a LAN behind a client + if [ ${1} -eq 1 -a -n "${LAN_IP}" ]; then + ip -n peer${1} addr add ${LAN_IP} dev tun${1} + ip -n peer0 route add ${LAN_IP} via $(echo ${2} |sed -e s'!/.*!!') dev tun0 + fi + if [ -n "${3}" ]; then + ip -n peer${1} link set mtu ${3} dev tun${1} + fi + ip -n peer${1} link set tun${1} up +} + +add_peer() { + if [ "${PROTO}" == "UDP" ]; then + if [ ${1} -eq 0 ]; then + ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 ${UDP_PEERS_FILE} + + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \ + data64.key + done + else + RADDR=$(awk "NR == ${1} {print \$2}" ${UDP_PEERS_FILE}) + RPORT=$(awk "NR == ${1} {print \$3}" ${UDP_PEERS_FILE}) + LPORT=$(awk "NR == ${1} {print \$5}" ${UDP_PEERS_FILE}) + ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} ${1} ${LPORT} \ + ${RADDR} ${RPORT} + ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} ${1} 1 0 ${ALG} 1 \ + data64.key + fi + else + if [ ${1} -eq 0 ]; then + (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${TCP_PEERS_FILE} && { + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \ + ${ALG} 0 data64.key + done + }) & + sleep 5 + else + ip netns exec peer${1} ${OVPN_CLI} connect tun${1} ${1} 10.10.${1}.1 1 \ + data64.key + fi + fi +} + +cleanup() { + # some ovpn-cli processes sleep in background so they need manual poking + killall $(basename ${OVPN_CLI}) 2>/dev/null || true + + # netns peer0 is deleted without erasing ifaces first + for p in $(seq 1 10); do + ip -n peer${p} link set tun${p} down 2>/dev/null || true + ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true + done + for p in $(seq 1 10); do + ip -n peer0 link del veth${p} 2>/dev/null || true + done + for p in $(seq 0 10); do + ip netns del peer${p} 2>/dev/null || true + done +} + +if [ "${PROTO}" == "UDP" ]; then + NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')} +else + NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')} +fi + + diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config new file mode 100644 index 000000000000..71946ba9fa17 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/config @@ -0,0 +1,10 @@ +CONFIG_NET=y +CONFIG_INET=y +CONFIG_STREAM_PARSER=y +CONFIG_NET_UDP_TUNNEL=y +CONFIG_DST_CACHE=y +CONFIG_CRYPTO=y +CONFIG_CRYPTO_AES=y +CONFIG_CRYPTO_GCM=y +CONFIG_CRYPTO_CHACHA20POLY1305=y +CONFIG_OVPN=m diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key new file mode 100644 index 000000000000..a99e88c4e290 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/data64.key @@ -0,0 +1,5 @@ +jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3B +ia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9 +uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6 +KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tE +BofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w== diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c new file mode 100644 index 000000000000..de9c26f98b2e --- /dev/null +++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c @@ -0,0 +1,2383 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel accelerator + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + */ + +#include <stdio.h> +#include <inttypes.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <time.h> + +#include <linux/ovpn.h> +#include <linux/types.h> +#include <linux/netlink.h> + +#include <netlink/socket.h> +#include <netlink/netlink.h> +#include <netlink/genl/genl.h> +#include <netlink/genl/family.h> +#include <netlink/genl/ctrl.h> + +#include <mbedtls/base64.h> +#include <mbedtls/error.h> + +#include <sys/socket.h> + +/* defines to make checkpatch happy */ +#define strscpy strncpy +#define __always_unused __attribute__((__unused__)) + +/* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we + * have to explicitly do it to prevent the kernel from failing upon + * parsing of the message + */ +#define nla_nest_start(_msg, _type) \ + nla_nest_start(_msg, (_type) | NLA_F_NESTED) + +/* libnl < 3.11.0 does not implement nla_get_uint() */ +uint64_t ovpn_nla_get_uint(struct nlattr *attr) +{ + if (nla_len(attr) == sizeof(uint32_t)) + return nla_get_u32(attr); + else + return nla_get_u64(attr); +} + +typedef int (*ovpn_nl_cb)(struct nl_msg *msg, void *arg); + +enum ovpn_key_direction { + KEY_DIR_IN = 0, + KEY_DIR_OUT, +}; + +#define KEY_LEN (256 / 8) +#define NONCE_LEN 8 + +#define PEER_ID_UNDEF 0x00FFFFFF +#define MAX_PEERS 10 + +struct nl_ctx { + struct nl_sock *nl_sock; + struct nl_msg *nl_msg; + struct nl_cb *nl_cb; + + int ovpn_dco_id; +}; + +enum ovpn_cmd { + CMD_INVALID, + CMD_NEW_IFACE, + CMD_DEL_IFACE, + CMD_LISTEN, + CMD_CONNECT, + CMD_NEW_PEER, + CMD_NEW_MULTI_PEER, + CMD_SET_PEER, + CMD_DEL_PEER, + CMD_GET_PEER, + CMD_NEW_KEY, + CMD_DEL_KEY, + CMD_GET_KEY, + CMD_SWAP_KEYS, + CMD_LISTEN_MCAST, +}; + +struct ovpn_ctx { + enum ovpn_cmd cmd; + + __u8 key_enc[KEY_LEN]; + __u8 key_dec[KEY_LEN]; + __u8 nonce[NONCE_LEN]; + + enum ovpn_cipher_alg cipher; + + sa_family_t sa_family; + + unsigned long peer_id; + unsigned long lport; + + union { + struct sockaddr_in in4; + struct sockaddr_in6 in6; + } remote; + + union { + struct sockaddr_in in4; + struct sockaddr_in6 in6; + } peer_ip; + + bool peer_ip_set; + + unsigned int ifindex; + char ifname[IFNAMSIZ]; + enum ovpn_mode mode; + bool mode_set; + + int socket; + int cli_sockets[MAX_PEERS]; + + __u32 keepalive_interval; + __u32 keepalive_timeout; + + enum ovpn_key_direction key_dir; + enum ovpn_key_slot key_slot; + int key_id; + + const char *peers_file; +}; + +static int ovpn_nl_recvmsgs(struct nl_ctx *ctx) +{ + int ret; + + ret = nl_recvmsgs(ctx->nl_sock, ctx->nl_cb); + + switch (ret) { + case -NLE_INTR: + fprintf(stderr, + "netlink received interrupt due to signal - ignoring\n"); + break; + case -NLE_NOMEM: + fprintf(stderr, "netlink out of memory error\n"); + break; + case -NLE_AGAIN: + fprintf(stderr, + "netlink reports blocking read - aborting wait\n"); + break; + default: + if (ret) + fprintf(stderr, "netlink reports error (%d): %s\n", + ret, nl_geterror(-ret)); + break; + } + + return ret; +} + +static struct nl_ctx *nl_ctx_alloc_flags(struct ovpn_ctx *ovpn, int cmd, + int flags) +{ + struct nl_ctx *ctx; + int err, ret; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return NULL; + + ctx->nl_sock = nl_socket_alloc(); + if (!ctx->nl_sock) { + fprintf(stderr, "cannot allocate netlink socket\n"); + goto err_free; + } + + nl_socket_set_buffer_size(ctx->nl_sock, 8192, 8192); + + ret = genl_connect(ctx->nl_sock); + if (ret) { + fprintf(stderr, "cannot connect to generic netlink: %s\n", + nl_geterror(ret)); + goto err_sock; + } + + /* enable Extended ACK for detailed error reporting */ + err = 1; + setsockopt(nl_socket_get_fd(ctx->nl_sock), SOL_NETLINK, NETLINK_EXT_ACK, + &err, sizeof(err)); + + ctx->ovpn_dco_id = genl_ctrl_resolve(ctx->nl_sock, OVPN_FAMILY_NAME); + if (ctx->ovpn_dco_id < 0) { + fprintf(stderr, "cannot find ovpn_dco netlink component: %d\n", + ctx->ovpn_dco_id); + goto err_free; + } + + ctx->nl_msg = nlmsg_alloc(); + if (!ctx->nl_msg) { + fprintf(stderr, "cannot allocate netlink message\n"); + goto err_sock; + } + + ctx->nl_cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!ctx->nl_cb) { + fprintf(stderr, "failed to allocate netlink callback\n"); + goto err_msg; + } + + nl_socket_set_cb(ctx->nl_sock, ctx->nl_cb); + + genlmsg_put(ctx->nl_msg, 0, 0, ctx->ovpn_dco_id, 0, flags, cmd, 0); + + if (ovpn->ifindex > 0) + NLA_PUT_U32(ctx->nl_msg, OVPN_A_IFINDEX, ovpn->ifindex); + + return ctx; +nla_put_failure: +err_msg: + nlmsg_free(ctx->nl_msg); +err_sock: + nl_socket_free(ctx->nl_sock); +err_free: + free(ctx); + return NULL; +} + +static struct nl_ctx *nl_ctx_alloc(struct ovpn_ctx *ovpn, int cmd) +{ + return nl_ctx_alloc_flags(ovpn, cmd, 0); +} + +static void nl_ctx_free(struct nl_ctx *ctx) +{ + if (!ctx) + return; + + nl_socket_free(ctx->nl_sock); + nlmsg_free(ctx->nl_msg); + nl_cb_put(ctx->nl_cb); + free(ctx); +} + +static int ovpn_nl_cb_error(struct sockaddr_nl (*nla)__always_unused, + struct nlmsgerr *err, void *arg) +{ + struct nlmsghdr *nlh = (struct nlmsghdr *)err - 1; + struct nlattr *tb_msg[NLMSGERR_ATTR_MAX + 1]; + int len = nlh->nlmsg_len; + struct nlattr *attrs; + int *ret = arg; + int ack_len = sizeof(*nlh) + sizeof(int) + sizeof(*nlh); + + *ret = err->error; + + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return NL_STOP; + + if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) + ack_len += err->msg.nlmsg_len - sizeof(*nlh); + + if (len <= ack_len) + return NL_STOP; + + attrs = (void *)((uint8_t *)nlh + ack_len); + len -= ack_len; + + nla_parse(tb_msg, NLMSGERR_ATTR_MAX, attrs, len, NULL); + if (tb_msg[NLMSGERR_ATTR_MSG]) { + len = strnlen((char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]), + nla_len(tb_msg[NLMSGERR_ATTR_MSG])); + fprintf(stderr, "kernel error: %*s\n", len, + (char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG])); + } + + if (tb_msg[NLMSGERR_ATTR_MISS_NEST]) { + fprintf(stderr, "missing required nesting type %u\n", + nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_NEST])); + } + + if (tb_msg[NLMSGERR_ATTR_MISS_TYPE]) { + fprintf(stderr, "missing required attribute type %u\n", + nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_TYPE])); + } + + return NL_STOP; +} + +static int ovpn_nl_cb_finish(struct nl_msg (*msg)__always_unused, + void *arg) +{ + int *status = arg; + + *status = 0; + return NL_SKIP; +} + +static int ovpn_nl_cb_ack(struct nl_msg (*msg)__always_unused, + void *arg) +{ + int *status = arg; + + *status = 0; + return NL_STOP; +} + +static int ovpn_nl_msg_send(struct nl_ctx *ctx, ovpn_nl_cb cb) +{ + int status = 1; + + nl_cb_err(ctx->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &status); + nl_cb_set(ctx->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish, + &status); + nl_cb_set(ctx->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_ack, &status); + + if (cb) + nl_cb_set(ctx->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, ctx); + + nl_send_auto_complete(ctx->nl_sock, ctx->nl_msg); + + while (status == 1) + ovpn_nl_recvmsgs(ctx); + + if (status < 0) + fprintf(stderr, "failed to send netlink message: %s (%d)\n", + strerror(-status), status); + + return status; +} + +static int ovpn_parse_key(const char *file, struct ovpn_ctx *ctx) +{ + int idx_enc, idx_dec, ret = -1; + unsigned char *ckey = NULL; + __u8 *bkey = NULL; + size_t olen = 0; + long ckey_len; + FILE *fp; + + fp = fopen(file, "r"); + if (!fp) { + fprintf(stderr, "cannot open: %s\n", file); + return -1; + } + + /* get file size */ + fseek(fp, 0L, SEEK_END); + ckey_len = ftell(fp); + rewind(fp); + + /* if the file is longer, let's just read a portion */ + if (ckey_len > 256) + ckey_len = 256; + + ckey = malloc(ckey_len); + if (!ckey) + goto err; + + ret = fread(ckey, 1, ckey_len, fp); + if (ret != ckey_len) { + fprintf(stderr, + "couldn't read enough data from key file: %dbytes read\n", + ret); + goto err; + } + + olen = 0; + ret = mbedtls_base64_decode(NULL, 0, &olen, ckey, ckey_len); + if (ret != MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL) { + char buf[256]; + + mbedtls_strerror(ret, buf, sizeof(buf)); + fprintf(stderr, "unexpected base64 error1: %s (%d)\n", buf, + ret); + + goto err; + } + + bkey = malloc(olen); + if (!bkey) { + fprintf(stderr, "cannot allocate binary key buffer\n"); + goto err; + } + + ret = mbedtls_base64_decode(bkey, olen, &olen, ckey, ckey_len); + if (ret) { + char buf[256]; + + mbedtls_strerror(ret, buf, sizeof(buf)); + fprintf(stderr, "unexpected base64 error2: %s (%d)\n", buf, + ret); + + goto err; + } + + if (olen < 2 * KEY_LEN + NONCE_LEN) { + fprintf(stderr, + "not enough data in key file, found %zdB but needs %dB\n", + olen, 2 * KEY_LEN + NONCE_LEN); + goto err; + } + + switch (ctx->key_dir) { + case KEY_DIR_IN: + idx_enc = 0; + idx_dec = 1; + break; + case KEY_DIR_OUT: + idx_enc = 1; + idx_dec = 0; + break; + default: + goto err; + } + + memcpy(ctx->key_enc, bkey + KEY_LEN * idx_enc, KEY_LEN); + memcpy(ctx->key_dec, bkey + KEY_LEN * idx_dec, KEY_LEN); + memcpy(ctx->nonce, bkey + 2 * KEY_LEN, NONCE_LEN); + + ret = 0; + +err: + fclose(fp); + free(bkey); + free(ckey); + + return ret; +} + +static int ovpn_parse_cipher(const char *cipher, struct ovpn_ctx *ctx) +{ + if (strcmp(cipher, "aes") == 0) + ctx->cipher = OVPN_CIPHER_ALG_AES_GCM; + else if (strcmp(cipher, "chachapoly") == 0) + ctx->cipher = OVPN_CIPHER_ALG_CHACHA20_POLY1305; + else if (strcmp(cipher, "none") == 0) + ctx->cipher = OVPN_CIPHER_ALG_NONE; + else + return -ENOTSUP; + + return 0; +} + +static int ovpn_parse_key_direction(const char *dir, struct ovpn_ctx *ctx) +{ + int in_dir; + + in_dir = strtoll(dir, NULL, 10); + switch (in_dir) { + case KEY_DIR_IN: + case KEY_DIR_OUT: + ctx->key_dir = in_dir; + break; + default: + fprintf(stderr, + "invalid key direction provided. Can be 0 or 1 only\n"); + return -1; + } + + return 0; +} + +static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto) +{ + struct sockaddr_storage local_sock = { 0 }; + struct sockaddr_in6 *in6; + struct sockaddr_in *in; + int ret, s, sock_type; + size_t sock_len; + + if (proto == IPPROTO_UDP) + sock_type = SOCK_DGRAM; + else if (proto == IPPROTO_TCP) + sock_type = SOCK_STREAM; + else + return -EINVAL; + + s = socket(family, sock_type, 0); + if (s < 0) { + perror("cannot create socket"); + return -1; + } + + switch (family) { + case AF_INET: + in = (struct sockaddr_in *)&local_sock; + in->sin_family = family; + in->sin_port = htons(ctx->lport); + in->sin_addr.s_addr = htonl(INADDR_ANY); + sock_len = sizeof(*in); + break; + case AF_INET6: + in6 = (struct sockaddr_in6 *)&local_sock; + in6->sin6_family = family; + in6->sin6_port = htons(ctx->lport); + in6->sin6_addr = in6addr_any; + sock_len = sizeof(*in6); + break; + default: + return -1; + } + + int opt = 1; + + ret = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + + if (ret < 0) { + perror("setsockopt for SO_REUSEADDR"); + return ret; + } + + ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); + if (ret < 0) { + perror("setsockopt for SO_REUSEPORT"); + return ret; + } + + if (family == AF_INET6) { + opt = 0; + if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt, + sizeof(opt))) { + perror("failed to set IPV6_V6ONLY"); + return -1; + } + } + + ret = bind(s, (struct sockaddr *)&local_sock, sock_len); + if (ret < 0) { + perror("cannot bind socket"); + goto err_socket; + } + + ctx->socket = s; + ctx->sa_family = family; + return 0; + +err_socket: + close(s); + return -1; +} + +static int ovpn_udp_socket(struct ovpn_ctx *ctx, sa_family_t family) +{ + return ovpn_socket(ctx, family, IPPROTO_UDP); +} + +static int ovpn_listen(struct ovpn_ctx *ctx, sa_family_t family) +{ + int ret; + + ret = ovpn_socket(ctx, family, IPPROTO_TCP); + if (ret < 0) + return ret; + + ret = listen(ctx->socket, 10); + if (ret < 0) { + perror("listen"); + close(ctx->socket); + return -1; + } + + return 0; +} + +static int ovpn_accept(struct ovpn_ctx *ctx) +{ + socklen_t socklen; + int ret; + + socklen = sizeof(ctx->remote); + ret = accept(ctx->socket, (struct sockaddr *)&ctx->remote, &socklen); + if (ret < 0) { + perror("accept"); + goto err; + } + + fprintf(stderr, "Connection received!\n"); + + switch (socklen) { + case sizeof(struct sockaddr_in): + case sizeof(struct sockaddr_in6): + break; + default: + fprintf(stderr, "error: expecting IPv4 or IPv6 connection\n"); + close(ret); + ret = -EINVAL; + goto err; + } + + return ret; +err: + close(ctx->socket); + return ret; +} + +static int ovpn_connect(struct ovpn_ctx *ovpn) +{ + socklen_t socklen; + int s, ret; + + s = socket(ovpn->remote.in4.sin_family, SOCK_STREAM, 0); + if (s < 0) { + perror("cannot create socket"); + return -1; + } + + switch (ovpn->remote.in4.sin_family) { + case AF_INET: + socklen = sizeof(struct sockaddr_in); + break; + case AF_INET6: + socklen = sizeof(struct sockaddr_in6); + break; + default: + return -EOPNOTSUPP; + } + + ret = connect(s, (struct sockaddr *)&ovpn->remote, socklen); + if (ret < 0) { + perror("connect"); + goto err; + } + + fprintf(stderr, "connected\n"); + + ovpn->socket = s; + + return 0; +err: + close(s); + return ret; +} + +static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_NEW); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket); + + if (!is_tcp) { + switch (ovpn->remote.in4.sin_family) { + case AF_INET: + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV4, + ovpn->remote.in4.sin_addr.s_addr); + NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT, + ovpn->remote.in4.sin_port); + break; + case AF_INET6: + NLA_PUT(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV6, + sizeof(ovpn->remote.in6.sin6_addr), + &ovpn->remote.in6.sin6_addr); + NLA_PUT_U32(ctx->nl_msg, + OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID, + ovpn->remote.in6.sin6_scope_id); + NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT, + ovpn->remote.in6.sin6_port); + break; + default: + fprintf(stderr, + "Invalid family for remote socket address\n"); + goto nla_put_failure; + } + } + + if (ovpn->peer_ip_set) { + switch (ovpn->peer_ip.in4.sin_family) { + case AF_INET: + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_VPN_IPV4, + ovpn->peer_ip.in4.sin_addr.s_addr); + break; + case AF_INET6: + NLA_PUT(ctx->nl_msg, OVPN_A_PEER_VPN_IPV6, + sizeof(struct in6_addr), + &ovpn->peer_ip.in6.sin6_addr); + break; + default: + fprintf(stderr, "Invalid family for peer address\n"); + goto nla_put_failure; + } + } + + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_set_peer(struct ovpn_ctx *ovpn) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_SET); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_INTERVAL, + ovpn->keepalive_interval); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_TIMEOUT, + ovpn->keepalive_timeout); + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_del_peer(struct ovpn_ctx *ovpn) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_DEL); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused) +{ + struct nlattr *pattrs[OVPN_A_PEER_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + __u16 rport = 0, lport = 0; + + nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!attrs[OVPN_A_PEER]) { + fprintf(stderr, "no packet content in netlink message\n"); + return NL_SKIP; + } + + nla_parse(pattrs, OVPN_A_PEER_MAX, nla_data(attrs[OVPN_A_PEER]), + nla_len(attrs[OVPN_A_PEER]), NULL); + + if (pattrs[OVPN_A_PEER_ID]) + fprintf(stderr, "* Peer %u\n", + nla_get_u32(pattrs[OVPN_A_PEER_ID])); + + if (pattrs[OVPN_A_PEER_SOCKET_NETNSID]) + fprintf(stderr, "\tsocket NetNS ID: %d\n", + nla_get_s32(pattrs[OVPN_A_PEER_SOCKET_NETNSID])); + + if (pattrs[OVPN_A_PEER_VPN_IPV4]) { + char buf[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, nla_data(pattrs[OVPN_A_PEER_VPN_IPV4]), + buf, sizeof(buf)); + fprintf(stderr, "\tVPN IPv4: %s\n", buf); + } + + if (pattrs[OVPN_A_PEER_VPN_IPV6]) { + char buf[INET6_ADDRSTRLEN]; + + inet_ntop(AF_INET6, nla_data(pattrs[OVPN_A_PEER_VPN_IPV6]), + buf, sizeof(buf)); + fprintf(stderr, "\tVPN IPv6: %s\n", buf); + } + + if (pattrs[OVPN_A_PEER_LOCAL_PORT]) + lport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_LOCAL_PORT])); + + if (pattrs[OVPN_A_PEER_REMOTE_PORT]) + rport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_REMOTE_PORT])); + + if (pattrs[OVPN_A_PEER_REMOTE_IPV6]) { + void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV6]; + char buf[INET6_ADDRSTRLEN]; + int scope_id = -1; + + if (pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) { + void *p = pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]; + + scope_id = nla_get_u32(p); + } + + inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tRemote: %s:%hu (scope-id: %u)\n", buf, rport, + scope_id); + + if (pattrs[OVPN_A_PEER_LOCAL_IPV6]) { + void *ip = pattrs[OVPN_A_PEER_LOCAL_IPV6]; + + inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport); + } + } + + if (pattrs[OVPN_A_PEER_REMOTE_IPV4]) { + void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV4]; + char buf[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tRemote: %s:%hu\n", buf, rport); + + if (pattrs[OVPN_A_PEER_LOCAL_IPV4]) { + void *p = pattrs[OVPN_A_PEER_LOCAL_IPV4]; + + inet_ntop(AF_INET, nla_data(p), buf, sizeof(buf)); + fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport); + } + } + + if (pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]) { + void *p = pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]; + + fprintf(stderr, "\tKeepalive interval: %u sec\n", + nla_get_u32(p)); + } + + if (pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]) + fprintf(stderr, "\tKeepalive timeout: %u sec\n", + nla_get_u32(pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])); + + if (pattrs[OVPN_A_PEER_VPN_RX_BYTES]) + fprintf(stderr, "\tVPN RX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_BYTES])); + + if (pattrs[OVPN_A_PEER_VPN_TX_BYTES]) + fprintf(stderr, "\tVPN TX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_BYTES])); + + if (pattrs[OVPN_A_PEER_VPN_RX_PACKETS]) + fprintf(stderr, "\tVPN RX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_PACKETS])); + + if (pattrs[OVPN_A_PEER_VPN_TX_PACKETS]) + fprintf(stderr, "\tVPN TX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_PACKETS])); + + if (pattrs[OVPN_A_PEER_LINK_RX_BYTES]) + fprintf(stderr, "\tLINK RX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_BYTES])); + + if (pattrs[OVPN_A_PEER_LINK_TX_BYTES]) + fprintf(stderr, "\tLINK TX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_BYTES])); + + if (pattrs[OVPN_A_PEER_LINK_RX_PACKETS]) + fprintf(stderr, "\tLINK RX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_PACKETS])); + + if (pattrs[OVPN_A_PEER_LINK_TX_PACKETS]) + fprintf(stderr, "\tLINK TX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_PACKETS])); + + return NL_SKIP; +} + +static int ovpn_get_peer(struct ovpn_ctx *ovpn) +{ + int flags = 0, ret = -1; + struct nlattr *attr; + struct nl_ctx *ctx; + + if (ovpn->peer_id == PEER_ID_UNDEF) + flags = NLM_F_DUMP; + + ctx = nl_ctx_alloc_flags(ovpn, OVPN_CMD_PEER_GET, flags); + if (!ctx) + return -ENOMEM; + + if (ovpn->peer_id != PEER_ID_UNDEF) { + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, attr); + } + + ret = ovpn_nl_msg_send(ctx, ovpn_handle_peer); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_new_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf, *key_dir; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_NEW); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_KEY_ID, ovpn->key_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_CIPHER_ALG, ovpn->cipher); + + key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_ENCRYPT_DIR); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_enc); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce); + nla_nest_end(ctx->nl_msg, key_dir); + + key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_DECRYPT_DIR); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_dec); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce); + nla_nest_end(ctx->nl_msg, key_dir); + + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_del_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_DEL); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_handle_key(struct nl_msg *msg, void (*arg)__always_unused) +{ + struct nlattr *kattrs[OVPN_A_KEYCONF_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + + nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!attrs[OVPN_A_KEYCONF]) { + fprintf(stderr, "no packet content in netlink message\n"); + return NL_SKIP; + } + + nla_parse(kattrs, OVPN_A_KEYCONF_MAX, nla_data(attrs[OVPN_A_KEYCONF]), + nla_len(attrs[OVPN_A_KEYCONF]), NULL); + + if (kattrs[OVPN_A_KEYCONF_PEER_ID]) + fprintf(stderr, "* Peer %u\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_PEER_ID])); + if (kattrs[OVPN_A_KEYCONF_SLOT]) { + fprintf(stderr, "\t- Slot: "); + switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])) { + case OVPN_KEY_SLOT_PRIMARY: + fprintf(stderr, "primary\n"); + break; + case OVPN_KEY_SLOT_SECONDARY: + fprintf(stderr, "secondary\n"); + break; + default: + fprintf(stderr, "invalid (%u)\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])); + break; + } + } + if (kattrs[OVPN_A_KEYCONF_KEY_ID]) + fprintf(stderr, "\t- Key ID: %u\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_KEY_ID])); + if (kattrs[OVPN_A_KEYCONF_CIPHER_ALG]) { + fprintf(stderr, "\t- Cipher: "); + switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])) { + case OVPN_CIPHER_ALG_NONE: + fprintf(stderr, "none\n"); + break; + case OVPN_CIPHER_ALG_AES_GCM: + fprintf(stderr, "aes-gcm\n"); + break; + case OVPN_CIPHER_ALG_CHACHA20_POLY1305: + fprintf(stderr, "chacha20poly1305\n"); + break; + default: + fprintf(stderr, "invalid (%u)\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])); + break; + } + } + + return NL_SKIP; +} + +static int ovpn_get_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_GET); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, ovpn_handle_key); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_swap_keys(struct ovpn_ctx *ovpn) +{ + struct nl_ctx *ctx; + struct nlattr *kc; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_SWAP); + if (!ctx) + return -ENOMEM; + + kc = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, kc); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +/* Helper function used to easily add attributes to a rtnl message */ +static int ovpn_addattr(struct nlmsghdr *n, int maxlen, int type, + const void *data, int alen) +{ + int len = RTA_LENGTH(alen); + struct rtattr *rta; + + if ((int)(NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len)) > maxlen) { + fprintf(stderr, "%s: rtnl: message exceeded bound of %d\n", + __func__, maxlen); + return -EMSGSIZE; + } + + rta = nlmsg_tail(n); + rta->rta_type = type; + rta->rta_len = len; + + if (!data) + memset(RTA_DATA(rta), 0, alen); + else + memcpy(RTA_DATA(rta), data, alen); + + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); + + return 0; +} + +static struct rtattr *ovpn_nest_start(struct nlmsghdr *msg, size_t max_size, + int attr) +{ + struct rtattr *nest = nlmsg_tail(msg); + + if (ovpn_addattr(msg, max_size, attr, NULL, 0) < 0) + return NULL; + + return nest; +} + +static void ovpn_nest_end(struct nlmsghdr *msg, struct rtattr *nest) +{ + nest->rta_len = (uint8_t *)nlmsg_tail(msg) - (uint8_t *)nest; +} + +#define RT_SNDBUF_SIZE (1024 * 2) +#define RT_RCVBUF_SIZE (1024 * 4) + +/* Open RTNL socket */ +static int ovpn_rt_socket(void) +{ + int sndbuf = RT_SNDBUF_SIZE, rcvbuf = RT_RCVBUF_SIZE, fd; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd < 0) { + fprintf(stderr, "%s: cannot open netlink socket\n", __func__); + return fd; + } + + if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, + sizeof(sndbuf)) < 0) { + fprintf(stderr, "%s: SO_SNDBUF\n", __func__); + close(fd); + return -1; + } + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, + sizeof(rcvbuf)) < 0) { + fprintf(stderr, "%s: SO_RCVBUF\n", __func__); + close(fd); + return -1; + } + + return fd; +} + +/* Bind socket to Netlink subsystem */ +static int ovpn_rt_bind(int fd, uint32_t groups) +{ + struct sockaddr_nl local = { 0 }; + socklen_t addr_len; + + local.nl_family = AF_NETLINK; + local.nl_groups = groups; + + if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) { + fprintf(stderr, "%s: cannot bind netlink socket: %d\n", + __func__, errno); + return -errno; + } + + addr_len = sizeof(local); + if (getsockname(fd, (struct sockaddr *)&local, &addr_len) < 0) { + fprintf(stderr, "%s: cannot getsockname: %d\n", __func__, + errno); + return -errno; + } + + if (addr_len != sizeof(local)) { + fprintf(stderr, "%s: wrong address length %d\n", __func__, + addr_len); + return -EINVAL; + } + + if (local.nl_family != AF_NETLINK) { + fprintf(stderr, "%s: wrong address family %d\n", __func__, + local.nl_family); + return -EINVAL; + } + + return 0; +} + +typedef int (*ovpn_parse_reply_cb)(struct nlmsghdr *msg, void *arg); + +/* Send Netlink message and run callback on reply (if specified) */ +static int ovpn_rt_send(struct nlmsghdr *payload, pid_t peer, + unsigned int groups, ovpn_parse_reply_cb cb, + void *arg_cb) +{ + int len, rem_len, fd, ret, rcv_len; + struct sockaddr_nl nladdr = { 0 }; + struct nlmsgerr *err; + struct nlmsghdr *h; + char buf[1024 * 16]; + struct iovec iov = { + .iov_base = payload, + .iov_len = payload->nlmsg_len, + }; + struct msghdr nlmsg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + nladdr.nl_family = AF_NETLINK; + nladdr.nl_pid = peer; + nladdr.nl_groups = groups; + + payload->nlmsg_seq = time(NULL); + + /* no need to send reply */ + if (!cb) + payload->nlmsg_flags |= NLM_F_ACK; + + fd = ovpn_rt_socket(); + if (fd < 0) { + fprintf(stderr, "%s: can't open rtnl socket\n", __func__); + return -errno; + } + + ret = ovpn_rt_bind(fd, 0); + if (ret < 0) { + fprintf(stderr, "%s: can't bind rtnl socket\n", __func__); + ret = -errno; + goto out; + } + + ret = sendmsg(fd, &nlmsg, 0); + if (ret < 0) { + fprintf(stderr, "%s: rtnl: error on sendmsg()\n", __func__); + ret = -errno; + goto out; + } + + /* prepare buffer to store RTNL replies */ + memset(buf, 0, sizeof(buf)); + iov.iov_base = buf; + + while (1) { + /* + * iov_len is modified by recvmsg(), therefore has to be initialized before + * using it again + */ + iov.iov_len = sizeof(buf); + rcv_len = recvmsg(fd, &nlmsg, 0); + if (rcv_len < 0) { + if (errno == EINTR || errno == EAGAIN) { + fprintf(stderr, "%s: interrupted call\n", + __func__); + continue; + } + fprintf(stderr, "%s: rtnl: error on recvmsg()\n", + __func__); + ret = -errno; + goto out; + } + + if (rcv_len == 0) { + fprintf(stderr, + "%s: rtnl: socket reached unexpected EOF\n", + __func__); + ret = -EIO; + goto out; + } + + if (nlmsg.msg_namelen != sizeof(nladdr)) { + fprintf(stderr, + "%s: sender address length: %u (expected %zu)\n", + __func__, nlmsg.msg_namelen, sizeof(nladdr)); + ret = -EIO; + goto out; + } + + h = (struct nlmsghdr *)buf; + while (rcv_len >= (int)sizeof(*h)) { + len = h->nlmsg_len; + rem_len = len - sizeof(*h); + + if (rem_len < 0 || len > rcv_len) { + if (nlmsg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "%s: truncated message\n", + __func__); + ret = -EIO; + goto out; + } + fprintf(stderr, "%s: malformed message: len=%d\n", + __func__, len); + ret = -EIO; + goto out; + } + + if (h->nlmsg_type == NLMSG_DONE) { + ret = 0; + goto out; + } + + if (h->nlmsg_type == NLMSG_ERROR) { + err = (struct nlmsgerr *)NLMSG_DATA(h); + if (rem_len < (int)sizeof(struct nlmsgerr)) { + fprintf(stderr, "%s: ERROR truncated\n", + __func__); + ret = -EIO; + goto out; + } + + if (err->error) { + fprintf(stderr, "%s: (%d) %s\n", + __func__, err->error, + strerror(-err->error)); + ret = err->error; + goto out; + } + + ret = 0; + if (cb) { + int r = cb(h, arg_cb); + + if (r <= 0) + ret = r; + } + goto out; + } + + if (cb) { + int r = cb(h, arg_cb); + + if (r <= 0) { + ret = r; + goto out; + } + } else { + fprintf(stderr, "%s: RTNL: unexpected reply\n", + __func__); + } + + rcv_len -= NLMSG_ALIGN(len); + h = (struct nlmsghdr *)((uint8_t *)h + + NLMSG_ALIGN(len)); + } + + if (nlmsg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "%s: message truncated\n", __func__); + continue; + } + + if (rcv_len) { + fprintf(stderr, "%s: rtnl: %d not parsed bytes\n", + __func__, rcv_len); + ret = -1; + goto out; + } + } +out: + close(fd); + + return ret; +} + +struct ovpn_link_req { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[256]; +}; + +static int ovpn_new_iface(struct ovpn_ctx *ovpn) +{ + struct rtattr *linkinfo, *data; + struct ovpn_link_req req = { 0 }; + int ret = -1; + + fprintf(stdout, "Creating interface %s with mode %u\n", ovpn->ifname, + ovpn->mode); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.n.nlmsg_type = RTM_NEWLINK; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_IFNAME, ovpn->ifname, + strlen(ovpn->ifname) + 1) < 0) + goto err; + + linkinfo = ovpn_nest_start(&req.n, sizeof(req), IFLA_LINKINFO); + if (!linkinfo) + goto err; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_INFO_KIND, OVPN_FAMILY_NAME, + strlen(OVPN_FAMILY_NAME) + 1) < 0) + goto err; + + if (ovpn->mode_set) { + data = ovpn_nest_start(&req.n, sizeof(req), IFLA_INFO_DATA); + if (!data) + goto err; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_OVPN_MODE, + &ovpn->mode, sizeof(uint8_t)) < 0) + goto err; + + ovpn_nest_end(&req.n, data); + } + + ovpn_nest_end(&req.n, linkinfo); + + req.i.ifi_family = AF_PACKET; + + ret = ovpn_rt_send(&req.n, 0, 0, NULL, NULL); +err: + return ret; +} + +static int ovpn_del_iface(struct ovpn_ctx *ovpn) +{ + struct ovpn_link_req req = { 0 }; + + fprintf(stdout, "Deleting interface %s ifindex %u\n", ovpn->ifname, + ovpn->ifindex); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_DELLINK; + + req.i.ifi_family = AF_PACKET; + req.i.ifi_index = ovpn->ifindex; + + return ovpn_rt_send(&req.n, 0, 0, NULL, NULL); +} + +static int nl_seq_check(struct nl_msg (*msg)__always_unused, + void (*arg)__always_unused) +{ + return NL_OK; +} + +struct mcast_handler_args { + const char *group; + int id; +}; + +static int mcast_family_handler(struct nl_msg *msg, void *arg) +{ + struct mcast_handler_args *grp = arg; + struct nlattr *tb[CTRL_ATTR_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *mcgrp; + int rem_mcgrp; + + nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!tb[CTRL_ATTR_MCAST_GROUPS]) + return NL_SKIP; + + nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) { + struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1]; + + nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX, + nla_data(mcgrp), nla_len(mcgrp), NULL); + + if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] || + !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]) + continue; + if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]), + grp->group, nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]))) + continue; + grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]); + break; + } + + return NL_SKIP; +} + +static int mcast_error_handler(struct sockaddr_nl (*nla)__always_unused, + struct nlmsgerr *err, void *arg) +{ + int *ret = arg; + + *ret = err->error; + return NL_STOP; +} + +static int mcast_ack_handler(struct nl_msg (*msg)__always_unused, void *arg) +{ + int *ret = arg; + + *ret = 0; + return NL_STOP; +} + +static int ovpn_handle_msg(struct nl_msg *msg, void *arg) +{ + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + struct nlmsghdr *nlh = nlmsg_hdr(msg); + char ifname[IF_NAMESIZE]; + int *ret = arg; + __u32 ifindex; + + fprintf(stderr, "received message from ovpn-dco\n"); + + *ret = -1; + + if (!genlmsg_valid_hdr(nlh, 0)) { + fprintf(stderr, "invalid header\n"); + return NL_STOP; + } + + if (nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL)) { + fprintf(stderr, "received bogus data from ovpn-dco\n"); + return NL_STOP; + } + + if (!attrs[OVPN_A_IFINDEX]) { + fprintf(stderr, "no ifindex in this message\n"); + return NL_STOP; + } + + ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]); + if (!if_indextoname(ifindex, ifname)) { + fprintf(stderr, "cannot resolve ifname for ifindex: %u\n", + ifindex); + return NL_STOP; + } + + switch (gnlh->cmd) { + case OVPN_CMD_PEER_DEL_NTF: + fprintf(stdout, "received CMD_PEER_DEL_NTF\n"); + break; + case OVPN_CMD_KEY_SWAP_NTF: + fprintf(stdout, "received CMD_KEY_SWAP_NTF\n"); + break; + default: + fprintf(stderr, "received unknown command: %d\n", gnlh->cmd); + return NL_STOP; + } + + *ret = 0; + return NL_OK; +} + +static int ovpn_get_mcast_id(struct nl_sock *sock, const char *family, + const char *group) +{ + struct nl_msg *msg; + struct nl_cb *cb; + int ret, ctrlid; + struct mcast_handler_args grp = { + .group = group, + .id = -ENOENT, + }; + + msg = nlmsg_alloc(); + if (!msg) + return -ENOMEM; + + cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!cb) { + ret = -ENOMEM; + goto out_fail_cb; + } + + ctrlid = genl_ctrl_resolve(sock, "nlctrl"); + + genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + + ret = -ENOBUFS; + NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family); + + ret = nl_send_auto_complete(sock, msg); + if (ret < 0) + goto nla_put_failure; + + ret = 1; + + nl_cb_err(cb, NL_CB_CUSTOM, mcast_error_handler, &ret); + nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, mcast_ack_handler, &ret); + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, mcast_family_handler, &grp); + + while (ret > 0) + nl_recvmsgs(sock, cb); + + if (ret == 0) + ret = grp.id; + nla_put_failure: + nl_cb_put(cb); + out_fail_cb: + nlmsg_free(msg); + return ret; +} + +static int ovpn_listen_mcast(void) +{ + struct nl_sock *sock; + struct nl_cb *cb; + int mcid, ret; + + sock = nl_socket_alloc(); + if (!sock) { + fprintf(stderr, "cannot allocate netlink socket\n"); + goto err_free; + } + + nl_socket_set_buffer_size(sock, 8192, 8192); + + ret = genl_connect(sock); + if (ret < 0) { + fprintf(stderr, "cannot connect to generic netlink: %s\n", + nl_geterror(ret)); + goto err_free; + } + + mcid = ovpn_get_mcast_id(sock, OVPN_FAMILY_NAME, OVPN_MCGRP_PEERS); + if (mcid < 0) { + fprintf(stderr, "cannot get mcast group: %s\n", + nl_geterror(mcid)); + goto err_free; + } + + ret = nl_socket_add_membership(sock, mcid); + if (ret) { + fprintf(stderr, "failed to join mcast group: %d\n", ret); + goto err_free; + } + + ret = 1; + cb = nl_cb_alloc(NL_CB_DEFAULT); + nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check, NULL); + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, &ret); + nl_cb_err(cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &ret); + + while (ret == 1) { + int err = nl_recvmsgs(sock, cb); + + if (err < 0) { + fprintf(stderr, + "cannot receive netlink message: (%d) %s\n", + err, nl_geterror(-err)); + ret = -1; + break; + } + } + + nl_cb_put(cb); +err_free: + nl_socket_free(sock); + return ret; +} + +static void usage(const char *cmd) +{ + fprintf(stderr, + "Usage %s <command> <iface> [arguments..]\n", + cmd); + fprintf(stderr, "where <command> can be one of the following\n\n"); + + fprintf(stderr, "* new_iface <iface> [mode]: create new ovpn interface\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tmode:\n"); + fprintf(stderr, "\t\t- P2P for peer-to-peer mode (i.e. client)\n"); + fprintf(stderr, "\t\t- MP for multi-peer mode (i.e. server)\n"); + + fprintf(stderr, "* del_iface <iface>: delete ovpn interface\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + + fprintf(stderr, + "* listen <iface> <lport> <peers_file> [ipv6]: listen for incoming peer TCP connections\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tlport: TCP port to listen to\n"); + fprintf(stderr, + "\tpeers_file: file containing one peer per line: Line format:\n"); + fprintf(stderr, "\t\t<peer_id> <vpnaddr>\n"); + fprintf(stderr, + "\tipv6: whether the socket should listen to the IPv6 wildcard address\n"); + + fprintf(stderr, + "* connect <iface> <peer_id> <raddr> <rport> [key_file]: start connecting peer of TCP-based VPN session\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the connecting peer\n"); + fprintf(stderr, "\traddr: peer IP address to connect to\n"); + fprintf(stderr, "\trport: peer TCP port to connect to\n"); + fprintf(stderr, + "\tkey_file: file containing the symmetric key for encryption\n"); + + fprintf(stderr, + "* new_peer <iface> <peer_id> <lport> <raddr> <rport> [vpnaddr]: add new peer\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tlport: local UDP port to bind to\n"); + fprintf(stderr, + "\tpeer_id: peer ID to be used in data packets to/from this peer\n"); + fprintf(stderr, "\traddr: peer IP address\n"); + fprintf(stderr, "\trport: peer UDP port\n"); + fprintf(stderr, "\tvpnaddr: peer VPN IP\n"); + + fprintf(stderr, + "* new_multi_peer <iface> <lport> <peers_file>: add multiple peers as listed in the file\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tlport: local UDP port to bind to\n"); + fprintf(stderr, + "\tpeers_file: text file containing one peer per line. Line format:\n"); + fprintf(stderr, "\t\t<peer_id> <raddr> <rport> <vpnaddr>\n"); + + fprintf(stderr, + "* set_peer <iface> <peer_id> <keepalive_interval> <keepalive_timeout>: set peer attributes\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + fprintf(stderr, + "\tkeepalive_interval: interval for sending ping messages\n"); + fprintf(stderr, + "\tkeepalive_timeout: time after which a peer is timed out\n"); + + fprintf(stderr, "* del_peer <iface> <peer_id>: delete peer\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to delete\n"); + + fprintf(stderr, "* get_peer <iface> [peer_id]: retrieve peer(s) status\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, + "\tpeer_id: peer ID of the peer to query. All peers are returned if omitted\n"); + + fprintf(stderr, + "* new_key <iface> <peer_id> <slot> <key_id> <cipher> <key_dir> <key_file>: set data channel key\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, + "\tpeer_id: peer ID of the peer to configure the key for\n"); + fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n"); + fprintf(stderr, "\tkey_id: an ID from 0 to 7\n"); + fprintf(stderr, + "\tcipher: cipher to use, supported: aes (AES-GCM), chachapoly (CHACHA20POLY1305)\n"); + fprintf(stderr, + "\tkey_dir: key direction, must 0 on one host and 1 on the other\n"); + fprintf(stderr, "\tkey_file: file containing the pre-shared key\n"); + + fprintf(stderr, + "* del_key <iface> <peer_id> [slot]: erase existing data channel key\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + fprintf(stderr, "\tslot: slot to erase. PRIMARY if omitted\n"); + + fprintf(stderr, + "* get_key <iface> <peer_id> <slot>: retrieve non sensible key data\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to query\n"); + fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n"); + + fprintf(stderr, + "* swap_keys <iface> <peer_id>: swap content of primary and secondary key slots\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + + fprintf(stderr, + "* listen_mcast: listen to ovpn netlink multicast messages\n"); +} + +static int ovpn_parse_remote(struct ovpn_ctx *ovpn, const char *host, + const char *service, const char *vpnip) +{ + int ret; + struct addrinfo *result; + struct addrinfo hints = { + .ai_family = ovpn->sa_family, + .ai_socktype = SOCK_DGRAM, + .ai_protocol = IPPROTO_UDP + }; + + if (host) { + ret = getaddrinfo(host, service, &hints, &result); + if (ret) { + fprintf(stderr, "getaddrinfo on remote error: %s\n", + gai_strerror(ret)); + return -1; + } + + if (!(result->ai_family == AF_INET && + result->ai_addrlen == sizeof(struct sockaddr_in)) && + !(result->ai_family == AF_INET6 && + result->ai_addrlen == sizeof(struct sockaddr_in6))) { + ret = -EINVAL; + goto out; + } + + memcpy(&ovpn->remote, result->ai_addr, result->ai_addrlen); + } + + if (vpnip) { + ret = getaddrinfo(vpnip, NULL, &hints, &result); + if (ret) { + fprintf(stderr, "getaddrinfo on vpnip error: %s\n", + gai_strerror(ret)); + return -1; + } + + if (!(result->ai_family == AF_INET && + result->ai_addrlen == sizeof(struct sockaddr_in)) && + !(result->ai_family == AF_INET6 && + result->ai_addrlen == sizeof(struct sockaddr_in6))) { + ret = -EINVAL; + goto out; + } + + memcpy(&ovpn->peer_ip, result->ai_addr, result->ai_addrlen); + ovpn->sa_family = result->ai_family; + + ovpn->peer_ip_set = true; + } + + ret = 0; +out: + freeaddrinfo(result); + return ret; +} + +static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id, + const char *raddr, const char *rport, + const char *vpnip) +{ + ovpn->peer_id = strtoul(peer_id, NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + return ovpn_parse_remote(ovpn, raddr, rport, vpnip); +} + +static int ovpn_parse_key_slot(const char *arg, struct ovpn_ctx *ovpn) +{ + int slot = strtoul(arg, NULL, 10); + + if (errno == ERANGE || slot < 1 || slot > 2) { + fprintf(stderr, "key slot out of range\n"); + return -1; + } + + switch (slot) { + case 1: + ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY; + break; + case 2: + ovpn->key_slot = OVPN_KEY_SLOT_SECONDARY; + break; + } + + return 0; +} + +static int ovpn_send_tcp_data(int socket) +{ + uint16_t len = htons(1000); + uint8_t buf[1002]; + int ret; + + memcpy(buf, &len, sizeof(len)); + memset(buf + sizeof(len), 0x86, sizeof(buf) - sizeof(len)); + + ret = send(socket, buf, sizeof(buf), MSG_NOSIGNAL); + + fprintf(stdout, "Sent %u bytes over TCP socket\n", ret); + + return ret > 0 ? 0 : ret; +} + +static int ovpn_recv_tcp_data(int socket) +{ + uint8_t buf[1002]; + uint16_t len; + int ret; + + ret = recv(socket, buf, sizeof(buf), MSG_NOSIGNAL); + + if (ret < 2) { + fprintf(stderr, ">>>> Error while reading TCP data: %d\n", ret); + return ret; + } + + memcpy(&len, buf, sizeof(len)); + len = ntohs(len); + + fprintf(stdout, ">>>> Received %u bytes over TCP socket, header: %u\n", + ret, len); + + return 0; +} + +static enum ovpn_cmd ovpn_parse_cmd(const char *cmd) +{ + if (!strcmp(cmd, "new_iface")) + return CMD_NEW_IFACE; + + if (!strcmp(cmd, "del_iface")) + return CMD_DEL_IFACE; + + if (!strcmp(cmd, "listen")) + return CMD_LISTEN; + + if (!strcmp(cmd, "connect")) + return CMD_CONNECT; + + if (!strcmp(cmd, "new_peer")) + return CMD_NEW_PEER; + + if (!strcmp(cmd, "new_multi_peer")) + return CMD_NEW_MULTI_PEER; + + if (!strcmp(cmd, "set_peer")) + return CMD_SET_PEER; + + if (!strcmp(cmd, "del_peer")) + return CMD_DEL_PEER; + + if (!strcmp(cmd, "get_peer")) + return CMD_GET_PEER; + + if (!strcmp(cmd, "new_key")) + return CMD_NEW_KEY; + + if (!strcmp(cmd, "del_key")) + return CMD_DEL_KEY; + + if (!strcmp(cmd, "get_key")) + return CMD_GET_KEY; + + if (!strcmp(cmd, "swap_keys")) + return CMD_SWAP_KEYS; + + if (!strcmp(cmd, "listen_mcast")) + return CMD_LISTEN_MCAST; + + return CMD_INVALID; +} + +/* Send process to background and waits for signal. + * + * This helper is called at the end of commands + * creating sockets, so that the latter stay alive + * along with the process that created them. + * + * A signal is expected to be delivered in order to + * terminate the waiting processes + */ +static void ovpn_waitbg(void) +{ + daemon(1, 1); + pause(); +} + +static int ovpn_run_cmd(struct ovpn_ctx *ovpn) +{ + char peer_id[10], vpnip[INET6_ADDRSTRLEN], laddr[128], lport[10]; + char raddr[128], rport[10]; + int n, ret; + FILE *fp; + + switch (ovpn->cmd) { + case CMD_NEW_IFACE: + ret = ovpn_new_iface(ovpn); + break; + case CMD_DEL_IFACE: + ret = ovpn_del_iface(ovpn); + break; + case CMD_LISTEN: + ret = ovpn_listen(ovpn, ovpn->sa_family); + if (ret < 0) { + fprintf(stderr, "cannot listen on TCP socket\n"); + return ret; + } + + fp = fopen(ovpn->peers_file, "r"); + if (!fp) { + fprintf(stderr, "cannot open file: %s\n", + ovpn->peers_file); + return -1; + } + + int num_peers = 0; + + while ((n = fscanf(fp, "%s %s\n", peer_id, vpnip)) == 2) { + struct ovpn_ctx peer_ctx = { 0 }; + + if (num_peers == MAX_PEERS) { + fprintf(stderr, "max peers reached!\n"); + return -E2BIG; + } + + peer_ctx.ifindex = ovpn->ifindex; + peer_ctx.sa_family = ovpn->sa_family; + + peer_ctx.socket = ovpn_accept(ovpn); + if (peer_ctx.socket < 0) { + fprintf(stderr, "cannot accept connection!\n"); + return -1; + } + + /* store peer sockets to test TCP I/O */ + ovpn->cli_sockets[num_peers] = peer_ctx.socket; + + ret = ovpn_parse_new_peer(&peer_ctx, peer_id, NULL, + NULL, vpnip); + if (ret < 0) { + fprintf(stderr, "error while parsing line\n"); + return -1; + } + + ret = ovpn_new_peer(&peer_ctx, true); + if (ret < 0) { + fprintf(stderr, + "cannot add peer to VPN: %s %s\n", + peer_id, vpnip); + return ret; + } + num_peers++; + } + + for (int i = 0; i < num_peers; i++) { + ret = ovpn_recv_tcp_data(ovpn->cli_sockets[i]); + if (ret < 0) + break; + } + ovpn_waitbg(); + break; + case CMD_CONNECT: + ret = ovpn_connect(ovpn); + if (ret < 0) { + fprintf(stderr, "cannot connect TCP socket\n"); + return ret; + } + + ret = ovpn_new_peer(ovpn, true); + if (ret < 0) { + fprintf(stderr, "cannot add peer to VPN\n"); + close(ovpn->socket); + return ret; + } + + if (ovpn->cipher != OVPN_CIPHER_ALG_NONE) { + ret = ovpn_new_key(ovpn); + if (ret < 0) { + fprintf(stderr, "cannot set key\n"); + return ret; + } + } + + ret = ovpn_send_tcp_data(ovpn->socket); + ovpn_waitbg(); + break; + case CMD_NEW_PEER: + ret = ovpn_udp_socket(ovpn, AF_INET6); + if (ret < 0) + return ret; + + ret = ovpn_new_peer(ovpn, false); + ovpn_waitbg(); + break; + case CMD_NEW_MULTI_PEER: + ret = ovpn_udp_socket(ovpn, AF_INET6); + if (ret < 0) + return ret; + + fp = fopen(ovpn->peers_file, "r"); + if (!fp) { + fprintf(stderr, "cannot open file: %s\n", + ovpn->peers_file); + return -1; + } + + while ((n = fscanf(fp, "%s %s %s %s %s %s\n", peer_id, laddr, + lport, raddr, rport, vpnip)) == 6) { + struct ovpn_ctx peer_ctx = { 0 }; + + peer_ctx.ifindex = ovpn->ifindex; + peer_ctx.socket = ovpn->socket; + peer_ctx.sa_family = AF_UNSPEC; + + ret = ovpn_parse_new_peer(&peer_ctx, peer_id, raddr, + rport, vpnip); + if (ret < 0) { + fprintf(stderr, "error while parsing line\n"); + return -1; + } + + ret = ovpn_new_peer(&peer_ctx, false); + if (ret < 0) { + fprintf(stderr, + "cannot add peer to VPN: %s %s %s %s\n", + peer_id, raddr, rport, vpnip); + return ret; + } + } + ovpn_waitbg(); + break; + case CMD_SET_PEER: + ret = ovpn_set_peer(ovpn); + break; + case CMD_DEL_PEER: + ret = ovpn_del_peer(ovpn); + break; + case CMD_GET_PEER: + if (ovpn->peer_id == PEER_ID_UNDEF) + fprintf(stderr, "List of peers connected to: %s\n", + ovpn->ifname); + + ret = ovpn_get_peer(ovpn); + break; + case CMD_NEW_KEY: + ret = ovpn_new_key(ovpn); + break; + case CMD_DEL_KEY: + ret = ovpn_del_key(ovpn); + break; + case CMD_GET_KEY: + ret = ovpn_get_key(ovpn); + break; + case CMD_SWAP_KEYS: + ret = ovpn_swap_keys(ovpn); + break; + case CMD_LISTEN_MCAST: + ret = ovpn_listen_mcast(); + break; + case CMD_INVALID: + break; + } + + return ret; +} + +static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[]) +{ + int ret; + + /* no args required for LISTEN_MCAST */ + if (ovpn->cmd == CMD_LISTEN_MCAST) + return 0; + + /* all commands need an ifname */ + if (argc < 3) + return -EINVAL; + + strscpy(ovpn->ifname, argv[2], IFNAMSIZ - 1); + ovpn->ifname[IFNAMSIZ - 1] = '\0'; + + /* all commands, except NEW_IFNAME, needs an ifindex */ + if (ovpn->cmd != CMD_NEW_IFACE) { + ovpn->ifindex = if_nametoindex(ovpn->ifname); + if (!ovpn->ifindex) { + fprintf(stderr, "cannot find interface: %s\n", + strerror(errno)); + return -1; + } + } + + switch (ovpn->cmd) { + case CMD_NEW_IFACE: + if (argc < 4) + break; + + if (!strcmp(argv[3], "P2P")) { + ovpn->mode = OVPN_MODE_P2P; + } else if (!strcmp(argv[3], "MP")) { + ovpn->mode = OVPN_MODE_MP; + } else { + fprintf(stderr, "Cannot parse iface mode: %s\n", + argv[3]); + return -1; + } + ovpn->mode_set = true; + break; + case CMD_DEL_IFACE: + break; + case CMD_LISTEN: + if (argc < 5) + return -EINVAL; + + ovpn->lport = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + ovpn->peers_file = argv[4]; + + if (argc > 5 && !strcmp(argv[5], "ipv6")) + ovpn->sa_family = AF_INET6; + break; + case CMD_CONNECT: + if (argc < 6) + return -EINVAL; + + ovpn->sa_family = AF_INET; + + ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5], + NULL); + if (ret < 0) { + fprintf(stderr, "Cannot parse remote peer data\n"); + return -1; + } + + if (argc > 6) { + ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY; + ovpn->key_id = 0; + ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM; + ovpn->key_dir = KEY_DIR_OUT; + + ret = ovpn_parse_key(argv[6], ovpn); + if (ret) + return -1; + } + break; + case CMD_NEW_PEER: + if (argc < 7) + return -EINVAL; + + ovpn->lport = strtoul(argv[4], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + const char *vpnip = (argc > 7) ? argv[7] : NULL; + + ret = ovpn_parse_new_peer(ovpn, argv[3], argv[5], argv[6], + vpnip); + if (ret < 0) + return -1; + break; + case CMD_NEW_MULTI_PEER: + if (argc < 5) + return -EINVAL; + + ovpn->lport = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + ovpn->peers_file = argv[4]; + break; + case CMD_SET_PEER: + if (argc < 6) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ovpn->keepalive_interval = strtoul(argv[4], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, + "keepalive interval value out of range\n"); + return -1; + } + + ovpn->keepalive_timeout = strtoul(argv[5], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, + "keepalive interval value out of range\n"); + return -1; + } + break; + case CMD_DEL_PEER: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + break; + case CMD_GET_PEER: + ovpn->peer_id = PEER_ID_UNDEF; + if (argc > 3) { + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + } + break; + case CMD_NEW_KEY: + if (argc < 9) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return -1; + + ovpn->key_id = strtoul(argv[5], NULL, 10); + if (errno == ERANGE || ovpn->key_id > 2) { + fprintf(stderr, "key ID out of range\n"); + return -1; + } + + ret = ovpn_parse_cipher(argv[6], ovpn); + if (ret < 0) + return -1; + + ret = ovpn_parse_key_direction(argv[7], ovpn); + if (ret < 0) + return -1; + + ret = ovpn_parse_key(argv[8], ovpn); + if (ret) + return -1; + break; + case CMD_DEL_KEY: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return ret; + break; + case CMD_GET_KEY: + if (argc < 5) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return ret; + break; + case CMD_SWAP_KEYS: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + break; + case CMD_LISTEN_MCAST: + break; + case CMD_INVALID: + break; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct ovpn_ctx ovpn; + int ret; + + if (argc < 2) { + usage(argv[0]); + return -1; + } + + memset(&ovpn, 0, sizeof(ovpn)); + ovpn.sa_family = AF_UNSPEC; + ovpn.cipher = OVPN_CIPHER_ALG_NONE; + + ovpn.cmd = ovpn_parse_cmd(argv[1]); + if (ovpn.cmd == CMD_INVALID) { + fprintf(stderr, "Error: unknown command.\n\n"); + usage(argv[0]); + return -1; + } + + ret = ovpn_parse_cmd_args(&ovpn, argc, argv); + if (ret < 0) { + fprintf(stderr, "Error: invalid arguments.\n\n"); + if (ret == -EINVAL) + usage(argv[0]); + return ret; + } + + ret = ovpn_run_cmd(&ovpn); + if (ret) + fprintf(stderr, "Cannot execute command: %s (%d)\n", + strerror(-ret), ret); + + return ret; +} diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt new file mode 100644 index 000000000000..d753eebe8716 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt @@ -0,0 +1,5 @@ +1 5.5.5.2 +2 5.5.5.3 +3 5.5.5.4 +4 5.5.5.5 +5 5.5.5.6 diff --git a/tools/testing/selftests/net/ovpn/test-chachapoly.sh b/tools/testing/selftests/net/ovpn/test-chachapoly.sh new file mode 100755 index 000000000000..32504079a2b8 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-chachapoly.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +ALG="chachapoly" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh new file mode 100755 index 000000000000..093d44772ffd --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +PROTO="TCP" + +source test-close-socket.sh diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh new file mode 100755 index 000000000000..5e48a8b67928 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +#set -x +set -e + +source ./common.sh + +cleanup + +modprobe -q ovpn || true + +for p in $(seq 0 ${NUM_PEERS}); do + create_ns ${p} +done + +for p in $(seq 0 ${NUM_PEERS}); do + setup_ns ${p} 5.5.5.$((${p} + 1))/24 +done + +for p in $(seq 0 ${NUM_PEERS}); do + add_peer ${p} +done + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120 +done + +sleep 1 + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1)) +done + +ip netns exec peer0 iperf3 -1 -s & +sleep 1 +ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1 + +cleanup + +modprobe -r ovpn || true diff --git a/tools/testing/selftests/net/ovpn/test-float.sh b/tools/testing/selftests/net/ovpn/test-float.sh new file mode 100755 index 000000000000..ba5d725e18b0 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-float.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +FLOAT="1" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-tcp.sh b/tools/testing/selftests/net/ovpn/test-tcp.sh new file mode 100755 index 000000000000..ba3f1f315a34 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-tcp.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +PROTO="TCP" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh new file mode 100755 index 000000000000..e8acdc303307 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +#set -x +set -e + +source ./common.sh + +cleanup + +modprobe -q ovpn || true + +for p in $(seq 0 ${NUM_PEERS}); do + create_ns ${p} +done + +for p in $(seq 0 ${NUM_PEERS}); do + setup_ns ${p} 5.5.5.$((${p} + 1))/24 ${MTU} +done + +for p in $(seq 0 ${NUM_PEERS}); do + add_peer ${p} +done + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120 +done + +sleep 1 + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1)) + ip netns exec peer0 ping -qfc 500 -s 3000 -w 3 5.5.5.$((${p} + 1)) +done + +# ping LAN behind client 1 +ip netns exec peer0 ping -qfc 500 -w 3 ${LAN_IP} + +if [ "$FLOAT" == "1" ]; then + # make clients float.. + for p in $(seq 1 ${NUM_PEERS}); do + ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p} + ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p} + done + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1 + done +fi + +ip netns exec peer0 iperf3 -1 -s & +sleep 1 +ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1 + +echo "Adding secondary key and then swap:" +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 data64.key + ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} ${p} 2 1 ${ALG} 1 data64.key + ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} ${p} +done + +sleep 1 + +echo "Querying all peers:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 +ip netns exec peer1 ${OVPN_CLI} get_peer tun1 + +echo "Querying peer 1:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1 + +echo "Querying non-existent peer 10:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 10 || true + +echo "Deleting peer 1:" +ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1 +ip netns exec peer1 ${OVPN_CLI} del_peer tun1 1 + +echo "Querying keys:" +for p in $(seq 2 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 1 + ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 2 +done + +echo "Deleting peer while sending traffic:" +(ip netns exec peer2 ping -qf -w 4 5.5.5.1)& +sleep 2 +ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2 +# following command fails in TCP mode +# (both ends get conn reset when one peer disconnects) +ip netns exec peer2 ${OVPN_CLI} del_peer tun2 2 || true + +echo "Deleting keys:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 1 + ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 2 +done + +echo "Setting timeout to 3s MP:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 0 0 +done +# wait for peers to timeout +sleep 5 + +echo "Setting timeout to 3s P2P:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 3 3 +done +sleep 5 + +cleanup + +modprobe -r ovpn || true diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt new file mode 100644 index 000000000000..e9773ddf875c --- /dev/null +++ b/tools/testing/selftests/net/ovpn/udp_peers.txt @@ -0,0 +1,6 @@ +1 10.10.1.1 1 10.10.1.2 1 5.5.5.2 +2 10.10.2.1 1 10.10.2.2 1 5.5.5.3 +3 10.10.3.1 1 10.10.3.2 1 5.5.5.4 +4 fd00:0:0:4::1 1 fd00:0:0:4::2 1 5.5.5.5 +5 fd00:0:0:5::1 1 fd00:0:0:5::2 1 5.5.5.6 +6 fd00:0:0:6::1 1 fd00:0:0:6::2 1 5.5.5.7 diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile new file mode 100644 index 000000000000..31cfb666ba8b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_INCLUDES := ksft_runner.sh \ + defaults.sh \ + set_sysctls.py \ + ../../kselftest/ktap_helpers.sh + +TEST_PROGS := $(wildcard *.pkt) + +include ../../lib.mk diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config new file mode 100644 index 000000000000..0237ed98f3c0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/config @@ -0,0 +1,11 @@ +CONFIG_IPV6=y +CONFIG_HZ_1000=y +CONFIG_HZ=1000 +CONFIG_NET_NS=y +CONFIG_NET_SCH_FIFO=y +CONFIG_NET_SCH_FQ=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYN_COOKIES=y +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_MD5SIG=y +CONFIG_TUN=y diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh new file mode 100755 index 000000000000..1095a7b22f44 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/defaults.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Set standard production config values that relate to TCP behavior. + +# Flush old cached data (fastopen cookies). +ip tcp_metrics flush all > /dev/null 2>&1 + +# TCP min, default, and max receive and send buffer sizes. +sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))" +sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304" + +# TCP timestamps. +sysctl -q net.ipv4.tcp_timestamps=1 + +# TCP SYN(ACK) retry thresholds +sysctl -q net.ipv4.tcp_syn_retries=5 +sysctl -q net.ipv4.tcp_synack_retries=5 + +# TCP Forward RTO-Recovery, RFC 5682. +sysctl -q net.ipv4.tcp_frto=2 + +# TCP Selective Acknowledgements (SACK) +sysctl -q net.ipv4.tcp_sack=1 + +# TCP Duplicate Selective Acknowledgements (DSACK) +sysctl -q net.ipv4.tcp_dsack=1 + +# TCP FACK (Forward Acknowldgement) +sysctl -q net.ipv4.tcp_fack=0 + +# TCP reordering degree ("dupthresh" threshold for entering Fast Recovery). +sysctl -q net.ipv4.tcp_reordering=3 + +# TCP congestion control. +sysctl -q net.ipv4.tcp_congestion_control=cubic + +# TCP slow start after idle. +sysctl -q net.ipv4.tcp_slow_start_after_idle=0 + +# TCP RACK and TLP. +sysctl -q net.ipv4.tcp_early_retrans=4 net.ipv4.tcp_recovery=1 + +# TCP method for deciding when to defer sending to accumulate big TSO packets. +sysctl -q net.ipv4.tcp_tso_win_divisor=3 + +# TCP Explicit Congestion Notification (ECN) +sysctl -q net.ipv4.tcp_ecn=0 + +sysctl -q net.ipv4.tcp_pacing_ss_ratio=200 +sysctl -q net.ipv4.tcp_pacing_ca_ratio=120 +sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1 + +sysctl -q net.ipv4.tcp_fastopen=0x70403 +sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 + +sysctl -q net.ipv4.tcp_syncookies=1 + +# Override the default qdisc on the tun device. +# Many tests fail with timing errors if the default +# is FQ and that paces their flows. +tc qdisc add dev tun0 root pfifo + diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh new file mode 100755 index 000000000000..ef8b25a606d8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh" + +readonly ipv4_args=('--ip_version=ipv4 ' + '--local_ip=192.168.0.1 ' + '--gateway_ip=192.168.0.1 ' + '--netmask_ip=255.255.0.0 ' + '--remote_ip=192.0.2.1 ' + '-D CMSG_LEVEL_IP=SOL_IP ' + '-D CMSG_TYPE_RECVERR=IP_RECVERR ') + +readonly ipv6_args=('--ip_version=ipv6 ' + '--mtu=1520 ' + '--local_ip=fd3d:0a0b:17d6::1 ' + '--gateway_ip=fd3d:0a0b:17d6:8888::1 ' + '--remote_ip=fd3d:fa7b:d17d::1 ' + '-D CMSG_LEVEL_IP=SOL_IPV6 ' + '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ') + +if [ $# -ne 1 ]; then + ktap_exit_fail_msg "usage: $0 <script>" + exit "$KSFT_FAIL" +fi +script="$(basename $1)" + +if [ -z "$(which packetdrill)" ]; then + ktap_skip_all "packetdrill not found in PATH" + exit "$KSFT_SKIP" +fi + +declare -a optargs +failfunc=ktap_test_fail + +if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then + optargs+=('--tolerance_usecs=14000') + + # xfail tests that are known flaky with dbg config, not fixable. + # still run them for coverage (and expect 100% pass without dbg). + declare -ar xfail_list=( + "tcp_eor_no-coalesce-retrans.pkt" + "tcp_fast_recovery_prr-ss.*.pkt" + "tcp_slow_start_slow-start-after-win-update.pkt" + "tcp_timestamping.*.pkt" + "tcp_user_timeout_user-timeout-probe.pkt" + "tcp_zerocopy_epoll_.*.pkt" + "tcp_tcp_info_tcp-info-.*-limited.pkt" + ) + readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$" + [[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail +fi + +ktap_print_header +ktap_set_plan 2 + +unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \ + && ktap_test_pass "ipv4" || $failfunc "ipv4" +unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \ + && ktap_test_pass "ipv6" || $failfunc "ipv6" + +ktap_finished diff --git a/tools/testing/selftests/net/packetdrill/set_sysctls.py b/tools/testing/selftests/net/packetdrill/set_sysctls.py new file mode 100755 index 000000000000..5ddf456ae973 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/set_sysctls.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Sets sysctl values and writes a file that restores them. + +The arguments are of the form "<proc-file>=<val>" separated by spaces. +The program first reads the current value of the proc-file and creates +a shell script named "/tmp/sysctl_restore_${PACKETDRILL_PID}.sh" which +restores the values when executed. It then sets the new values. + +PACKETDRILL_PID is set by packetdrill to the pid of itself, so a .pkt +file could restore sysctls by running `/tmp/sysctl_restore_${PPID}.sh` +at the end. +""" + +import os +import subprocess +import sys + +filename = '/tmp/sysctl_restore_%s.sh' % os.environ['PACKETDRILL_PID'] + +# Open file for restoring sysctl values +restore_file = open(filename, 'w') +print('#!/bin/bash', file=restore_file) + +for a in sys.argv[1:]: + sysctl = a.split('=') + # sysctl[0] contains the proc-file name, sysctl[1] the new value + + # read current value and add restore command to file + cur_val = subprocess.check_output(['cat', sysctl[0]], universal_newlines=True) + print('echo "%s" > %s' % (cur_val.strip(), sysctl[0]), file=restore_file) + + # set new value + cmd = 'echo "%s" > %s' % (sysctl[1], sysctl[0]) + os.system(cmd) + +os.system('chmod u+x %s' % filename) diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt new file mode 100644 index 000000000000..38535701656e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking accept. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0...0.200 accept(3, ..., ...) = 4 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + + +.1 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt new file mode 100644 index 000000000000..3692ef102381 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking connect. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + + +.1...0.200 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.1 < S. 0:0(0) ack 1 win 5792 <mss 1460,nop,wscale 2,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt new file mode 100644 index 000000000000..914eabab367a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking read. +--tolerance_usecs=10000 + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0...0.100 read(4, ..., 2000) = 2000 + +.1 < P. 1:2001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 2001 + + +.1...0.200 read(4, ..., 2000) = 2000 + +.1 < P. 2001:4001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 + + +.1 < P. 4001:6001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 6001 + +0...0.000 read(4, ..., 1000) = 1000 + +0...0.000 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt new file mode 100644 index 000000000000..cec5a0725d95 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking write. +--tolerance_usecs=10000 + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_min_tso_segs=10 +` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 50000 <mss 1000,nop,wscale 0> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 50000 + +0 accept(3, ..., ...) = 4 + +// Kernel doubles our value -> sk->sk_sndbuf is set to 42000 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [21000], 4) = 0 + +0 getsockopt(4, SOL_SOCKET, SO_SNDBUF, [42000], [4]) = 0 + +// A write of 60000 does not block. + +0...0.300 write(4, ..., 61000) = 61000 // this write() blocks + + +.1 < . 1:1(0) ack 10001 win 50000 + + +.1 < . 1:1(0) ack 30001 win 50000 + +// This ACK should wakeup the write(). An ACK of 35001 does not. + +.1 < . 1:1(0) ack 36001 win 50000 + +// Reset to sysctls defaults. +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt new file mode 100644 index 000000000000..8514d6bdbb6d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test basic connection teardown where local process closes first: +// the local process calls close() first, so we send a FIN, and receive an ACK. +// Then we receive a FIN and ACK it. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.01...0.011 connect(3, ..., ...) = 0 + +0 > S 0:0(0) <...> + +0 < S. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + + +0 write(3, ..., 1000) = 1000 + +0 > P. 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1001 win 257 + + +0 close(3) = 0 + +0 > F. 1001:1001(0) ack 1 + +0 < . 1:1(0) ack 1002 win 257 + + +0 < F. 1:1(0) ack 1002 win 257 + +0 > . 1002:1002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt new file mode 100644 index 000000000000..04103134bd99 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test to make sure no RST is being sent when close() +// is called on a socket with SYN_SENT state. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <...> + +// Application decideds to close the socket in SYN_SENT state +// Make sure no RST is sent after close(). + +0 close(3) = 0 + +// Receive syn-ack to trigger the send side packet examination: +// If a RESET were sent right after close(), it would have failed with +// a mismatched timestamp. + +.1 < S. 0:0(0) ack 1 win 32000 <mss 1460,nop,wscale 7> + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt new file mode 100644 index 000000000000..5f3a2914213a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify behavior for the sequence: remote side sends FIN, then we close(). +// Since the remote side (client) closes first, we test our LAST_ACK code path. + +`./defaults.sh` + +// Initialize a server socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + +// Client closes first. + +.01 < F. 1:1(0) ack 1 win 257 + +0 > . 1:1(0) ack 2 + +// App notices that client closed. + +0 read(4, ..., 1000) = 0 + +// Then we close. + +.01 close(4) = 0 + +0 > F. 1:1(0) ack 2 + +// Client ACKs our FIN. + +.01 < . 2:2(0) ack 2 win 257 + +// Verify that we send RST in response to any incoming segments +// (because the kernel no longer has any record of this socket). + +.01 < . 2:2(0) ack 2 win 257 + +0 > R 2:2(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt new file mode 100644 index 000000000000..643baf3267cf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test ECN: verify that Linux TCP ECN sending code uses ECT0 (not ECT1). +// +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 # fully enabled +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +// ECN handshake: send EW flags in SYN packet, E flag in SYN-ACK response ++.002 ... 0.004 connect(4, ..., ...) = 0 + + +0 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.002 < SE. 0:0(0) ack 1 win 32767 <mss 1000,nop,wscale 6,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + +// Write 1 MSS. ++.002 write(4, ..., 1000) = 1000 +// Send 1 MSS with ect0. + +0 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt new file mode 100644 index 000000000000..f95b9b3c9fa1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. The large chunk itself should be packetized as +// usual. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write another 10040B chunk with no coalescing options. + +0 send(4, ..., 10400, MSG_EOR) = 10400 + +// Write a 2KB chunk. This chunk should not be appended to the packets created +// the previous chunk. + +0 write(4, ..., 2000) = 2000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:20801(10800) ack 1 ++.001 < . 1:1(0) ack 20801 win 514 +// This 2KB packet should be sent alone. + +0 > P. 20801:22801(2000) ack 1 ++.001 < . 1:1(0) ack 22801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt new file mode 100644 index 000000000000..2ff66075288e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. Also, when packets are retransmitted, they +// will not be coalesce into the same skb. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 12001:12401(400) ack 1 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 <sack 13201:14401,nop,nop> +// TCP should fill the hole but no coalescing should happen, and all +// retransmissions should be sent out as individual packets. + +// Note : This is timeout based retransmit. +// Do not put +0 here or flakes will come back. ++.004~+.008 > P. 12001:12401(400) ack 1 + ++.001 < . 1:1(0) ack 12401 win 514 <sack 13201:14401,nop,nop> + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 ++.001 < . 1:1(0) ack 12801 win 514 <sack 13201:14401,nop,nop> ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt new file mode 100644 index 000000000000..77039c5aac39 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write a 400B chunk with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 + +0 > P. 10801:20801(10000) ack 1 ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 20801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt new file mode 100644 index 000000000000..dd5a06250595 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk even though we have 10 back-to-back small +// writes. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 12001:12401(400) ack 1 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 ++.001 < . 1:1(0) ack 12401 win 514 ++.001 < . 1:1(0) ack 12801 win 514 ++.001 < . 1:1(0) ack 13201 win 514 ++.001 < . 1:1(0) ack 13601 win 514 ++.001 < . 1:1(0) ack 14001 win 514 ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt new file mode 100644 index 000000000000..0d3c8077e830 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. +// In this variant we test a simple case where in-flight == ssthresh +// all the way through recovery, so during fast recovery we send one segment +// for each segment SACKed/ACKed. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// RTT 100ms + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001. + +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:4001,nop,nop> +// Enter fast recovery. + +0 > . 1:1001(1000) ack 1 + +.01 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + +// Write some more, which we will send 1 MSS at a time, +// as in-flight segments are SACKed or ACKed. + +.01 write(4, ..., 7000) = 7000 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:5001,nop,nop> + +0 > . 10001:11001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:6001,nop,nop> + +0 > . 11001:12001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:7001,nop,nop> + +0 > . 12001:13001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:8001,nop,nop> + +0 > . 13001:14001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:9001,nop,nop> + +0 > . 14001:15001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:10001,nop,nop> + +0 > . 15001:16001(1000) ack 1 + + +.02 < . 1:1(0) ack 10001 win 320 + +0 > P. 16001:17001(1000) ack 1 +// Leave fast recovery. + +.01 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + + +.03 < . 1:1(0) ack 12001 win 320 + +.02 < . 1:1(0) ack 14001 win 320 + +.02 < . 1:1(0) ack 16001 win 320 + +.02 < . 1:1(0) ack 17001 win 320 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt new file mode 100644 index 000000000000..7842a10b6967 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. The sender sends 20 packets. Packet +// 1 to 4, and 11 to 16 are dropped. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write 20 data segments. + +0 write(4, ..., 20000) = 20000 + +0 > P. 1:10001(10000) ack 1 + +// Receive first DUPACK, entering PRR part + +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop> + +0 > . 10001:11001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop> + +0 > . 11001:12001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop> + +0 > . 1:1001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop> + +0 > . 2001:3001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop> + +0 > . 3001:4001(1000) ack 1 +// Enter PRR CRB ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:11001,nop,nop> + +0 > . 12001:13001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:12001,nop,nop> + +0 > . 13001:14001(1000) ack 1 +// Enter PRR slow start + +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:12001,nop,nop> + +0 > P. 14001:16001(2000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:12001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 > . 16001:17001(1000) ack 1 +// inflight reaches ssthresh, goes into packet conservation mode ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:13001,nop,nop> + +0 > . 17001:18001(1000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:14001,nop,nop> + +0 > . 18001:19001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt new file mode 100644 index 000000000000..b66d7644c3b6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. The sender sends 20 packets. Packet +// 1 to 4 are lost. The sender writes another 10 packets. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 20 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1,2,3,4 + +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop> ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop> + +0 > . 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop> + +0 > . 3001:4001(1000) ack 1 + +// Receiver ACKs all data. + +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 2001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 3001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 10001 win 320 + +// Writes another 10 packets, which the ssthresh*mss amount +// should be sent right away + +.01 write(4, ..., 10000) = 10000 + +0 > . 10001:17001(7000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt new file mode 100644 index 000000000000..8e87bfecabb5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. +// In this variant we verify that the sender uses SACK info on an ACK +// below snd_una. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 8> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// RTT 10ms + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001,4001:5001,7001:8001. + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001 8001:9001,nop,nop> + +0 > . 1:1001(1000) ack 1 + ++.012 < . 1:1(0) ack 4001 win 320 <sack 8001:9001,nop,nop> + +0 > . 4001:7001(3000) ack 1 + + +0 write(4, ..., 10000) = 10000 + +// The following ACK was reordered - delayed so that it arrives with +// an ACK field below snd_una. Here we check that the newly-SACKed +// 2MSS at 5001:7001 cause us to send out 2 more MSS. ++.002 < . 1:1(0) ack 3001 win 320 <sack 5001:7001,nop,nop> + +0 > . 7001:8001(1000) ack 1 + +0 > . 10001:11001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt new file mode 100644 index 000000000000..df49c67645ac --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_INQ and TCP_CM_INQ on the client side. +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Connect to the server and enable TCP_INQ. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 setsockopt(3, SOL_TCP, TCP_INQ, [1], 4) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 5792 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 7> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700> + +// Now we have 10K of data ready on the socket. + +0 < . 1:10001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 10001 <nop,nop,TS val 200 ecr 700> + +// We read 1K and we should have 9K ready to read. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 1000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=9000}]}, 0) = 1000 +// We read 9K and we should have no further data ready to read. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 9000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=0}]}, 0) = 9000 + +// Server sends more data and closes the connections. + +0 < F. 10001:20001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 20002 <nop,nop,TS val 200 ecr 700> + +// We read 10K and we should have one "fake" byte because the connection is +// closed. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 10000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=1}]}, 0) = 10000 +// Now, receive EOF. + +0 read(3, ..., 2000) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt new file mode 100644 index 000000000000..04a5e2590c62 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_INQ and TCP_CM_INQ on the server side. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + +// Accept the connection and enable TCP_INQ. + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_TCP, TCP_INQ, [1], 4) = 0 + +// Now we have 10K of data ready on the socket. + +0 < . 1:10001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 10001 + +// We read 2K and we should have 8K ready to read. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 2000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=8000}]}, 0) = 2000 +// We read 8K and we should have no further data ready to read. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 8000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=0}]}, 0) = 8000 +// Client sends more data and closes the connections. + +0 < F. 10001:20001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 20002 + +// We read 10K and we should have one "fake" byte because the connection is +// closed. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 10000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=1}]}, 0) = 10000 +// Now, receive error. + +0 read(3, ..., 2000) = -1 ENOTCONN (Transport endpoint is not connected) diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt new file mode 100644 index 000000000000..96b01eb5b7a4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that doesn't support SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. +// We have one packet newly acked (1001:3001 were DUP-ACK'd) +// So we revert state back to Open. Slow start cwnd from 10 to 11 +// and send 11 - 9 = 2 packets + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt new file mode 100644 index 000000000000..642da51ec3a4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that supports SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt new file mode 100644 index 000000000000..25dfef95d3f8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test what happens when client does not provide MD5 on SYN, +// but then does on the ACK that completes the three-way handshake. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// Ooh, weird: client provides MD5 option on the ACK: + +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop> + +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop> + +// The TCP listener refcount should be 2, but on buggy kernels it can be 0: + +0 `grep " 0A " /proc/net/tcp /proc/net/tcp6 | grep ":1F90"` + +// Now here comes the legit ACK: + +.01 < . 1:1(0) ack 1 win 514 + +// Make sure the connection is OK: + +0 accept(3, ..., ...) = 4 + + +.01 write(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt new file mode 100644 index 000000000000..7adae7a9ef4a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// This is a test inspired by an Android client app using SSL. This +// test verifies using TCP_NODELAY would save application latency +// (Perhaps even better with TCP_NAGLE). +// +`./defaults.sh +ethtool -K tun0 tso off gso off +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < S. 0:0(0) ack 1 win 5792 <mss 974,nop,nop,sackOK,nop,wscale 7> + +0 > . 1:1(0) ack 1 + +// SSL handshake (resumed session) + +0 write(4, ..., 517) = 517 + +0 > P. 1:518(517) ack 1 + +.1 < . 1:1(0) ack 518 win 229 + + +0 < P. 1:144(143) ack 1 win 229 + +0 > . 518:518(0) ack 144 + +0 read(4, ..., 1000) = 143 + +// Application POST header (51B) and body (2002B) + +0 write(4, ..., 51) = 51 + +0 > P. 518:569(51) ack 144 + +.03 write(4, ..., 2002) = 2002 + +0 > . 569:1543(974) ack 144 + +0 > P. 1543:2517(974) ack 144 +// Without disabling Nagle, this packet will not happen until the remote ACK. + +0 > P. 2517:2571(54) ack 144 + + +.1 < . 1:1(0) ack 2571 win 229 + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt new file mode 100644 index 000000000000..fa9c01813996 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test the MSG_MORE flag will correctly corks the tiny writes +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Disable Nagle by default on this socket. + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +// Test the basic case: MSG_MORE overwrites TCP_NODELAY and enables Nagle. + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 40}], msg_flags=0}, MSG_MORE) = 40 + +.21~+.215 > P. 1:41(40) ack 1 + +.01 < . 1:1(0) ack 41 win 257 + +// Test unsetting MSG_MORE releases the packet + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 100}], msg_flags=0}, MSG_MORE) = 100 ++.005 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 160}], msg_flags=0}, MSG_MORE) = 160 + +.01 sendmsg(4, {msg_name(...)=..., + msg_iov(3)=[{..., 100}, {..., 200}, {..., 195}], + msg_flags=0}, MSG_MORE) = 495 ++.008 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 5}], msg_flags=0}, 0) = 5 + +0 > P. 41:801(760) ack 1 + +.02 < . 1:1(0) ack 801 win 257 + + +// Test >MSS write will unleash MSS packets but hold on the remaining data. + +.1 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3100}], msg_flags=0}, MSG_MORE) = 3100 + +0 > . 801:3801(3000) ack 1 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 50}], msg_flags=0}, MSG_MORE) = 50 + + +.01 < . 1:1(0) ack 2801 win 257 +// Err... we relase the remaining right after the ACK? note that PUSH is reset + +0 > . 3801:3951(150) ack 1 + +// Test we'll hold on the subsequent writes when inflight (3801:3951) > 0 ++.001 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 1}], msg_flags=0}, MSG_MORE) = 1 ++.002 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 2}], msg_flags=0}, MSG_MORE) = 2 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3}], msg_flags=0}, MSG_MORE) = 3 ++.004 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 4}], msg_flags=0}, MSG_MORE) = 4 + +.02 < . 1:1(0) ack 3951 win 257 + +0 > . 3951:3961(10) ack 1 + +.02 < . 1:1(0) ack 3961 win 257 + + +// Test the case a MSG_MORE send followed by a write flushes the data + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 20}], msg_flags=0}, MSG_MORE) = 20 + +.05 write(4, ..., 20) = 20 + +0 > P. 3961:4001(40) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt new file mode 100644 index 000000000000..0ddec5f7dc1a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_CORK and TCP_NODELAY sockopt behavior +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Unset TCP_CORK should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_CORK, [0], 4) = 0 + +0 > P. 1:81(80) ack 1 + +.01 < . 1:1(0) ack 81 win 257 + +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > P. 81:161(80) ack 1 + +.01 < . 1:1(0) ack 161 win 257 + +// Set MSG_MORE to hold small packets + +0 send(4, ..., 40, MSG_MORE) = 40 + +.05 send(4, ..., 40, MSG_MORE) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > . 161:241(80) ack 1 + +.01 < . 1:1(0) ack 241 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt new file mode 100644 index 000000000000..310ef31518da --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that setsockopt calls that force a route refresh do not +// cause problems matching SACKs with packets in the write queue. +// This variant tests IP_TOS. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_IP, IP_MTU_DISCOVER, [IP_PMTUDISC_DONT], 1) = 0 + +0...0.010 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 65535 <mss 1460,nop,wscale 2,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 1:5841(5840) ack 1 + +.01 < . 1:1(0) ack 5841 win 65535 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 5841:11681(5840) ack 1 + +.01 < . 1:1(0) ack 11681 win 65535 + + +.01 write(3, ..., 14600) = 14600 + +0 > P. 11681:26281(14600) ack 1 + +// Try the socket option that we know can force a route refresh. + +0 setsockopt(3, SOL_IP, IP_TOS, [4], 1) = 0 +// Then revert to avoid routing/mangling/etc implications of that setting. + +0 setsockopt(3, SOL_IP, IP_TOS, [0], 1) = 0 + +// Verify that we do not retransmit the SACKed segments. + +.01 < . 1:1(0) ack 13141 win 65535 <sack 16061:17521 20441:26281,nop,nop> + +0 > . 13141:16061(2920) ack 1 + +0 > P. 17521:20441(2920) ack 1 + +.01 < . 1:1(0) ack 26281 win 65535 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt new file mode 100644 index 000000000000..f185e1ac57ea --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests non-FACK SACK with SACKs coming in the order +// 2 6 8 3 9, to test what happens when we get a new SACKed range +// (for packet 3) that is on the right of an existing SACKed range +// (for packet 2). + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + + +.1 < . 1:1(0) ack 1 win 257 <sack 2001:3001,nop,nop> ++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001,nop,nop> ++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001 8001:9001,nop,nop> + +// 3 SACKed packets, so we enter Fast Recovery. + +0 > . 1:1001(1000) ack 1 + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_lost == 6, tcpi_lost }% + +// SACK for 3001:4001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. ++.007 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:9001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +0 %{ assert tcpi_reordering == 6, tcpi_reordering }% // 8001:9001 -> 3001:4001 is 6 + +// SACK for 9001:10001. + +.01 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// ACK for 1:1001 as packets from t=0.303 arrive. ++.083 < . 1:1(0) ack 1001 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 4,tcpi_lost }% + +// ACK for 1:4001 as packets from t=0.310 arrive. ++.017 < . 1:1(0) ack 4001 win 257 <sack 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 3,tcpi_lost }% + +// ACK for 1:7001 as packets from t=0.320 arrive. + +.01 < . 1:1(0) ack 7001 win 257 <sack 8001:10001,nop,nop> + +// ACK for all data as packets from t=0.403 arrive. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt new file mode 100644 index 000000000000..0093b4973934 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 3, and then a SACK for 4. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay the fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop> +// RACK reordering timer ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7, tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 3001:4001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:4001 7001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost # since 3001:4001 is no longer lost +assert tcpi_reordering == 5, tcpi_reordering # 7001:8001 -> 3001:4001 +}% + +// SACK for 4001:5001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. +// It uses the RFC3517 algorithm to mark 1:3001 lost +// because >=3 higher-sequence packets are SACKed. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:8001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 %{ +assert tcpi_lost == 5,tcpi_lost # SACK/RFC3517 thinks 1:3001 are lost +}% + +// SACK for 8001:9001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:9001,nop,nop> + +// SACK for 9001:10001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:10001,nop,nop> + +0 > . 5001:6001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt new file mode 100644 index 000000000000..980a832dc81c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 5, and then a SACK for 6. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay a fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop> ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7,tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 5001:6001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:6001 7001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost +assert tcpi_reordering == 3, tcpi_reordering # 7001:8001 -> 5001:6001 is 3 +}% + +// SACK for 6001:7001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:8001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// SACK for 8001:9001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:9001,nop,nop> + +0 > . 3001:4001(1000) ack 1 + +// SACK for 9001:10001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:10001,nop,nop> + +0 > . 4001:5001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt new file mode 100644 index 000000000000..6740859a1360 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// Simplest possible test of open() and then sendfile(). +// We write some zeroes into a file (since packetdrill expects payloads +// to be all zeroes) and then open() the file, then use sendfile() +// and verify that the correct number of zeroes goes out. + +`./defaults.sh +/bin/rm -f /tmp/testfile +/bin/dd bs=1 count=5 if=/dev/zero of=/tmp/testfile status=none +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 open("/tmp/testfile", O_RDONLY) = 5 + +0 sendfile(4, 5, [0], 5) = 5 + +0 > P. 1:6(5) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt new file mode 100644 index 000000000000..795c476d222d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver ACKs every packet. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 1001 win 257 + +0 > P. 10001:12001(2000) ack 1 + + +0 < . 1:1(0) ack 2001 win 257 + +0 > P. 12001:14001(2000) ack 1 + ++.005 < . 1:1(0) ack 3001 win 257 + +0 > P. 14001:16001(2000) ack 1 + + +0 < . 1:1(0) ack 4001 win 257 + +0 > P. 16001:18001(2000) ack 1 + ++.005 < . 1:1(0) ack 5001 win 257 + +0 > P. 18001:20001(2000) ack 1 + + +0 < . 1:1(0) ack 6001 win 257 + +0 > P. 20001:22001(2000) ack 1 + ++.005 < . 1:1(0) ack 7001 win 257 + +0 > P. 22001:24001(2000) ack 1 + + +0 < . 1:1(0) ack 8001 win 257 + +0 > P. 24001:26001(2000) ack 1 + ++.005 < . 1:1(0) ack 9001 win 257 + +0 > P. 26001:28001(2000) ack 1 + + +0 < . 1:1(0) ack 10001 win 257 + +0 > P. 28001:30001(2000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt new file mode 100644 index 000000000000..9212ae1fd0f2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when an outstanding flight of packets is +// less than the current cwnd, and not big enough to bump up cwnd. +// +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +// Only send 5 packets. + +0 write(4, ..., 5000) = 5000 + +0 > P. 1:5001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 5001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt new file mode 100644 index 000000000000..416c901ddf51 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when an outstanding flight of packets is +// less than the current cwnd, but still big enough that in slow +// start we want to increase our cwnd a little. +// +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +// Only send 6 packets. + +0 write(4, ..., 6000) = 6000 + +0 > P. 1:6001(6000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt new file mode 100644 index 000000000000..a894b7d4559c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 > P. 10001:14001(4000) ack 1 + ++.005 < . 1:1(0) ack 4001 win 257 + +0 > P. 14001:18001(4000) ack 1 + ++.005 < . 1:1(0) ack 6001 win 257 + +0 > P. 18001:22001(4000) ack 1 + ++.005 < . 1:1(0) ack 8001 win 257 + +0 > P. 22001:26001(4000) ack 1 + ++.005 < . 1:1(0) ack 10001 win 257 + +0 > P. 26001:30001(4000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt new file mode 100644 index 000000000000..065fae9e9abd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver sends one ACK per 4 packets. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.11 < . 1:1(0) ack 4001 win 257 + +0 > P. 10001:18001(8000) ack 1 + + +.01 < . 1:1(0) ack 8001 win 257 + +0 > P. 18001:26001(8000) ack 1 + ++.005 < . 1:1(0) ack 10001 win 257 + +0 > P. 26001:30001(4000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt new file mode 100644 index 000000000000..11b213be1138 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start after idle +// This test expects tso size to be at least initial cwnd * mss + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \ + /proc/sys/net/ipv4/tcp_min_tso_segs=10` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 511 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 26000) = 26000 + +0 > P. 1:5001(5000) ack 1 + +0 > P. 5001:10001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.1 < . 1:1(0) ack 10001 win 511 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% + +0 > P. 10001:20001(10000) ack 1 + +0 > P. 20001:26001(6000) ack 1 + + +.1 < . 1:1(0) ack 26001 win 511 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +2 write(4, ..., 20000) = 20000 +// If slow start after idle works properly, we should send 5 MSS here (cwnd/2) + +0 > P. 26001:31001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt new file mode 100644 index 000000000000..577ed8c8852c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start after window update +// This test expects tso size to be at least initial cwnd * mss + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \ + /proc/sys/net/ipv4/tcp_min_tso_segs=10` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 511 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 26000) = 26000 + +0 > P. 1:5001(5000) ack 1 + +0 > P. 5001:10001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.1 < . 1:1(0) ack 10001 win 511 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% + +0 > P. 10001:20001(10000) ack 1 + +0 > P. 20001:26001(6000) ack 1 + + +.1 < . 1:1(0) ack 26001 win 0 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +0 write(4, ..., 20000) = 20000 +// 1st win0 probe ++.3~+.310 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + +// 2nd win0 probe ++.6~+.620 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + +// 3rd win0 probe ++1.2~+1.240 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +.9 < . 1:1(0) ack 26001 win 511 + +0 > P. 26001:31001(5000) ack 1 + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt new file mode 100644 index 000000000000..869f32c35a2a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when application-limited: in this case, +// with IW10, if we don't fully use our cwnd but instead +// send just 9 packets, then cwnd should grow to twice that +// value, or 18 packets. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 9000) = 9000 + +0 > P. 1:9001(9000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 8001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 9001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt new file mode 100644 index 000000000000..0f77b7955db6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when application-limited: in this case, +// with IW10, if we send exactly 10 packets then cwnd should grow to 20. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 8001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 10001 win 257 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt new file mode 100644 index 000000000000..7e9c83d617c2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow, even if TSQ triggers. +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Note we use FQ/pacing to check if TCP Small Queues is not hurting + +`./defaults.sh +tc qdisc replace dev tun0 root fq +sysctl -q net/ipv4/tcp_pacing_ss_ratio=200 +sysctl -e -q net.ipv4.tcp_min_tso_segs=2` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 500 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 40000) = 40000 +// This might change if we cook the initial packet with 10 MSS. + +0 > P. 1:2921(2920) ack 1 + +0 > P. 2921:5841(2920) ack 1 + +0 > P. 5841:8761(2920) ack 1 + +0 > P. 8761:11681(2920) ack 1 + +0 > P. 11681:14601(2920) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2921 win 500 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + +// Note: after this commit : "net_sched: sch_fq: account for schedule/timers drifts" +// FQ notices that this packet missed the 'time to send next packet' computed +// when prior packet (11681:14601(2920)) was sent. +// So FQ will allow following packet to be sent a bit earlier (quantum/2) +// (FQ commit allows an application/cwnd limited flow to get at most quantum/2 extra credit) + +0 > P. 14601:17521(2920) ack 1 + ++.003 < . 1:1(0) ack 5841 win 500 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.001 > P. 17521:20441(2920) ack 1 + ++.001 < . 1:1(0) ack 8761 win 500 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + +// remaining packets are delivered at a constant rate. ++.007 > P. 20441:23361(2920) ack 1 + ++.002 < . 1:1(0) ack 11681 win 500 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% ++.001 < . 1:1(0) ack 14601 win 500 + ++.004 > P. 23361:26281(2920) ack 1 + ++.007 > P. 26281:29201(2920) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt new file mode 100644 index 000000000000..0cbd43253236 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize a server socket + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_IP, IP_FREEBIND, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Connection should get accepted + +0 < S 0:0(0) win 32972 <mss 1460,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <...> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0 pipe([5, 6]) = 0 + +0 < U. 1:101(100) ack 1 win 257 urg 100 + +0 splice(4, NULL, 6, NULL, 99, 0) = 99 + +0 splice(4, NULL, 6, NULL, 1, 0) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..8940726a3ec2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP fastopen behavior with NULL as buffer pointer, but a non-zero +// buffer length. +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + +// Cache warmup: send a Fast Open cookie request + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress) ++0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO,nop,nop> ++0 < S. 123:123(0) ack 1 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6,FO abcd1234,nop,nop> ++0 > . 1:1(0) ack 1 ++0 close(3) = 0 ++0 > F. 1:1(0) ack 1 ++0 < F. 1:1(0) ack 2 win 92 ++0 > . 2:2(0) ack 2 + +// Test with MSG_FASTOPEN without TCP_FASTOPEN_CONNECT. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 sendto(4, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(4) = 0 + +// Test with TCP_FASTOPEN_CONNECT without MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5 ++0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(5, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(5, ..., ...) = 0 ++0 sendto(5, NULL, 1, 0, ..., ...) = -1 ++0 close(5) = 0 + +// Test with both TCP_FASTOPEN_CONNECT and MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 6 ++0 fcntl(6, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(6, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(6, ..., ...) = 0 ++0 sendto(6, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(6) = 0 + +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt new file mode 100644 index 000000000000..b2b2cdf27e20 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we correctly skip zero-length IOVs. +`./defaults.sh` + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 40}, {..., 0}, {..., 20}], + msg_flags=0}, 0) = 60 + +0 > P. 1:61(60) ack 1 + +.01 < . 1:1(0) ack 61 win 257 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 0}, {..., 0}, {..., 0}], + msg_flags=0}, MSG_ZEROCOPY) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 10}, {..., 0}, {..., 50}], + msg_flags=0}, MSG_ZEROCOPY) = 60 + +0 > P. 61:121(60) ack 1 + +.01 < . 1:1(0) ack 121 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..59f5903f285c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test kernel behavior with NULL as buffer pointer + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.2 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 write(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 send(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 sendto(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address) + + +0 < . 1:1001(1000) ack 1 win 200 + +0 read(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 recv(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 recvfrom(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address) diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt new file mode 100644 index 000000000000..d7fdb43a8e89 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tcpi_last_data_recv for active session +`./defaults.sh` + +// Create a socket and set it to non-blocking. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + ++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) ++0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.030 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8> ++0 > . 1:1(0) ack 1 + ++1 %{ assert 990 <= tcpi_last_data_recv <= 1010, tcpi_last_data_recv }% + ++0 < . 1:1001(1000) ack 1 win 300 ++0 > . 1:1(0) ack 1001 + ++0 %{ assert tcpi_last_data_recv <= 10, tcpi_last_data_recv }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt new file mode 100644 index 000000000000..a9bcd46f6cb6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test rwnd limited time in tcp_info for client side. + +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +// Server advertises 0 receive window. + +.01 < S. 0:0(0) ack 1 win 0 <mss 1000,nop,nop,sackOK> + + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +// Make sure that initial rwnd limited time is 0. + +0 %{ assert tcpi_rwnd_limited == 0, tcpi_rwnd_limited }% + +// Receive window limited time starts here. + +0 write(3, ..., 1000) = 1000 + +// Check that rwnd limited time in tcp_info is around 0.1s. + +.1 %{ assert 98000 <= tcpi_rwnd_limited <= 110000, tcpi_rwnd_limited }% + +// Server opens the receive window. + +.1 < . 1:1(0) ack 1 win 2000 + +// Check that rwnd limited time in tcp_info is around 0.2s. + +0 %{ assert 198000 <= tcpi_rwnd_limited <= 210000, tcpi_rwnd_limited }% + + +0 > P. 1:1001(1000) ack 1 + +// Server advertises a very small receive window. + +.03 < . 1:1(0) ack 1001 win 10 + +// Receive window limited time starts again. + +0 write(3, ..., 1000) = 1000 + +// Server opens the receive window again. + +.1 < . 1:1(0) ack 1001 win 2000 +// Check that rwnd limited time in tcp_info is around 0.3s +// and busy time is 0.3 + 0.03 (server opened small window temporarily). + +0 %{ assert 298000 <= tcpi_rwnd_limited <= 310000, tcpi_rwnd_limited;\ + assert 328000 <= tcpi_busy_time <= 340000, tcpi_busy_time;\ +}% + + +0 > P. 1001:2001(1000) ack 1 + +.02 < . 1:1(0) ack 2001 win 2000 + +0 %{ assert 348000 <= tcpi_busy_time <= 360000, tcpi_busy_time }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt new file mode 100644 index 000000000000..f0de2acd0f8e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test send-buffer-limited time in tcp_info for client side. +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8> + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [10000], 4) = 0 + +0 getsockopt(3, SOL_SOCKET, SO_SNDBUF, [20000], [4]) = 0 + + +.09...0.14 write(3, ..., 150000) = 150000 + + +.01 < . 1:1(0) ack 10001 win 10000 + + +.01 < . 1:1(0) ack 30001 win 10000 + +// cwnd goes from 40(60KB) to 80(120KB), and that we hit the tiny sndbuf limit 10KB + +.01 < . 1:1(0) ack 70001 win 10000 + + +.02 < . 1:1(0) ack 95001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited; \ + assert 49000 <= tcpi_busy_time <= 52000, tcpi_busy_time; \ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% + +// This ack frees up enough buffer so we are no longer +// buffer limited (socket flag SOCK_NOSPACE is cleared) + +.02 < . 1:1(0) ack 150001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited;\ + assert 69000 <= tcpi_busy_time <= 73000, tcpi_busy_time;\ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt new file mode 100644 index 000000000000..2087ec0c746a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that tx timestamping sends timestamps only for +// the last byte of each sendmsg. +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 20000 <mss 1000,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + + +0 write(3, ..., 11000) = 11000 + +0 > P. 1:10001(10000) ack 1 + +.01 < . 1:1(0) ack 10001 win 4000 + +0 > P. 10001:11001(1000) ack 1 + +.01 < . 1:1(0) ack 11001 win 4000 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. +// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after when SYN is acked. So, we expect the last byte of the first +// chunk to have a timestamp key of 10999 (i.e., 11000 - 1). + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the last byte should be received at t=30ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt new file mode 100644 index 000000000000..876024a31110 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for partial writes (IPv4). +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 2000 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700> + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [1000], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// We have a partial write. + +0 write(3, ..., 10000) = 2964 + +0 > . 1:989(988) ack 1 <nop,nop,TS val 110 ecr 700> + +0 > P. 989:1977(988) ack 1 <nop,nop,TS val 110 ecr 700> + +.01 < . 1:1(0) ack 1977 win 92 <nop,nop,TS val 800 ecr 200> + +0 > P. 1977:2965(988) ack 1 <nop,nop,TS val 114 ecr 800> + +.01 < . 1:1(0) ack 2965 win 92 <nop,nop,TS val 800 ecr 200> + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after the first ack at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after the first ack at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received after the last ack at +// t=30ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt new file mode 100644 index 000000000000..84d94780e6be --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for server-side (IPv4). +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// Write two 2KB chunks. +// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after when SYN is acked. So, we expect the last byte of the first +// and the second chunks to have timestamp keys of 1999 (i.e., 2000 - 1) and +// 3999 (i.e., 4000 - 1) respectively. + +0 write(4, ..., 2000) = 2000 + +0 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 + +0 > P. 2001:4001(2000) ack 1 + +.01 < . 1:1(0) ack 2001 win 514 + +.01 < . 1:1(0) ack 4001 win 514 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after write at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after write at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SCHED for the second chunk should be received almost immediately +// after that at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the second chunk should be received almost immediately +// after that at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received at t=20ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the second chunk should be received at t=30ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt new file mode 100644 index 000000000000..e61424a7bd0a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send FIN packet with correct TSval +--tcp_ts_tick_usecs=1000 +--tolerance_usecs=7000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + + +1 close(4) = 0 +// Check that FIN TSval is updated properly, one second has passed since last sent packet. + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1200 ecr 200> diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt new file mode 100644 index 000000000000..174ce9a1bfc0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we reject TS val updates on a packet with invalid ACK sequence + +`./defaults.sh +` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +.1 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + +// bad packet with high tsval (its ACK sequence is above our sndnxt) + +0 < F. 1:1(0) ack 9999 win 20000 <nop,nop,TS val 200000 ecr 100> + + + +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100> + +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201> diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt new file mode 100644 index 000000000000..2e3b3bb7493a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send RST packet with correct TSval +--tcp_ts_tick_usecs=1000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + + +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100> + +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201> + + +1 close(4) = 0 +// Check that RST TSval is updated properly, one second has passed since last sent packet. + +0 > R. 1:1(0) ack 1001 <nop,nop,TS val 1200 ecr 201> diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt new file mode 100644 index 000000000000..183051ba0cae --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + + +0 < S 0:0(0) win 0 <mss 1460> + +0 > S. 0:0(0) ack 1 <mss 1460> + + +.1 < . 1:1(0) ack 1 win 65530 + +0 accept(3, ..., ...) = 4 + + +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 write(4, ..., 24) = 24 + +0 > P. 1:25(24) ack 1 + +.1 < . 1:1(0) ack 25 win 65530 + +0 %{ assert tcpi_probes == 0, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +// install a qdisc dropping all packets + +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0` + +0 write(4, ..., 24) = 24 + // When qdisc is congested we retry every 500ms + // (TCP_RESOURCE_PROBE_INTERVAL) and therefore + // we retry 6 times before hitting 3s timeout. + // First verify that the connection is alive: ++3.250 write(4, ..., 24) = 24 + // Now verify that shortly after that the socket is dead: + +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) + + +0 %{ assert tcpi_probes == 6, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt new file mode 100644 index 000000000000..2efe02bfba9c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.1 < . 1:1(0) ack 1 win 32792 + + + +0 accept(3, ..., ...) = 4 + +// Okay, we received nothing, and decide to close this idle socket. +// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth +// trying hard to cleanly close this flow, at the price of keeping +// a TCP structure in kernel for about 1 minute ! + +2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 close(4) = 0 + + +0 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.6~+.800 > F. 1:1(0) ack 1 + +// We finally receive something from the peer, but it is way too late +// Our socket vanished because TCP_USER_TIMEOUT was really small + +0 < . 1:2(1) ack 1 win 32792 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt new file mode 100644 index 000000000000..8bd60226ccfc --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that established connections drop a segment without the ACK flag set. + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.01 < . 1:1(0) ack 1 win 20000 + +0 accept(3, ..., ...) = 4 + +// Receive a segment with no flags set, verify that it's not enqueued. + +.01 < - 1:1001(1000) win 20000 + +0 ioctl(4, SIOCINQ, [0]) = 0 + +// Receive a segment with ACK flag set, verify that it is enqueued. + +.01 < . 1:1001(1000) ack 1 win 20000 + +0 ioctl(4, SIOCINQ, [1000]) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt new file mode 100644 index 000000000000..a82c8899d36b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +// basic zerocopy test: +// +// send a packet with MSG_ZEROCOPY and receive the notification ID +// repeat and verify IDs are consecutive + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt new file mode 100644 index 000000000000..c01915e7f4a1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +// batch zerocopy test: +// +// send multiple packets, then read one range of all notifications. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_MARK, [666], 4) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt new file mode 100644 index 000000000000..6509882932e9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Minimal client-side zerocopy test + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0...0 connect(4, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8> + +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > . 1:1(0) ack 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt new file mode 100644 index 000000000000..2cd78755cb2a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +// send with MSG_ZEROCOPY on a non-established socket +// +// verify that a send in state TCP_CLOSE correctly aborts the zerocopy +// operation, specifically it does not increment the zerocopy counter. +// +// First send on a closed socket and wait for (absent) notification. +// Then connect and send and verify that notification nr. is zero. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = -1 EPIPE (Broken pipe) + + +0.1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable) + + +0...0 connect(4, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8> + +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > . 1:1(0) ack 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt new file mode 100644 index 000000000000..7671c20e01cf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but +// it is not level-triggered either. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLET is set. send another packet with +// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, {events=EPOLLOUT|EPOLLET, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive only one EPOLLERR for the third send above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt new file mode 100644 index 000000000000..fadc480fdb7f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but +// it is not level-triggered either. this tests verify that the same behavior is +// maintained when we have EPOLLEXCLUSIVE. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLET is set. send another packet with +// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, + {events=EPOLLOUT|EPOLLET|EPOLLEXCLUSIVE, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive only one EPOLLERR for the third send above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt new file mode 100644 index 000000000000..5bfa0d1d2f4a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// This is a test to confirm that EPOLLERR is only fired once for an FD when +// EPOLLONESHOT is set. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLONESHOT is set. send another packet +// with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and +// confirm that EPOLLERR is correctly set. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, + {events=EPOLLOUT|EPOLLET|EPOLLONESHOT, fd=4}) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive no EPOLLERR for the third send above. + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + +// rearm the FD and verify the EPOLLERR is fired again. + +0 epoll_ctl(5, EPOLL_CTL_MOD, 4, {events=EPOLLOUT|EPOLLONESHOT, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt new file mode 100644 index 000000000000..4a73bbf46961 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +// Fastopen client zerocopy test: +// +// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the +// kernel returns the notification ID. +// +// Fastopen requires a stored cookie. Create two sockets. The first +// one will have no data in the initial send. On return 0 the +// zerocopy notification counter is not incremented. Verify this too. + +`./defaults.sh` + +// Send a FastOpen request, no cookie yet so no data in SYN + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 sendto(3, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 1000 ecr 0,nop,wscale 8,FO,nop,nop> + +.01 < S. 123:123(0) ack 1 win 14600 <mss 940,TS val 2000 ecr 1000,sackOK,nop,wscale 6, FO abcd1234,nop,nop> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 1001 ecr 2000> + +// Read from error queue: no zerocopy notification + +1 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable) + + +.01 close(3) = 0 + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1002 ecr 2000> + +.01 < F. 1:1(0) ack 2 win 92 <nop,nop,TS val 2001 ecr 1002> + +0 > . 2:2(0) ack 2 <nop,nop,TS val 1003 ecr 2001> + +// Send another Fastopen request, now SYN will have data + +.07 `sysctl -q net.ipv4.tcp_timestamps=0` + +.1 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5 + +0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(5, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 sendto(5, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = 500 + +0 > S 0:500(500) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO abcd1234,nop,nop> + +.05 < S. 5678:5678(0) ack 501 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6> + +0 > . 501:501(0) ack 1 + +// Read from error queue: now has first zerocopy notification + +0.5 recvmsg(5, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt new file mode 100644 index 000000000000..36086c5877ce --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +// Fastopen server zerocopy test: +// +// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the +// kernel returns the notification ID. + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207` + +// Set up a TFO server listening socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.1 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [2], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +// Client sends a SYN with data. + +.1 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK> + +// Server accepts and replies with data. ++.005 accept(3, ..., ...) = 4 + +0 read(4, ..., 1024) = 1000 + +0 sendto(4, ..., 1000, MSG_ZEROCOPY, ..., ...) = 1000 + +0 > P. 1:1001(1000) ack 1001 + +.05 < . 1001:1001(0) ack 1001 win 32792 + +// Read from error queue: now has first zerocopy notification + +0.1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt new file mode 100644 index 000000000000..672f817faca0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +// tcp_MAX_SKB_FRAGS test +// +// Verify that sending an iovec of tcp_MAX_SKB_FRAGS + 1 elements will +// 1) fit in a single packet without zerocopy +// 2) spill over into a second packet with zerocopy, +// because each iovec element becomes a frag +// 3) the PSH bit is set on an skb when it runs out of fragments + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + + // Each pinned zerocopy page is fully accounted to skb->truesize. + // This test generates a worst case packet with each frag storing + // one byte, but increasing truesize with a page (64KB on PPC). + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [2000000], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + // send an iov of 18 elements: just becomes a linear skb + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}], + msg_flags=0}, 0) = 18 + + +0 > P. 1:19(18) ack 1 + +0 < . 1:1(0) ack 19 win 257 + + // send a zerocopy iov of 18 elements: + +1 sendmsg(4, {msg_name(...)=..., + msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}], + msg_flags=0}, MSG_ZEROCOPY) = 18 + + // verify that it is split in one skb of 17 frags + 1 of 1 frag + // verify that both have the PSH bit set + +0 > P. 19:36(17) ack 1 + +0 < . 1:1(0) ack 36 win 257 + + +0 > P. 36:37(1) ack 1 + +0 < . 1:1(0) ack 37 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + // send a zerocopy iov of 64 elements: + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(64)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}], + msg_flags=0}, MSG_ZEROCOPY) = 64 + + // verify that it is split in skbs with 17 frags + +0 > P. 37:54(17) ack 1 + +0 < . 1:1(0) ack 54 win 257 + + +0 > P. 54:71(17) ack 1 + +0 < . 1:1(0) ack 71 win 257 + + +0 > P. 71:88(17) ack 1 + +0 < . 1:1(0) ack 88 win 257 + + +0 > P. 88:101(13) ack 1 + +0 < . 1:1(0) ack 101 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt new file mode 100644 index 000000000000..a9a1ac0aea4f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +// small packet zerocopy test: +// +// verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy +// packets of all sizes, including the smallest payload, 1B. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + // send 1B + +0 send(4, ..., 1, MSG_ZEROCOPY) = 1 + +0 > P. 1:2(1) ack 1 + +0 < . 1:1(0) ack 2 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + // send 1B again + +0 send(4, ..., 1, MSG_ZEROCOPY) = 1 + +0 > P. 2:3(1) ack 1 + +0 < . 1:1(0) ack 3 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index cfc84958025a..88e914c4eef9 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -197,9 +197,14 @@ # # - pmtu_ipv6_route_change # Same as above but with IPv6 +# +# - pmtu_ipv4_mp_exceptions +# Use the same topology as in pmtu_ipv4, but add routeable addresses +# on host A and B on lo reachable via both routers. Host A and B +# addresses have multipath routes to each other, b_r1 mtu = 1500. +# Check that PMTU exceptions are created for both paths. source lib.sh -source net_helper.sh PAUSE_ON_FAIL=no VERBOSE=0 @@ -266,7 +271,8 @@ tests=" list_flush_ipv4_exception ipv4: list and flush cached exceptions 1 list_flush_ipv6_exception ipv6: list and flush cached exceptions 1 pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 - pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1 + pmtu_ipv4_mp_exceptions ipv4: PMTU multipath nh exceptions 1" # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an @@ -343,6 +349,9 @@ tunnel6_a_addr="fd00:2::a" tunnel6_b_addr="fd00:2::b" tunnel6_mask="64" +host4_a_addr="192.168.99.99" +host4_b_addr="192.168.88.88" + dummy6_0_prefix="fc00:1000::" dummy6_1_prefix="fc00:1001::" dummy6_mask="64" @@ -681,13 +690,7 @@ setup_xfrm() { } setup_nettest_xfrm() { - if ! which nettest >/dev/null; then - PATH=$PWD:$PATH - if ! which nettest >/dev/null; then - echo "'nettest' command not found; skipping tests" - return 1 - fi - fi + check_gen_prog "nettest" [ ${1} -eq 6 ] && proto="-6" || proto="" port=${2} @@ -842,25 +845,97 @@ setup_bridge() { run_cmd ${ns_a} ip link set veth_A-C master br0 } +setup_ovs_via_internal_utility() { + type="${1}" + a_addr="${2}" + b_addr="${3}" + dport="${4}" + + run_cmd python3 ./openvswitch/ovs-dpctl.py add-if ovs_br0 ${type}_a -t ${type} || return 1 + + ports=$(python3 ./openvswitch/ovs-dpctl.py show) + br0_port=$(echo "$ports" | grep -E "\sovs_br0" | sed -e 's@port @@' | cut -d: -f1 | xargs) + type_a_port=$(echo "$ports" | grep ${type}_a | sed -e 's@port @@' | cut -d: -f1 | xargs) + veth_a_port=$(echo "$ports" | grep veth_A | sed -e 's@port @@' | cut -d: -f1 | xargs) + + v4_a_tun="${prefix4}.${a_r1}.1" + v4_b_tun="${prefix4}.${b_r1}.1" + + v6_a_tun="${prefix6}:${a_r1}::1" + v6_b_tun="${prefix6}:${b_r1}::1" + + if [ "${v4_a_tun}" = "${a_addr}" ]; then + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0800),ipv4()" \ + "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x86dd),ipv6()" \ + "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0800),ipv4()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x86dd),ipv6()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0806),arp()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0806),arp(sip=${veth4_c_addr},tip=${tunnel4_b_addr})" \ + "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + else + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0800),ipv4()" \ + "set(tunnel(tun_id=1,ipv6_dst=${v6_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x86dd),ipv6()" \ + "set(tunnel(tun_id=1,ipv6_dst=${v6_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,ipv6_src=${v6_b_tun},ipv6_dst=${v6_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0800),ipv4()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,ipv6_src=${v6_b_tun},ipv6_dst=${v6_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x86dd),ipv6()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,ipv6_src=${v6_b_tun},ipv6_dst=${v6_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0806),arp()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0806),arp(sip=${veth4_c_addr},tip=${tunnel4_b_addr})" \ + "set(tunnel(tun_id=1,ipv6_dst=${v6_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + fi +} + +setup_ovs_via_vswitchd() { + type="${1}" + b_addr="${2}" + + run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \ + set interface ${type}_a type=${type} \ + options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1 +} + setup_ovs_vxlan_or_geneve() { type="${1}" a_addr="${2}" b_addr="${3}" + dport="6081" if [ "${type}" = "vxlan" ]; then + dport="4789" opts="${opts} ttl 64 dstport 4789" opts_b="local ${b_addr}" fi - run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \ - set interface ${type}_a type=${type} \ - options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1 + setup_ovs_via_internal_utility "${type}" "${a_addr}" "${b_addr}" \ + "${dport}" || \ + setup_ovs_via_vswitchd "${type}" "${b_addr}" || return 1 run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} || return 1 run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b + run_cmd ip link set ${type}_a up run_cmd ${ns_b} ip link set ${type}_b up } @@ -880,8 +955,24 @@ setup_ovs_vxlan6() { setup_ovs_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 } +setup_ovs_br_internal() { + run_cmd python3 ./openvswitch/ovs-dpctl.py add-dp ovs_br0 || \ + return 1 +} + +setup_ovs_br_vswitchd() { + run_cmd ovs-vsctl add-br ovs_br0 || return 1 +} + +setup_ovs_add_if() { + ifname="${1}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-if ovs_br0 \ + "${ifname}" || \ + run_cmd ovs-vsctl add-port ovs_br0 "${ifname}" +} + setup_ovs_bridge() { - run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip + setup_ovs_br_internal || setup_ovs_br_vswitchd || return $ksft_skip run_cmd ip link set ovs_br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C @@ -891,7 +982,7 @@ setup_ovs_bridge() { run_cmd ${ns_c} ip link set veth_C-A up run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A - run_cmd ovs-vsctl add-port ovs_br0 veth_A-C + setup_ovs_add_if veth_A-C # Move veth_A-R1 to init run_cmd ${ns_a} ip link set veth_A-R1 netns 1 @@ -902,6 +993,52 @@ setup_ovs_bridge() { run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2 } +setup_multipath_new() { + # Set up host A with multipath routes to host B host4_b_addr + run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo + run_cmd ${ns_a} ip nexthop add id 401 via ${prefix4}.${a_r1}.2 dev veth_A-R1 + run_cmd ${ns_a} ip nexthop add id 402 via ${prefix4}.${a_r2}.2 dev veth_A-R2 + run_cmd ${ns_a} ip nexthop add id 403 group 401/402 + run_cmd ${ns_a} ip route add ${host4_b_addr} src ${host4_a_addr} nhid 403 + + # Set up host B with multipath routes to host A host4_a_addr + run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo + run_cmd ${ns_b} ip nexthop add id 401 via ${prefix4}.${b_r1}.2 dev veth_B-R1 + run_cmd ${ns_b} ip nexthop add id 402 via ${prefix4}.${b_r2}.2 dev veth_B-R2 + run_cmd ${ns_b} ip nexthop add id 403 group 401/402 + run_cmd ${ns_b} ip route add ${host4_a_addr} src ${host4_b_addr} nhid 403 +} + +setup_multipath_old() { + # Set up host A with multipath routes to host B host4_b_addr + run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo + run_cmd ${ns_a} ip route add ${host4_b_addr} \ + src ${host4_a_addr} \ + nexthop via ${prefix4}.${a_r1}.2 weight 1 \ + nexthop via ${prefix4}.${a_r2}.2 weight 1 + + # Set up host B with multipath routes to host A host4_a_addr + run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo + run_cmd ${ns_b} ip route add ${host4_a_addr} \ + src ${host4_b_addr} \ + nexthop via ${prefix4}.${b_r1}.2 weight 1 \ + nexthop via ${prefix4}.${b_r2}.2 weight 1 +} + +setup_multipath() { + if [ "$USE_NH" = "yes" ]; then + setup_multipath_new + else + setup_multipath_old + fi + + # Set up routers with routes to dummies + run_cmd ${ns_r1} ip route add ${host4_a_addr} via ${prefix4}.${a_r1}.1 + run_cmd ${ns_r2} ip route add ${host4_a_addr} via ${prefix4}.${a_r2}.1 + run_cmd ${ns_r1} ip route add ${host4_b_addr} via ${prefix4}.${b_r1}.1 + run_cmd ${ns_r2} ip route add ${host4_b_addr} via ${prefix4}.${b_r2}.1 +} + setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip @@ -922,6 +1059,18 @@ trace() { sleep 1 } +cleanup_del_ovs_internal() { + # squelch the output of the del-if commands since it can be wordy + python3 ./openvswitch/ovs-dpctl.py del-if ovs_br0 -d true vxlan_a >/dev/null 2>&1 + python3 ./openvswitch/ovs-dpctl.py del-if ovs_br0 -d true geneve_a >/dev/null 2>&1 + python3 ./openvswitch/ovs-dpctl.py del-dp ovs_br0 >/dev/null 2>&1 +} + +cleanup_del_ovs_vswitchd() { + ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null + ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null +} + cleanup() { for pid in ${tcpdump_pids}; do kill ${pid} @@ -940,10 +1089,10 @@ cleanup() { cleanup_all_ns - ip link del veth_A-C 2>/dev/null - ip link del veth_A-R1 2>/dev/null - ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null - ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null + ip link del veth_A-C 2>/dev/null + ip link del veth_A-R1 2>/dev/null + cleanup_del_ovs_internal + cleanup_del_ovs_vswitchd rm -f "$tmpoutfile" } @@ -982,23 +1131,15 @@ link_get_mtu() { } route_get_dst_exception() { - ns_cmd="${1}" - dst="${2}" - dsfield="${3}" - - if [ -z "${dsfield}" ]; then - dsfield=0 - fi + ns_cmd="${1}"; shift - ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}" + ${ns_cmd} ip route get "$@" } route_get_dst_pmtu_from_exception() { - ns_cmd="${1}" - dst="${2}" - dsfield="${3}" + ns_cmd="${1}"; shift - mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")" + mtu_parse "$(route_get_dst_exception "${ns_cmd}" "$@")" } check_pmtu_value() { @@ -1141,10 +1282,10 @@ test_pmtu_ipv4_dscp_icmp_exception() { run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}" # Check that exceptions have been created with the correct PMTU - pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")" check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 - pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")" check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 } @@ -1191,9 +1332,9 @@ test_pmtu_ipv4_dscp_udp_exception() { UDP:"${dst2}":50000,tos="${dsfield}" # Check that exceptions have been created with the correct PMTU - pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")" check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 - pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")" check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 } @@ -1347,7 +1488,7 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { size=$(du -sb $tmpoutfile) size=${size%%/tmp/*} - [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1 + [ $size -ne 1048576 ] && err "File size $size mismatches expected value in locally bridged vxlan test" && return 1 done rm -f "$tmpoutfile" @@ -1397,6 +1538,12 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { outer_family=${3} ll_mtu=4000 + if [ "${type}" = "vxlan" ]; then + tun_a="vxlan_sys_4789" + elif [ "${type}" = "geneve" ]; then + tun_a="genev_sys_6081" + fi + if [ ${outer_family} -eq 4 ]; then setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header @@ -1407,17 +1554,11 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi - if [ "${type}" = "vxlan" ]; then - tun_a="vxlan_sys_4789" - elif [ "${type}" = "geneve" ]; then - tun_a="genev_sys_6081" - fi - - trace "" "${tun_a}" "${ns_b}" ${type}_b \ - "" veth_A-R1 "${ns_r1}" veth_R1-A \ - "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \ - "" ovs_br0 "" veth-A-C \ - "${ns_c}" veth_C-A + trace "" ${type}_a "${ns_b}" ${type}_b \ + "" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \ + "" ovs_br0 "" veth-A_C \ + "${ns_c}" veth_C-A "" "${tun_a}" if [ ${family} -eq 4 ]; then ping=ping @@ -1436,8 +1577,9 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { mtu "${ns_b}" veth_B-R1 ${ll_mtu} mtu "${ns_r1}" veth_R1-B ${ll_mtu} - mtu "" ${tun_a} $((${ll_mtu} + 1000)) - mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) + mtu "" ${tun_a} $((${ll_mtu} + 1000)) 2>/dev/null || \ + mtu "" ${type}_a $((${ll_mtu} + 1000)) 2>/dev/null + mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1 @@ -1961,7 +2103,7 @@ check_running() { pid=${1} cmd=${2} - [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ] + [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "${cmd}" ] } test_cleanup_vxlanX_exception() { @@ -2234,6 +2376,36 @@ test_pmtu_ipv6_route_change() { test_pmtu_ipvX_route_change 6 } +test_pmtu_ipv4_mp_exceptions() { + setup namespaces routing multipath || return $ksft_skip + + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1500 + mtu "${ns_b}" veth_B-R1 1500 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Ping and expect two nexthop exceptions for two routes + run_cmd ${ns_a} ping -q -M want -i 0.1 -c 1 -s 1800 "${host4_b_addr}" + + # Check that exceptions have been created with the correct PMTU + pmtu_a_R1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R1)" + pmtu_a_R2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R2)" + + check_pmtu_value "1500" "${pmtu_a_R1}" "exceeding MTU (veth_A-R1)" || return 1 + check_pmtu_value "1500" "${pmtu_a_R2}" "exceeding MTU (veth_A-R2)" || return 1 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." diff --git a/tools/testing/selftests/net/proc_net_pktgen.c b/tools/testing/selftests/net/proc_net_pktgen.c new file mode 100644 index 000000000000..69444fb29577 --- /dev/null +++ b/tools/testing/selftests/net/proc_net_pktgen.c @@ -0,0 +1,690 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * proc_net_pktgen: kselftest for /proc/net/pktgen interface + * + * Copyright (c) 2025 Peter Seiderer <ps.report@gmx.net> + * + */ +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +static const char ctrl_cmd_stop[] = "stop"; +static const char ctrl_cmd_start[] = "start"; +static const char ctrl_cmd_reset[] = "reset"; + +static const char wrong_ctrl_cmd[] = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"; + +static const char thr_cmd_add_loopback_0[] = "add_device lo@0"; +static const char thr_cmd_rm_loopback_0[] = "rem_device_all"; + +static const char wrong_thr_cmd[] = "forsureawrongcommand"; +static const char legacy_thr_cmd[] = "max_before_softirq"; + +static const char wrong_dev_cmd[] = "forsurewrongcommand"; +static const char dev_cmd_min_pkt_size_0[] = "min_pkt_size"; +static const char dev_cmd_min_pkt_size_1[] = "min_pkt_size "; +static const char dev_cmd_min_pkt_size_2[] = "min_pkt_size 0"; +static const char dev_cmd_min_pkt_size_3[] = "min_pkt_size 1"; +static const char dev_cmd_min_pkt_size_4[] = "min_pkt_size 100"; +static const char dev_cmd_min_pkt_size_5[] = "min_pkt_size=1001"; +static const char dev_cmd_min_pkt_size_6[] = "min_pkt_size =2002"; +static const char dev_cmd_min_pkt_size_7[] = "min_pkt_size= 3003"; +static const char dev_cmd_min_pkt_size_8[] = "min_pkt_size = 4004"; +static const char dev_cmd_max_pkt_size_0[] = "max_pkt_size 200"; +static const char dev_cmd_pkt_size_0[] = "pkt_size 300"; +static const char dev_cmd_imix_weights_0[] = "imix_weights 0,7 576,4 1500,1"; +static const char dev_cmd_imix_weights_1[] = "imix_weights 101,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20"; +static const char dev_cmd_imix_weights_2[] = "imix_weights 100,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20 121,21"; +static const char dev_cmd_imix_weights_3[] = "imix_weights"; +static const char dev_cmd_imix_weights_4[] = "imix_weights "; +static const char dev_cmd_imix_weights_5[] = "imix_weights 0"; +static const char dev_cmd_imix_weights_6[] = "imix_weights 0,"; +static const char dev_cmd_debug_0[] = "debug 1"; +static const char dev_cmd_debug_1[] = "debug 0"; +static const char dev_cmd_frags_0[] = "frags 100"; +static const char dev_cmd_delay_0[] = "delay 100"; +static const char dev_cmd_delay_1[] = "delay 2147483647"; +static const char dev_cmd_rate_0[] = "rate 0"; +static const char dev_cmd_rate_1[] = "rate 100"; +static const char dev_cmd_ratep_0[] = "ratep 0"; +static const char dev_cmd_ratep_1[] = "ratep 200"; +static const char dev_cmd_udp_src_min_0[] = "udp_src_min 1"; +static const char dev_cmd_udp_dst_min_0[] = "udp_dst_min 2"; +static const char dev_cmd_udp_src_max_0[] = "udp_src_max 3"; +static const char dev_cmd_udp_dst_max_0[] = "udp_dst_max 4"; +static const char dev_cmd_clone_skb_0[] = "clone_skb 1"; +static const char dev_cmd_clone_skb_1[] = "clone_skb 0"; +static const char dev_cmd_count_0[] = "count 100"; +static const char dev_cmd_src_mac_count_0[] = "src_mac_count 100"; +static const char dev_cmd_dst_mac_count_0[] = "dst_mac_count 100"; +static const char dev_cmd_burst_0[] = "burst 0"; +static const char dev_cmd_node_0[] = "node 100"; +static const char dev_cmd_xmit_mode_0[] = "xmit_mode start_xmit"; +static const char dev_cmd_xmit_mode_1[] = "xmit_mode netif_receive"; +static const char dev_cmd_xmit_mode_2[] = "xmit_mode queue_xmit"; +static const char dev_cmd_xmit_mode_3[] = "xmit_mode nonsense"; +static const char dev_cmd_flag_0[] = "flag UDPCSUM"; +static const char dev_cmd_flag_1[] = "flag !UDPCSUM"; +static const char dev_cmd_flag_2[] = "flag nonsense"; +static const char dev_cmd_dst_min_0[] = "dst_min 101.102.103.104"; +static const char dev_cmd_dst_0[] = "dst 101.102.103.104"; +static const char dev_cmd_dst_max_0[] = "dst_max 201.202.203.204"; +static const char dev_cmd_dst6_0[] = "dst6 2001:db38:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_dst6_min_0[] = "dst6_min 2001:db8:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_dst6_max_0[] = "dst6_max 2001:db8:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_src6_0[] = "src6 2001:db38:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_src_min_0[] = "src_min 101.102.103.104"; +static const char dev_cmd_src_max_0[] = "src_max 201.202.203.204"; +static const char dev_cmd_dst_mac_0[] = "dst_mac 01:02:03:04:05:06"; +static const char dev_cmd_src_mac_0[] = "src_mac 11:12:13:14:15:16"; +static const char dev_cmd_clear_counters_0[] = "clear_counters"; +static const char dev_cmd_flows_0[] = "flows 100"; +static const char dev_cmd_spi_0[] = "spi 100"; +static const char dev_cmd_flowlen_0[] = "flowlen 100"; +static const char dev_cmd_queue_map_min_0[] = "queue_map_min 1"; +static const char dev_cmd_queue_map_max_0[] = "queue_map_max 2"; +static const char dev_cmd_mpls_0[] = "mpls 00000001"; +static const char dev_cmd_mpls_1[] = "mpls 00000001,000000f2"; +static const char dev_cmd_mpls_2[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f"; +static const char dev_cmd_mpls_3[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f,00000f10"; +static const char dev_cmd_vlan_id_0[] = "vlan_id 1"; +static const char dev_cmd_vlan_p_0[] = "vlan_p 1"; +static const char dev_cmd_vlan_cfi_0[] = "vlan_cfi 1"; +static const char dev_cmd_vlan_id_1[] = "vlan_id 4096"; +static const char dev_cmd_svlan_id_0[] = "svlan_id 1"; +static const char dev_cmd_svlan_p_0[] = "svlan_p 1"; +static const char dev_cmd_svlan_cfi_0[] = "svlan_cfi 1"; +static const char dev_cmd_svlan_id_1[] = "svlan_id 4096"; +static const char dev_cmd_tos_0[] = "tos 0"; +static const char dev_cmd_tos_1[] = "tos 0f"; +static const char dev_cmd_tos_2[] = "tos 0ff"; +static const char dev_cmd_traffic_class_0[] = "traffic_class f0"; +static const char dev_cmd_skb_priority_0[] = "skb_priority 999"; + +FIXTURE(proc_net_pktgen) { + int ctrl_fd; + int thr_fd; + int dev_fd; +}; + +FIXTURE_SETUP(proc_net_pktgen) { + int r; + ssize_t len; + + r = system("modprobe pktgen"); + ASSERT_EQ(r, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?"); + + self->ctrl_fd = open("/proc/net/pktgen/pgctrl", O_RDWR); + ASSERT_GE(self->ctrl_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?"); + + self->thr_fd = open("/proc/net/pktgen/kpktgend_0", O_RDWR); + ASSERT_GE(self->thr_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?"); + + len = write(self->thr_fd, thr_cmd_add_loopback_0, sizeof(thr_cmd_add_loopback_0)); + ASSERT_EQ(len, sizeof(thr_cmd_add_loopback_0)) TH_LOG("device lo@0 already registered?"); + + self->dev_fd = open("/proc/net/pktgen/lo@0", O_RDWR); + ASSERT_GE(self->dev_fd, 0) TH_LOG("device entry for lo@0 missing?"); +} + +FIXTURE_TEARDOWN(proc_net_pktgen) { + int ret; + ssize_t len; + + ret = close(self->dev_fd); + EXPECT_EQ(ret, 0); + + len = write(self->thr_fd, thr_cmd_rm_loopback_0, sizeof(thr_cmd_rm_loopback_0)); + EXPECT_EQ(len, sizeof(thr_cmd_rm_loopback_0)); + + ret = close(self->thr_fd); + EXPECT_EQ(ret, 0); + + ret = close(self->ctrl_fd); + EXPECT_EQ(ret, 0); +} + +TEST_F(proc_net_pktgen, wrong_ctrl_cmd) { + for (int i = 0; i <= sizeof(wrong_ctrl_cmd); i++) { + ssize_t len; + + len = write(self->ctrl_fd, wrong_ctrl_cmd, i); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(proc_net_pktgen, ctrl_cmd) { + ssize_t len; + + len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop)); + EXPECT_EQ(len, sizeof(ctrl_cmd_stop)); + + len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop) - 1); + EXPECT_EQ(len, sizeof(ctrl_cmd_stop) - 1); + + len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start)); + EXPECT_EQ(len, sizeof(ctrl_cmd_start)); + + len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start) - 1); + EXPECT_EQ(len, sizeof(ctrl_cmd_start) - 1); + + len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset)); + EXPECT_EQ(len, sizeof(ctrl_cmd_reset)); + + len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset) - 1); + EXPECT_EQ(len, sizeof(ctrl_cmd_reset) - 1); +} + +TEST_F(proc_net_pktgen, wrong_thr_cmd) { + for (int i = 0; i <= sizeof(wrong_thr_cmd); i++) { + ssize_t len; + + len = write(self->thr_fd, wrong_thr_cmd, i); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(proc_net_pktgen, legacy_thr_cmd) { + for (int i = 0; i <= sizeof(legacy_thr_cmd); i++) { + ssize_t len; + + len = write(self->thr_fd, legacy_thr_cmd, i); + if (i < (sizeof(legacy_thr_cmd) - 1)) { + /* incomplete command string */ + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } else { + /* complete command string without/with trailing '\0' */ + EXPECT_EQ(len, i); + } + } +} + +TEST_F(proc_net_pktgen, wrong_dev_cmd) { + for (int i = 0; i <= sizeof(wrong_dev_cmd); i++) { + ssize_t len; + + len = write(self->dev_fd, wrong_dev_cmd, i); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(proc_net_pktgen, dev_cmd_min_pkt_size) { + ssize_t len; + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0)); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0) - 1); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0) - 1); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1)); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1) - 1); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1) - 1); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2)); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2) - 1); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2) - 1); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_3, sizeof(dev_cmd_min_pkt_size_3)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_3)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_4, sizeof(dev_cmd_min_pkt_size_4)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_4)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_5, sizeof(dev_cmd_min_pkt_size_5)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_5)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_6, sizeof(dev_cmd_min_pkt_size_6)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_6)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_7, sizeof(dev_cmd_min_pkt_size_7)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_7)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_8, sizeof(dev_cmd_min_pkt_size_8)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_8)); +} + +TEST_F(proc_net_pktgen, dev_cmd_max_pkt_size) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_max_pkt_size_0, sizeof(dev_cmd_max_pkt_size_0)); + EXPECT_EQ(len, sizeof(dev_cmd_max_pkt_size_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_pkt_size) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_pkt_size_0, sizeof(dev_cmd_pkt_size_0)); + EXPECT_EQ(len, sizeof(dev_cmd_pkt_size_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_imix_weights) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_imix_weights_0, sizeof(dev_cmd_imix_weights_0)); + EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_0)); + + len = write(self->dev_fd, dev_cmd_imix_weights_1, sizeof(dev_cmd_imix_weights_1)); + EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_1)); + + len = write(self->dev_fd, dev_cmd_imix_weights_2, sizeof(dev_cmd_imix_weights_2)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, E2BIG); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); +} + +TEST_F(proc_net_pktgen, dev_cmd_debug) { + ssize_t len; + + /* debug on */ + len = write(self->dev_fd, dev_cmd_debug_0, sizeof(dev_cmd_debug_0)); + EXPECT_EQ(len, sizeof(dev_cmd_debug_0)); + + /* debug off */ + len = write(self->dev_fd, dev_cmd_debug_1, sizeof(dev_cmd_debug_1)); + EXPECT_EQ(len, sizeof(dev_cmd_debug_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_frags) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_frags_0, sizeof(dev_cmd_frags_0)); + EXPECT_EQ(len, sizeof(dev_cmd_frags_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_delay) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_delay_0, sizeof(dev_cmd_delay_0)); + EXPECT_EQ(len, sizeof(dev_cmd_delay_0)); + + len = write(self->dev_fd, dev_cmd_delay_1, sizeof(dev_cmd_delay_1)); + EXPECT_EQ(len, sizeof(dev_cmd_delay_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_rate) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_rate_0, sizeof(dev_cmd_rate_0)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + len = write(self->dev_fd, dev_cmd_rate_1, sizeof(dev_cmd_rate_1)); + EXPECT_EQ(len, sizeof(dev_cmd_rate_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_ratep) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_ratep_0, sizeof(dev_cmd_ratep_0)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + len = write(self->dev_fd, dev_cmd_ratep_1, sizeof(dev_cmd_ratep_1)); + EXPECT_EQ(len, sizeof(dev_cmd_ratep_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_src_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_src_min_0, sizeof(dev_cmd_udp_src_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_src_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_dst_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_dst_min_0, sizeof(dev_cmd_udp_dst_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_src_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_src_max_0, sizeof(dev_cmd_udp_src_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_src_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_dst_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_dst_max_0, sizeof(dev_cmd_udp_dst_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_clone_skb) { + ssize_t len; + + /* clone_skb on (gives EOPNOTSUPP on lo device) */ + len = write(self->dev_fd, dev_cmd_clone_skb_0, sizeof(dev_cmd_clone_skb_0)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EOPNOTSUPP); + + /* clone_skb off */ + len = write(self->dev_fd, dev_cmd_clone_skb_1, sizeof(dev_cmd_clone_skb_1)); + EXPECT_EQ(len, sizeof(dev_cmd_clone_skb_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_count) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_count_0, sizeof(dev_cmd_count_0)); + EXPECT_EQ(len, sizeof(dev_cmd_count_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_mac_count) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_mac_count_0, sizeof(dev_cmd_src_mac_count_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_mac_count_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_mac_count) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_mac_count_0, sizeof(dev_cmd_dst_mac_count_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_count_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_burst) { + ssize_t len; + + /* burst off */ + len = write(self->dev_fd, dev_cmd_burst_0, sizeof(dev_cmd_burst_0)); + EXPECT_EQ(len, sizeof(dev_cmd_burst_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_node) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_node_0, sizeof(dev_cmd_node_0)); + EXPECT_EQ(len, sizeof(dev_cmd_node_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_xmit_mode) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_xmit_mode_0, sizeof(dev_cmd_xmit_mode_0)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_0)); + + len = write(self->dev_fd, dev_cmd_xmit_mode_1, sizeof(dev_cmd_xmit_mode_1)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_1)); + + len = write(self->dev_fd, dev_cmd_xmit_mode_2, sizeof(dev_cmd_xmit_mode_2)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_2)); + + len = write(self->dev_fd, dev_cmd_xmit_mode_3, sizeof(dev_cmd_xmit_mode_3)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_3)); +} + +TEST_F(proc_net_pktgen, dev_cmd_flag) { + ssize_t len; + + /* flag UDPCSUM on */ + len = write(self->dev_fd, dev_cmd_flag_0, sizeof(dev_cmd_flag_0)); + EXPECT_EQ(len, sizeof(dev_cmd_flag_0)); + + /* flag UDPCSUM off */ + len = write(self->dev_fd, dev_cmd_flag_1, sizeof(dev_cmd_flag_1)); + EXPECT_EQ(len, sizeof(dev_cmd_flag_1)); + + /* flag invalid */ + len = write(self->dev_fd, dev_cmd_flag_2, sizeof(dev_cmd_flag_2)); + EXPECT_EQ(len, sizeof(dev_cmd_flag_2)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_min_0, sizeof(dev_cmd_dst_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_0, sizeof(dev_cmd_dst_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_max_0, sizeof(dev_cmd_dst_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst6) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst6_0, sizeof(dev_cmd_dst6_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst6_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst6_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst6_min_0, sizeof(dev_cmd_dst6_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst6_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst6_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst6_max_0, sizeof(dev_cmd_dst6_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst6_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src6) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src6_0, sizeof(dev_cmd_src6_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src6_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_min_0, sizeof(dev_cmd_src_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_max_0, sizeof(dev_cmd_src_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_mac) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_mac_0, sizeof(dev_cmd_dst_mac_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_mac) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_mac_0, sizeof(dev_cmd_src_mac_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_mac_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_clear_counters) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_clear_counters_0, sizeof(dev_cmd_clear_counters_0)); + EXPECT_EQ(len, sizeof(dev_cmd_clear_counters_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_flows) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_flows_0, sizeof(dev_cmd_flows_0)); + EXPECT_EQ(len, sizeof(dev_cmd_flows_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_spi) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_spi_0, sizeof(dev_cmd_spi_0)); + EXPECT_EQ(len, sizeof(dev_cmd_spi_0)) TH_LOG("CONFIG_XFRM not enabled?"); +} + +TEST_F(proc_net_pktgen, dev_cmd_flowlen) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_flowlen_0, sizeof(dev_cmd_flowlen_0)); + EXPECT_EQ(len, sizeof(dev_cmd_flowlen_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_queue_map_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_queue_map_min_0, sizeof(dev_cmd_queue_map_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_queue_map_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_queue_map_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_queue_map_max_0, sizeof(dev_cmd_queue_map_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_queue_map_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_mpls) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_mpls_0, sizeof(dev_cmd_mpls_0)); + EXPECT_EQ(len, sizeof(dev_cmd_mpls_0)); + + len = write(self->dev_fd, dev_cmd_mpls_1, sizeof(dev_cmd_mpls_1)); + EXPECT_EQ(len, sizeof(dev_cmd_mpls_1)); + + len = write(self->dev_fd, dev_cmd_mpls_2, sizeof(dev_cmd_mpls_2)); + EXPECT_EQ(len, sizeof(dev_cmd_mpls_2)); + + len = write(self->dev_fd, dev_cmd_mpls_3, sizeof(dev_cmd_mpls_3)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, E2BIG); +} + +TEST_F(proc_net_pktgen, dev_cmd_vlan_id) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_vlan_id_0, sizeof(dev_cmd_vlan_id_0)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_0)); + + len = write(self->dev_fd, dev_cmd_vlan_p_0, sizeof(dev_cmd_vlan_p_0)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_p_0)); + + len = write(self->dev_fd, dev_cmd_vlan_cfi_0, sizeof(dev_cmd_vlan_cfi_0)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_cfi_0)); + + len = write(self->dev_fd, dev_cmd_vlan_id_1, sizeof(dev_cmd_vlan_id_1)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_svlan_id) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_svlan_id_0, sizeof(dev_cmd_svlan_id_0)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_0)); + + len = write(self->dev_fd, dev_cmd_svlan_p_0, sizeof(dev_cmd_svlan_p_0)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_p_0)); + + len = write(self->dev_fd, dev_cmd_svlan_cfi_0, sizeof(dev_cmd_svlan_cfi_0)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_cfi_0)); + + len = write(self->dev_fd, dev_cmd_svlan_id_1, sizeof(dev_cmd_svlan_id_1)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_1)); +} + + +TEST_F(proc_net_pktgen, dev_cmd_tos) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_tos_0, sizeof(dev_cmd_tos_0)); + EXPECT_EQ(len, sizeof(dev_cmd_tos_0)); + + len = write(self->dev_fd, dev_cmd_tos_1, sizeof(dev_cmd_tos_1)); + EXPECT_EQ(len, sizeof(dev_cmd_tos_1)); + + len = write(self->dev_fd, dev_cmd_tos_2, sizeof(dev_cmd_tos_2)); + EXPECT_EQ(len, sizeof(dev_cmd_tos_2)); +} + + +TEST_F(proc_net_pktgen, dev_cmd_traffic_class) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_traffic_class_0, sizeof(dev_cmd_traffic_class_0)); + EXPECT_EQ(len, sizeof(dev_cmd_traffic_class_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_skb_priority) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_skb_priority_0, sizeof(dev_cmd_skb_priority_0)); + EXPECT_EQ(len, sizeof(dev_cmd_skb_priority_0)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 1a736f700be4..84c524357075 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -48,6 +48,7 @@ #include <string.h> #include <sys/mman.h> #include <sys/socket.h> +#include <sys/ioctl.h> #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> @@ -59,6 +60,33 @@ static uint32_t cfg_max_num_members; +static void loopback_set_up_down(int state_up) +{ + struct ifreq ifreq = {}; + int fd, err; + + fd = socket(AF_PACKET, SOCK_RAW, 0); + if (fd < 0) { + perror("socket loopback"); + exit(1); + } + strcpy(ifreq.ifr_name, "lo"); + err = ioctl(fd, SIOCGIFFLAGS, &ifreq); + if (err) { + perror("SIOCGIFFLAGS"); + exit(1); + } + if (state_up != !!(ifreq.ifr_flags & IFF_UP)) { + ifreq.ifr_flags ^= IFF_UP; + err = ioctl(fd, SIOCSIFFLAGS, &ifreq); + if (err) { + perror("SIOCSIFFLAGS"); + exit(1); + } + } + close(fd); +} + /* Open a socket in a given fanout mode. * @return -1 if mode is bad, a valid socket otherwise */ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) @@ -165,9 +193,9 @@ static void sock_fanout_set_ebpf(int fd) attr.insns = (unsigned long) prog; attr.insn_cnt = ARRAY_SIZE(prog); attr.license = (unsigned long) "GPL"; - attr.log_buf = (unsigned long) log_buf, - attr.log_size = sizeof(log_buf), - attr.log_level = 1, + attr.log_buf = (unsigned long) log_buf; + attr.log_size = sizeof(log_buf); + attr.log_level = 1; pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); if (pfd < 0) { @@ -251,6 +279,41 @@ static int sock_fanout_read(int fds[], char *rings[], const int expect[]) return 0; } +/* Test that creating/joining a fanout group fails for unbound socket without + * a specified protocol + */ +static void test_unbound_fanout(void) +{ + int val, fd0, fd1, err; + + fprintf(stderr, "test: unbound fanout\n"); + fd0 = socket(PF_PACKET, SOCK_RAW, 0); + if (fd0 < 0) { + perror("socket packet"); + exit(1); + } + /* Try to create a new fanout group. Should fail. */ + val = (PACKET_FANOUT_HASH << 16) | 1; + err = setsockopt(fd0, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val)); + if (!err) { + fprintf(stderr, "ERROR: unbound socket fanout create\n"); + exit(1); + } + fd1 = sock_fanout_open(PACKET_FANOUT_HASH, 1); + if (fd1 == -1) { + fprintf(stderr, "ERROR: failed to open HASH socket\n"); + exit(1); + } + /* Try to join an existing fanout group. Should fail. */ + err = setsockopt(fd0, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val)); + if (!err) { + fprintf(stderr, "ERROR: unbound socket fanout join\n"); + exit(1); + } + close(fd0); + close(fd1); +} + /* Test illegal mode + flag combination */ static void test_control_single(void) { @@ -264,17 +327,22 @@ static void test_control_single(void) } /* Test illegal group with different modes or flags */ -static void test_control_group(void) +static void test_control_group(int toggle) { int fds[2]; - fprintf(stderr, "test: control multiple sockets\n"); + if (toggle) + fprintf(stderr, "test: control multiple sockets with link down toggle\n"); + else + fprintf(stderr, "test: control multiple sockets\n"); fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0); if (fds[0] == -1) { fprintf(stderr, "ERROR: failed to open HASH socket\n"); exit(1); } + if (toggle) + loopback_set_up_down(0); if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) { fprintf(stderr, "ERROR: joined group with wrong flag defrag\n"); @@ -294,6 +362,8 @@ static void test_control_group(void) fprintf(stderr, "ERROR: failed to join group\n"); exit(1); } + if (toggle) + loopback_set_up_down(1); if (close(fds[1]) || close(fds[0])) { fprintf(stderr, "ERROR: closing sockets\n"); exit(1); @@ -488,8 +558,10 @@ int main(int argc, char **argv) const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } }; int port_off = 2, tries = 20, ret; + test_unbound_fanout(); test_control_single(); - test_control_group(); + test_control_group(0); + test_control_group(1); test_control_group_max_num_members(); test_unique_fanout_group_ids(); diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 404a2ce759ab..221270cee3ea 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -12,7 +12,7 @@ * * Datapath: * Open a pair of packet sockets and send resp. receive an a priori known - * packet pattern accross the sockets and check if it was received resp. + * packet pattern across the sockets and check if it was received resp. * sent correctly. Fanout in combination with RX_RING is currently not * tested here. * diff --git a/tools/testing/selftests/net/rds/.gitignore b/tools/testing/selftests/net/rds/.gitignore new file mode 100644 index 000000000000..1c6f04e2aa11 --- /dev/null +++ b/tools/testing/selftests/net/rds/.gitignore @@ -0,0 +1 @@ +include.sh diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile new file mode 100644 index 000000000000..612a7219990e --- /dev/null +++ b/tools/testing/selftests/net/rds/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 + +all: + @echo mk_build_dir="$(shell pwd)" > include.sh + +TEST_PROGS := run.sh + +TEST_FILES := include.sh test.py + +EXTRA_CLEAN := /tmp/rds_logs include.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/net/rds/README.txt b/tools/testing/selftests/net/rds/README.txt new file mode 100644 index 000000000000..cbde2951ab13 --- /dev/null +++ b/tools/testing/selftests/net/rds/README.txt @@ -0,0 +1,41 @@ +RDS self-tests +============== + +These scripts provide a coverage test for RDS-TCP by creating two +network namespaces and running rds packets between them. A loopback +network is provisioned with optional probability of packet loss or +corruption. A workload of 50000 hashes, each 64 characters in size, +are passed over an RDS socket on this test network. A passing test means +the RDS-TCP stack was able to recover properly. The provided config.sh +can be used to compile the kernel with the necessary gcov options. The +kernel may optionally be configured to omit the coverage report as well. + +USAGE: + run.sh [-d logdir] [-l packet_loss] [-c packet_corruption] + [-u packet_duplcate] + +OPTIONS: + -d Log directory. Defaults to tools/testing/selftests/net/rds/rds_logs + + -l Simulates a percentage of packet loss + + -c Simulates a percentage of packet corruption + + -u Simulates a percentage of packet duplication. + +EXAMPLE: + + # Create a suitable gcov enabled .config + tools/testing/selftests/net/rds/config.sh -g + + # Alternatly create a gcov disabled .config + tools/testing/selftests/net/rds/config.sh + + # build the kernel + vng --build --config tools/testing/selftests/net/config + + # launch the tests in a VM + vng -v --rwdir ./ --run . --user root --cpus 4 -- \ + "export PYTHONPATH=tools/testing/selftests/net/; tools/testing/selftests/net/rds/run.sh" + +An HTML coverage report will be output in tools/testing/selftests/net/rds/rds_logs/coverage/. diff --git a/tools/testing/selftests/net/rds/config.sh b/tools/testing/selftests/net/rds/config.sh new file mode 100755 index 000000000000..791c8dbe1095 --- /dev/null +++ b/tools/testing/selftests/net/rds/config.sh @@ -0,0 +1,53 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -u +set -x + +unset KBUILD_OUTPUT + +GENERATE_GCOV_REPORT=0 +while getopts "g" opt; do + case ${opt} in + g) + GENERATE_GCOV_REPORT=1 + ;; + :) + echo "USAGE: config.sh [-g]" + exit 1 + ;; + ?) + echo "Invalid option: -${OPTARG}." + exit 1 + ;; + esac +done + +CONF_FILE="tools/testing/selftests/net/config" + +# no modules +scripts/config --file "$CONF_FILE" --disable CONFIG_MODULES + +# enable RDS +scripts/config --file "$CONF_FILE" --enable CONFIG_RDS +scripts/config --file "$CONF_FILE" --enable CONFIG_RDS_TCP + +if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then + # instrument RDS and only RDS + scripts/config --file "$CONF_FILE" --enable CONFIG_GCOV_KERNEL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL + scripts/config --file "$CONF_FILE" --enable GCOV_PROFILE_RDS +else + scripts/config --file "$CONF_FILE" --disable CONFIG_GCOV_KERNEL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_RDS +fi + +# need network namespaces to run tests with veth network interfaces +scripts/config --file "$CONF_FILE" --enable CONFIG_NET_NS +scripts/config --file "$CONF_FILE" --enable CONFIG_VETH + +# simulate packet loss +scripts/config --file "$CONF_FILE" --enable CONFIG_NET_SCH_NETEM + diff --git a/tools/testing/selftests/net/rds/run.sh b/tools/testing/selftests/net/rds/run.sh new file mode 100755 index 000000000000..8aee244f582a --- /dev/null +++ b/tools/testing/selftests/net/rds/run.sh @@ -0,0 +1,224 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -u + +unset KBUILD_OUTPUT + +current_dir="$(realpath "$(dirname "$0")")" +build_dir="$current_dir" + +build_include="$current_dir/include.sh" +if test -f "$build_include"; then + # this include will define "$mk_build_dir" as the location the test was + # built. We will need this if the tests are installed in a location + # other than the kernel source + + source "$build_include" + build_dir="$mk_build_dir" +fi + +# This test requires kernel source and the *.gcda data therein +# Locate the top level of the kernel source, and the net/rds +# subfolder with the appropriate *.gcno object files +ksrc_dir="$(realpath "$build_dir"/../../../../../)" +kconfig="$ksrc_dir/.config" +obj_dir="$ksrc_dir/net/rds" + +GCOV_CMD=gcov + +#check to see if the host has the required packages to generate a gcov report +check_gcov_env() +{ + if ! which "$GCOV_CMD" > /dev/null 2>&1; then + echo "Warning: Could not find gcov. " + GENERATE_GCOV_REPORT=0 + return + fi + + # the gcov version must match the gcc version + GCC_VER=$(gcc -dumpfullversion) + GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| awk 'BEGIN {FS="-"}{print $1}') + if [ "$GCOV_VER" != "$GCC_VER" ]; then + #attempt to find a matching gcov version + GCOV_CMD=gcov-$(gcc -dumpversion) + + if ! which "$GCOV_CMD" > /dev/null 2>&1; then + echo "Warning: Could not find an appropriate gcov installation. \ + gcov version must match gcc version" + GENERATE_GCOV_REPORT=0 + return + fi + + #recheck version number of found gcov executable + GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| \ + awk 'BEGIN {FS="-"}{print $1}') + if [ "$GCOV_VER" != "$GCC_VER" ]; then + echo "Warning: Could not find an appropriate gcov installation. \ + gcov version must match gcc version" + GENERATE_GCOV_REPORT=0 + else + echo "Warning: Mismatched gcc and gcov detected. Using $GCOV_CMD" + fi + fi +} + +# Check to see if the kconfig has the required configs to generate a coverage report +check_gcov_conf() +{ + if ! grep -x "CONFIG_GCOV_PROFILE_RDS=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_PROFILE_RDS should be enabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + if ! grep -x "CONFIG_GCOV_KERNEL=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_KERNEL should be enabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + if grep -x "CONFIG_GCOV_PROFILE_ALL=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_PROFILE_ALL should be disabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + + if [ "$GENERATE_GCOV_REPORT" -eq 0 ]; then + echo "To enable gcov reports, please run "\ + "\"tools/testing/selftests/net/rds/config.sh -g\" and rebuild the kernel" + else + # if we have the required kernel configs, proceed to check the environment to + # ensure we have the required gcov packages + check_gcov_env + fi +} + +# Kselftest framework requirement - SKIP code is 4. +check_conf_enabled() { + if ! grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then + echo "selftests: [SKIP] This test requires $1 enabled" + echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel" + exit 4 + fi +} +check_conf_disabled() { + if grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then + echo "selftests: [SKIP] This test requires $1 disabled" + echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel" + exit 4 + fi +} +check_conf() { + check_conf_enabled CONFIG_NET_SCH_NETEM + check_conf_enabled CONFIG_VETH + check_conf_enabled CONFIG_NET_NS + check_conf_enabled CONFIG_RDS_TCP + check_conf_enabled CONFIG_RDS + check_conf_disabled CONFIG_MODULES +} + +check_env() +{ + if ! test -d "$obj_dir"; then + echo "selftests: [SKIP] This test requires a kernel source tree" + exit 4 + fi + if ! test -e "$kconfig"; then + echo "selftests: [SKIP] This test requires a configured kernel source tree" + exit 4 + fi + if ! which strace > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without strace" + exit 4 + fi + if ! which tcpdump > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without tcpdump" + exit 4 + fi + + if ! which python3 > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without python3" + exit 4 + fi + + python_major=$(python3 -c "import sys; print(sys.version_info[0])") + python_minor=$(python3 -c "import sys; print(sys.version_info[1])") + if [[ python_major -lt 3 || ( python_major -eq 3 && python_minor -lt 9 ) ]] ; then + echo "selftests: [SKIP] Could not run test without at least python3.9" + python3 -V + exit 4 + fi +} + +LOG_DIR="$current_dir"/rds_logs +PLOSS=0 +PCORRUPT=0 +PDUP=0 +GENERATE_GCOV_REPORT=1 +while getopts "d:l:c:u:" opt; do + case ${opt} in + d) + LOG_DIR=${OPTARG} + ;; + l) + PLOSS=${OPTARG} + ;; + c) + PCORRUPT=${OPTARG} + ;; + u) + PDUP=${OPTARG} + ;; + :) + echo "USAGE: run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]" \ + "[-u packet_duplcate] [-g]" + exit 1 + ;; + ?) + echo "Invalid option: -${OPTARG}." + exit 1 + ;; + esac +done + + +check_env +check_conf +check_gcov_conf + + +rm -fr "$LOG_DIR" +TRACE_FILE="${LOG_DIR}/rds-strace.txt" +COVR_DIR="${LOG_DIR}/coverage/" +mkdir -p "$LOG_DIR" +mkdir -p "$COVR_DIR" + +set +e +echo running RDS tests... +echo Traces will be logged to "$TRACE_FILE" +rm -f "$TRACE_FILE" +strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" --timeout 400 -d "$LOG_DIR" \ + -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP" + +test_rc=$? +dmesg > "${LOG_DIR}/dmesg.out" + +if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then + echo saving coverage data... + (set +x; cd /sys/kernel/debug/gcov; find ./* -name '*.gcda' | \ + while read -r f + do + cat < "/sys/kernel/debug/gcov/$f" > "/$f" + done) + + echo running gcovr... + gcovr -s --html-details --gcov-executable "$GCOV_CMD" --gcov-ignore-parse-errors \ + -o "${COVR_DIR}/gcovr" "${ksrc_dir}/net/rds/" +else + echo "Coverage report will be skipped" +fi + +if [ "$test_rc" -eq 0 ]; then + echo "PASS: Test completed successfully" +else + echo "FAIL: Test failed" +fi + +exit "$test_rc" diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py new file mode 100755 index 000000000000..4a7178d11193 --- /dev/null +++ b/tools/testing/selftests/net/rds/test.py @@ -0,0 +1,265 @@ +#! /usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import argparse +import ctypes +import errno +import hashlib +import os +import select +import signal +import socket +import subprocess +import sys +import atexit +from pwd import getpwuid +from os import stat + +# Allow utils module to be imported from different directory +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(this_dir, "../")) +from lib.py.utils import ip + +libc = ctypes.cdll.LoadLibrary('libc.so.6') +setns = libc.setns + +net0 = 'net0' +net1 = 'net1' + +veth0 = 'veth0' +veth1 = 'veth1' + +# Helper function for creating a socket inside a network namespace. +# We need this because otherwise RDS will detect that the two TCP +# sockets are on the same interface and use the loop transport instead +# of the TCP transport. +def netns_socket(netns, *args): + u0, u1 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) + + child = os.fork() + if child == 0: + # change network namespace + with open(f'/var/run/netns/{netns}') as f: + try: + ret = setns(f.fileno(), 0) + except IOError as e: + print(e.errno) + print(e) + + # create socket in target namespace + s = socket.socket(*args) + + # send resulting socket to parent + socket.send_fds(u0, [], [s.fileno()]) + + sys.exit(0) + + # receive socket from child + _, s, _, _ = socket.recv_fds(u1, 0, 1) + os.waitpid(child, 0) + u0.close() + u1.close() + return socket.fromfd(s[0], *args) + +def signal_handler(sig, frame): + print('Test timed out') + sys.exit(1) + +#Parse out command line arguments. We take an optional +# timeout parameter and an optional log output folder +parser = argparse.ArgumentParser(description="init script args", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("-d", "--logdir", action="store", + help="directory to store logs", default="/tmp") +parser.add_argument('--timeout', help="timeout to terminate hung test", + type=int, default=0) +parser.add_argument('-l', '--loss', help="Simulate tcp packet loss", + type=int, default=0) +parser.add_argument('-c', '--corruption', help="Simulate tcp packet corruption", + type=int, default=0) +parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication", + type=int, default=0) +args = parser.parse_args() +logdir=args.logdir +packet_loss=str(args.loss)+'%' +packet_corruption=str(args.corruption)+'%' +packet_duplicate=str(args.duplicate)+'%' + +ip(f"netns add {net0}") +ip(f"netns add {net1}") +ip(f"link add type veth") + +addrs = [ + # we technically don't need different port numbers, but this will + # help identify traffic in the network analyzer + ('10.0.0.1', 10000), + ('10.0.0.2', 20000), +] + +# move interfaces to separate namespaces so they can no longer be +# bound directly; this prevents rds from switching over from the tcp +# transport to the loop transport. +ip(f"link set {veth0} netns {net0} up") +ip(f"link set {veth1} netns {net1} up") + + + +# add addresses +ip(f"-n {net0} addr add {addrs[0][0]}/32 dev {veth0}") +ip(f"-n {net1} addr add {addrs[1][0]}/32 dev {veth1}") + +# add routes +ip(f"-n {net0} route add {addrs[1][0]}/32 dev {veth0}") +ip(f"-n {net1} route add {addrs[0][0]}/32 dev {veth1}") + +# sanity check that our two interfaces/addresses are correctly set up +# and communicating by doing a single ping +ip(f"netns exec {net0} ping -c 1 {addrs[1][0]}") + +# Start a packet capture on each network +for net in [net0, net1]: + tcpdump_pid = os.fork() + if tcpdump_pid == 0: + pcap = logdir+'/'+net+'.pcap' + subprocess.check_call(['touch', pcap]) + user = getpwuid(stat(pcap).st_uid).pw_name + ip(f"netns exec {net} /usr/sbin/tcpdump -Z {user} -i any -w {pcap}") + sys.exit(0) + +# simulate packet loss, duplication and corruption +for net, iface in [(net0, veth0), (net1, veth1)]: + ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem \ + corrupt {packet_corruption} loss {packet_loss} duplicate \ + {packet_duplicate}") + +# add a timeout +if args.timeout > 0: + signal.alarm(args.timeout) + signal.signal(signal.SIGALRM, signal_handler) + +sockets = [ + netns_socket(net0, socket.AF_RDS, socket.SOCK_SEQPACKET), + netns_socket(net1, socket.AF_RDS, socket.SOCK_SEQPACKET), +] + +for s, addr in zip(sockets, addrs): + s.bind(addr) + s.setblocking(0) + +fileno_to_socket = { + s.fileno(): s for s in sockets +} + +addr_to_socket = { + addr: s for addr, s in zip(addrs, sockets) +} + +socket_to_addr = { + s: addr for addr, s in zip(addrs, sockets) +} + +send_hashes = {} +recv_hashes = {} + +ep = select.epoll() + +for s in sockets: + ep.register(s, select.EPOLLRDNORM) + +n = 50000 +nr_send = 0 +nr_recv = 0 + +while nr_send < n: + # Send as much as we can without blocking + print("sending...", nr_send, nr_recv) + while nr_send < n: + send_data = hashlib.sha256( + f'packet {nr_send}'.encode('utf-8')).hexdigest().encode('utf-8') + + # pseudo-random send/receive pattern + sender = sockets[nr_send % 2] + receiver = sockets[1 - (nr_send % 3) % 2] + + try: + sender.sendto(send_data, socket_to_addr[receiver]) + send_hashes.setdefault((sender.fileno(), receiver.fileno()), + hashlib.sha256()).update(f'<{send_data}>'.encode('utf-8')) + nr_send = nr_send + 1 + except BlockingIOError as e: + break + except OSError as e: + if e.errno in [errno.ENOBUFS, errno.ECONNRESET, errno.EPIPE]: + break + raise + + # Receive as much as we can without blocking + print("receiving...", nr_send, nr_recv) + while nr_recv < nr_send: + for fileno, eventmask in ep.poll(): + receiver = fileno_to_socket[fileno] + + if eventmask & select.EPOLLRDNORM: + while True: + try: + recv_data, address = receiver.recvfrom(1024) + sender = addr_to_socket[address] + recv_hashes.setdefault((sender.fileno(), + receiver.fileno()), hashlib.sha256()).update( + f'<{recv_data}>'.encode('utf-8')) + nr_recv = nr_recv + 1 + except BlockingIOError as e: + break + + # exercise net/rds/tcp.c:rds_tcp_sysctl_reset() + for net in [net0, net1]: + ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000") + ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000") + +print("done", nr_send, nr_recv) + +# the Python socket module doesn't know these +RDS_INFO_FIRST = 10000 +RDS_INFO_LAST = 10017 + +nr_success = 0 +nr_error = 0 + +for s in sockets: + for optname in range(RDS_INFO_FIRST, RDS_INFO_LAST + 1): + # Sigh, the Python socket module doesn't allow us to pass + # buffer lengths greater than 1024 for some reason. RDS + # wants multiple pages. + try: + s.getsockopt(socket.SOL_RDS, optname, 1024) + nr_success = nr_success + 1 + except OSError as e: + nr_error = nr_error + 1 + if e.errno == errno.ENOSPC: + # ignore + pass + +print(f"getsockopt(): {nr_success}/{nr_error}") + +print("Stopping network packet captures") +subprocess.check_call(['killall', '-q', 'tcpdump']) + +# We're done sending and receiving stuff, now let's check if what +# we received is what we sent. +for (sender, receiver), send_hash in send_hashes.items(): + recv_hash = recv_hashes.get((sender, receiver)) + + if recv_hash is None: + print("FAIL: No data received") + sys.exit(1) + + if send_hash.hexdigest() != recv_hash.hexdigest(): + print("FAIL: Send/recv mismatch") + print("hash expected:", send_hash.hexdigest()) + print("hash received:", recv_hash.hexdigest()) + sys.exit(1) + + print(f"{sender}/{receiver}: ok") + +print("Success") +sys.exit(0) diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c index 066efd30e294..7b9bf8a7bbe1 100644 --- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c @@ -112,7 +112,7 @@ TEST(reuseaddr_ports_exhausted_reusable_same_euid) ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); if (opts->reuseport[0] && opts->reuseport[1]) { - EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened."); + EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets successfully listened."); } else { EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations."); } diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c index b8475cb29be7..1c43401a1c80 100644 --- a/tools/testing/selftests/net/reuseport_addr_any.c +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -9,7 +9,6 @@ #include <arpa/inet.h> #include <errno.h> #include <error.h> -#include <linux/dccp.h> #include <linux/in.h> #include <linux/unistd.h> #include <stdbool.h> @@ -21,10 +20,6 @@ #include <sys/socket.h> #include <unistd.h> -#ifndef SOL_DCCP -#define SOL_DCCP 269 -#endif - static const char *IP4_ADDR = "127.0.0.1"; static const char *IP6_ADDR = "::1"; static const char *IP4_MAPPED6 = "::ffff:127.0.0.1"; @@ -86,15 +81,6 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count, if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) error(1, errno, "tcp: failed to listen on receive port"); - else if (proto == SOCK_DCCP) { - if (setsockopt(rcv_fds[i], SOL_DCCP, - DCCP_SOCKOPT_SERVICE, - &(int) {htonl(42)}, sizeof(int))) - error(1, errno, "failed to setsockopt"); - - if (listen(rcv_fds[i], 10)) - error(1, errno, "dccp: failed to listen on receive port"); - } } } @@ -148,11 +134,6 @@ static int connect_and_send(int family, int proto) if (fd < 0) error(1, errno, "failed to create send socket"); - if (proto == SOCK_DCCP && - setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, - &(int){htonl(42)}, sizeof(int))) - error(1, errno, "failed to setsockopt"); - if (bind(fd, saddr, sz)) error(1, errno, "failed to bind send socket"); @@ -175,7 +156,7 @@ static int receive_once(int epfd, int proto) if (i < 0) error(1, errno, "epoll_wait failed"); - if (proto == SOCK_STREAM || proto == SOCK_DCCP) { + if (proto == SOCK_STREAM) { fd = accept(ev.data.fd, NULL, NULL); if (fd < 0) error(1, errno, "failed to accept"); @@ -243,20 +224,6 @@ static void run_one_test(int fam_send, int fam_rcv, int proto, static void test_proto(int proto, const char *proto_str) { - if (proto == SOCK_DCCP) { - int test_fd; - - test_fd = socket(AF_INET, proto, 0); - if (test_fd < 0) { - if (errno == ESOCKTNOSUPPORT) { - fprintf(stderr, "DCCP not supported: skipping DCCP tests\n"); - return; - } else - error(1, errno, "failed to create a DCCP socket"); - } - close(test_fd); - } - fprintf(stderr, "%s IPv4 ... ", proto_str); run_one_test(AF_INET, AF_INET, proto, IP4_ADDR); @@ -271,7 +238,6 @@ int main(void) { test_proto(SOCK_DGRAM, "UDP"); test_proto(SOCK_STREAM, "TCP"); - test_proto(SOCK_DCCP, "DCCP"); fprintf(stderr, "SUCCESS\n"); return 0; diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py new file mode 100755 index 000000000000..e9ad5e88da97 --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_exit, ksft_run, ksft_ge, RtnlAddrFamily +import socket + +IPV4_ALL_HOSTS_MULTICAST = b'\xe0\x00\x00\x01' + +def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None: + """ + Verify that at least one interface has the IPv4 all-hosts multicast address. + At least the loopback interface should have this address. + """ + + addresses = rtnl.getmulticast({"ifa-family": socket.AF_INET}, dump=True) + + all_host_multicasts = [ + addr for addr in addresses if addr['multicast'] == IPV4_ALL_HOSTS_MULTICAST + ] + + ksft_ge(len(all_host_multicasts), 1, + "No interface found with the IPv4 all-hosts multicast address") + +def main() -> None: + rtnl = RtnlAddrFamily() + ksft_run([dump_mcaddr_check], args=(rtnl, )) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index bdf6f10d0558..2e8243a65b50 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -21,14 +21,15 @@ ALL_TESTS=" kci_test_vrf kci_test_encap kci_test_macsec - kci_test_macsec_offload kci_test_ipsec kci_test_ipsec_offload kci_test_fdb_get + kci_test_fdb_del kci_test_neigh_get kci_test_bridge_parent_id kci_test_address_proto kci_test_enslave_bonding + kci_test_mngtmpaddr " devdummy="test-dummy0" @@ -44,6 +45,7 @@ check_err() if [ $ret -eq 0 ]; then ret=$1 fi + [ -n "$2" ] && echo "$2" } # same but inverted -- used when command must fail for test to pass @@ -559,73 +561,6 @@ kci_test_macsec() end_test "PASS: macsec" } -kci_test_macsec_offload() -{ - sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ - sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ - probed=false - local ret=0 - run_cmd_grep "^Usage: ip macsec" ip macsec help - if [ $? -ne 0 ]; then - end_test "SKIP: macsec: iproute2 too old" - return $ksft_skip - fi - - if ! mount | grep -q debugfs; then - mount -t debugfs none /sys/kernel/debug/ &> /dev/null - fi - - # setup netdevsim since dummydev doesn't have offload support - if [ ! -w /sys/bus/netdevsim/new_device ] ; then - run_cmd modprobe -q netdevsim - - if [ $ret -ne 0 ]; then - end_test "SKIP: macsec_offload can't load netdevsim" - return $ksft_skip - fi - probed=true - fi - - echo "0" > /sys/bus/netdevsim/new_device - while [ ! -d $sysfsnet ] ; do :; done - udevadm settle - dev=`ls $sysfsnet` - - ip link set $dev up - if [ ! -d $sysfsd ] ; then - end_test "FAIL: macsec_offload can't create device $dev" - return 1 - fi - run_cmd_grep 'macsec-hw-offload: on' ethtool -k $dev - if [ $? -eq 1 ] ; then - end_test "FAIL: macsec_offload netdevsim doesn't support MACsec offload" - return 1 - fi - run_cmd ip link add link $dev kci_macsec1 type macsec port 4 offload mac - run_cmd ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac - run_cmd ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac - run_cmd_fail ip link add link $dev kci_macsec4 type macsec port 8 offload mac - - msname=kci_macsec1 - run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 - run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" - run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \ - key 00 0123456789abcdef0123456789abcdef - run_cmd_fail ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef" - # clean up any leftovers - for msdev in kci_macsec{1,2,3,4} ; do - ip link del $msdev 2> /dev/null - done - echo 0 > /sys/bus/netdevsim/del_device - $probed && rmmod netdevsim - - if [ $ret -ne 0 ]; then - end_test "FAIL: macsec_offload" - return 1 - fi - end_test "PASS: macsec_offload" -} - #------------------------------------------------------------------- # Example commands # ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \ @@ -809,10 +744,10 @@ kci_test_ipsec_offload() # does driver have correct offload info run_cmd diff $sysfsf - << EOF SA count=2 tx=3 -sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000 +sa[0] tx ipaddr=$dstip sa[0] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 sa[0] key=0x34333231 38373635 32313039 36353433 -sa[1] rx ipaddr=0x00000000 00000000 00000000 037ba8c0 +sa[1] rx ipaddr=$srcip sa[1] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 sa[1] key=0x34333231 38373635 32313039 36353433 EOF @@ -1065,6 +1000,45 @@ kci_test_fdb_get() end_test "PASS: bridge fdb get" } +kci_test_fdb_del() +{ + local test_mac=de:ad:be:ef:13:37 + local dummydev="dummy1" + local brdev="test-br0" + local ret=0 + + run_cmd_grep 'bridge fdb get' bridge fdb help + if [ $? -ne 0 ]; then + end_test "SKIP: fdb del tests: iproute2 too old" + return $ksft_skip + fi + + setup_ns testns + if [ $? -ne 0 ]; then + end_test "SKIP fdb del tests: cannot add net namespace $testns" + return $ksft_skip + fi + IP="ip -netns $testns" + BRIDGE="bridge -netns $testns" + run_cmd $IP link add $dummydev type dummy + run_cmd $IP link add name $brdev type bridge vlan_filtering 1 + run_cmd $IP link set dev $dummydev master $brdev + run_cmd $BRIDGE fdb add $test_mac dev $dummydev master static vlan 1 + run_cmd $BRIDGE vlan del vid 1 dev $dummydev + run_cmd $BRIDGE fdb get $test_mac br $brdev vlan 1 + run_cmd $BRIDGE fdb del $test_mac dev $dummydev master vlan 1 + run_cmd_fail $BRIDGE fdb get $test_mac br $brdev vlan 1 + + ip netns del $testns &>/dev/null + + if [ $ret -ne 0 ]; then + end_test "FAIL: bridge fdb del" + return 1 + fi + + end_test "PASS: bridge fdb del" +} + kci_test_neigh_get() { dstmac=de:ad:be:ef:13:37 @@ -1267,6 +1241,99 @@ kci_test_enslave_bonding() ip netns del "$testns" } +# Called to validate the addresses on $IFNAME: +# +# 1. Every `temporary` address must have a matching `mngtmpaddr` +# 2. Every `mngtmpaddr` address must have some un`deprecated` `temporary` +# +# If the mngtmpaddr or tempaddr checking failed, return 0 and stop slowwait +validate_mngtmpaddr() +{ + local dev=$1 + local prefix="" + local addr_list=$(ip -j -n $testns addr show dev ${dev}) + local temp_addrs=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.temporary == true) | .local') + local mng_prefixes=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.mngtmpaddr == true) | .local' | \ + cut -d: -f1-4 | tr '\n' ' ') + local undep_prefixes=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.temporary == true and .deprecated != true) | .local' | \ + cut -d: -f1-4 | tr '\n' ' ') + + # 1. All temporary addresses (temp and dep) must have a matching mngtmpaddr + for address in ${temp_addrs}; do + prefix=$(echo ${address} | cut -d: -f1-4) + if [[ ! " ${mng_prefixes} " =~ " $prefix " ]]; then + check_err 1 "FAIL: Temporary $address with no matching mngtmpaddr!"; + return 0 + fi + done + + # 2. All mngtmpaddr addresses must have a temporary address (not dep) + for prefix in ${mng_prefixes}; do + if [[ ! " ${undep_prefixes} " =~ " $prefix " ]]; then + check_err 1 "FAIL: No undeprecated temporary in $prefix!"; + return 0 + fi + done + + return 1 +} + +kci_test_mngtmpaddr() +{ + local ret=0 + + setup_ns testns + if [ $? -ne 0 ]; then + end_test "SKIP mngtmpaddr tests: cannot add net namespace $testns" + return $ksft_skip + fi + + # 1. Create a dummy Ethernet interface + run_cmd ip -n $testns link add ${devdummy} type dummy + run_cmd ip -n $testns link set ${devdummy} up + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.use_tempaddr=1 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_prefered_lft=10 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_valid_lft=25 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.max_desync_factor=1 + + # 2. Create several mngtmpaddr addresses on that interface. + # with temp_*_lft configured to be pretty short (10 and 35 seconds + # for prefer/valid respectively) + for i in $(seq 1 9); do + run_cmd ip -n $testns addr add 2001:db8:7e57:${i}::1/64 mngtmpaddr dev ${devdummy} + done + + # 3. Confirm that a preferred temporary address exists for each mngtmpaddr + # address at all times, polling once per second for 30 seconds. + slowwait 30 validate_mngtmpaddr ${devdummy} + + # 4. Delete each mngtmpaddr address, one at a time (alternating between + # deleting and merely un-mngtmpaddr-ing), and confirm that the other + # mngtmpaddr addresses still have preferred temporaries. + for i in $(seq 1 9); do + (( $i % 4 == 0 )) && mng_flag="mngtmpaddr" || mng_flag="" + if (( $i % 2 == 0 )); then + run_cmd ip -n $testns addr del 2001:db8:7e57:${i}::1/64 $mng_flag dev ${devdummy} + else + run_cmd ip -n $testns addr change 2001:db8:7e57:${i}::1/64 dev ${devdummy} + fi + # the temp addr should be deleted + validate_mngtmpaddr ${devdummy} + done + + if [ $ret -ne 0 ]; then + end_test "FAIL: mngtmpaddr add/remove incorrect" + else + end_test "PASS: mngtmpaddr add/remove correctly" + fi + + ip netns del "$testns" + return $ret +} + kci_test_rtnl() { local current_test diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c index 9eb42570294d..16ac4df55fdb 100644 --- a/tools/testing/selftests/net/rxtimestamp.c +++ b/tools/testing/selftests/net/rxtimestamp.c @@ -57,6 +57,8 @@ static struct sof_flag sof_flags[] = { SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE), SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE), SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE), + SOF_FLAG(SOF_TIMESTAMPING_OPT_RX_FILTER), + SOF_FLAG(SOF_TIMESTAMPING_RAW_HARDWARE), }; static struct socket_type socket_types[] = { @@ -98,6 +100,22 @@ static struct test_case test_cases[] = { {} }, { + { .so_timestamping = SOF_TIMESTAMPING_RAW_HARDWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + {} + }, + { + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + {} + }, + { + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_RX_SOFTWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + { .swtstamp = true } + }, + { { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE }, { .swtstamp = true } diff --git a/tools/testing/selftests/net/sample_map_ret0.bpf.c b/tools/testing/selftests/net/sample_map_ret0.bpf.c new file mode 100644 index 000000000000..43ca92594926 --- /dev/null +++ b/tools/testing/selftests/net/sample_map_ret0.bpf.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, long); + __uint(max_entries, 2); +} htab SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, long); + __uint(max_entries, 2); +} array SEC(".maps"); + +/* Sample program which should always load for testing control paths. */ +SEC("xdp") int func() +{ + __u64 key64 = 0; + __u32 key = 0; + long *value; + + value = bpf_map_lookup_elem(&htab, &key); + if (!value) + return 1; + value = bpf_map_lookup_elem(&array, &key64); + if (!value) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/net/sample_ret0.bpf.c b/tools/testing/selftests/net/sample_ret0.bpf.c new file mode 100644 index 000000000000..1df5ca98bb65 --- /dev/null +++ b/tools/testing/selftests/net/sample_ret0.bpf.c @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ + +#define SEC(name) __attribute__((section(name), used)) + +/* Sample program which should always load for testing control paths. */ +SEC("xdp") +int func() +{ + return 0; +} diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh index 1f78a87f6f37..152bf4c65747 100644 --- a/tools/testing/selftests/net/setup_veth.sh +++ b/tools/testing/selftests/net/setup_veth.sh @@ -11,7 +11,8 @@ setup_veth_ns() { local -r ns_mac="$4" [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" + echo 200000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" + echo 1 > "/sys/class/net/${ns_dev}/napi_defer_hard_irqs" ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 ip -netns "${ns_name}" link set dev "${ns_dev}" up diff --git a/tools/testing/selftests/net/sk_so_peek_off.c b/tools/testing/selftests/net/sk_so_peek_off.c new file mode 100644 index 000000000000..d87dd8d8d491 --- /dev/null +++ b/tools/testing/selftests/net/sk_so_peek_off.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include "../kselftest.h" + +static char *afstr(int af, int proto) +{ + if (proto == IPPROTO_TCP) + return af == AF_INET ? "TCP/IPv4" : "TCP/IPv6"; + else + return af == AF_INET ? "UDP/IPv4" : "UDP/IPv6"; +} + +int sk_peek_offset_probe(sa_family_t af, int proto) +{ + int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM); + int optv = 0; + int ret = 0; + int s; + + s = socket(af, type, proto); + if (s < 0) { + ksft_perror("Temporary TCP socket creation failed"); + } else { + if (!setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &optv, sizeof(int))) + ret = 1; + else + printf("%s does not support SO_PEEK_OFF\n", afstr(af, proto)); + close(s); + } + return ret; +} + +static void sk_peek_offset_set(int s, int offset) +{ + if (setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, sizeof(offset))) + ksft_perror("Failed to set SO_PEEK_OFF value\n"); +} + +static int sk_peek_offset_get(int s) +{ + int offset; + socklen_t len = sizeof(offset); + + if (getsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, &len)) + ksft_perror("Failed to get SO_PEEK_OFF value\n"); + return offset; +} + +static int sk_peek_offset_test(sa_family_t af, int proto) +{ + int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM); + union { + struct sockaddr sa; + struct sockaddr_in a4; + struct sockaddr_in6 a6; + } a; + int res = 0; + int s[2] = {0, 0}; + int recv_sock = 0; + int offset = 0; + ssize_t len; + char buf[2]; + + memset(&a, 0, sizeof(a)); + a.sa.sa_family = af; + + s[0] = recv_sock = socket(af, type, proto); + s[1] = socket(af, type, proto); + + if (s[0] < 0 || s[1] < 0) { + ksft_perror("Temporary socket creation failed\n"); + goto out; + } + if (bind(s[0], &a.sa, sizeof(a)) < 0) { + ksft_perror("Temporary socket bind() failed\n"); + goto out; + } + if (getsockname(s[0], &a.sa, &((socklen_t) { sizeof(a) })) < 0) { + ksft_perror("Temporary socket getsockname() failed\n"); + goto out; + } + if (proto == IPPROTO_TCP && listen(s[0], 0) < 0) { + ksft_perror("Temporary socket listen() failed\n"); + goto out; + } + if (connect(s[1], &a.sa, sizeof(a)) < 0) { + ksft_perror("Temporary socket connect() failed\n"); + goto out; + } + if (proto == IPPROTO_TCP) { + recv_sock = accept(s[0], NULL, NULL); + if (recv_sock <= 0) { + ksft_perror("Temporary socket accept() failed\n"); + goto out; + } + } + + /* Some basic tests of getting/setting offset */ + offset = sk_peek_offset_get(recv_sock); + if (offset != -1) { + ksft_perror("Initial value of socket offset not -1\n"); + goto out; + } + sk_peek_offset_set(recv_sock, 0); + offset = sk_peek_offset_get(recv_sock); + if (offset != 0) { + ksft_perror("Failed to set socket offset to 0\n"); + goto out; + } + + /* Transfer a message */ + if (send(s[1], (char *)("ab"), 2, 0) != 2) { + ksft_perror("Temporary probe socket send() failed\n"); + goto out; + } + /* Read first byte */ + len = recv(recv_sock, buf, 1, MSG_PEEK); + if (len != 1 || buf[0] != 'a') { + ksft_perror("Failed to read first byte of message\n"); + goto out; + } + offset = sk_peek_offset_get(recv_sock); + if (offset != 1) { + ksft_perror("Offset not forwarded correctly at first byte\n"); + goto out; + } + /* Try to read beyond last byte */ + len = recv(recv_sock, buf, 2, MSG_PEEK); + if (len != 1 || buf[0] != 'b') { + ksft_perror("Failed to read last byte of message\n"); + goto out; + } + offset = sk_peek_offset_get(recv_sock); + if (offset != 2) { + ksft_perror("Offset not forwarded correctly at last byte\n"); + goto out; + } + /* Flush message */ + len = recv(recv_sock, buf, 2, MSG_TRUNC); + if (len != 2) { + ksft_perror("Failed to flush message\n"); + goto out; + } + offset = sk_peek_offset_get(recv_sock); + if (offset != 0) { + ksft_perror("Offset not reverted correctly after flush\n"); + goto out; + } + + printf("%s with MSG_PEEK_OFF works correctly\n", afstr(af, proto)); + res = 1; +out: + if (proto == IPPROTO_TCP && recv_sock >= 0) + close(recv_sock); + if (s[1] >= 0) + close(s[1]); + if (s[0] >= 0) + close(s[0]); + return res; +} + +static int do_test(int proto) +{ + int res4, res6; + + res4 = sk_peek_offset_probe(AF_INET, proto); + res6 = sk_peek_offset_probe(AF_INET6, proto); + + if (!res4 && !res6) + return KSFT_SKIP; + + if (res4) + res4 = sk_peek_offset_test(AF_INET, proto); + + if (res6) + res6 = sk_peek_offset_test(AF_INET6, proto); + + if (!res4 || !res6) + return KSFT_FAIL; + + return KSFT_PASS; +} + +int main(void) +{ + int restcp, resudp; + + restcp = do_test(IPPROTO_TCP); + resudp = do_test(IPPROTO_UDP); + if (restcp == KSFT_FAIL || resudp == KSFT_FAIL) + return KSFT_FAIL; + + return KSFT_PASS; +} diff --git a/tools/testing/selftests/net/skf_net_off.c b/tools/testing/selftests/net/skf_net_off.c new file mode 100644 index 000000000000..1fdf61d6cd7f --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Open a tun device. + * + * [modifications: use IFF_NAPI_FRAGS, add sk filter] + * + * Expects the device to have been configured previously, e.g.: + * sudo ip tuntap add name tap1 mode tap + * sudo ip link set tap1 up + * sudo ip link set dev tap1 addr 02:00:00:00:00:01 + * sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad + * + * And to avoid premature pskb_may_pull: + * + * sudo ethtool -K tap1 gro off + * sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux' + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if.h> +#include <linux/if_packet.h> +#include <linux/if_tun.h> +#include <linux/ipv6.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/udp.h> +#include <poll.h> +#include <signal.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/poll.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <unistd.h> + +static bool cfg_do_filter; +static bool cfg_do_frags; +static int cfg_dst_port = 8000; +static char *cfg_ifname; + +static int tun_open(const char *tun_name) +{ + struct ifreq ifr = {0}; + int fd, ret; + + fd = open("/dev/net/tun", O_RDWR); + if (fd == -1) + error(1, errno, "open /dev/net/tun"); + + ifr.ifr_flags = IFF_TAP; + if (cfg_do_frags) + ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS; + + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1); + + ret = ioctl(fd, TUNSETIFF, &ifr); + if (ret) + error(1, ret, "ioctl TUNSETIFF"); + + return fd; +} + +static void sk_set_filter(int fd) +{ + const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt); + const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest); + + /* Filter UDP packets with destination port cfg_dst_port */ + struct sock_filter filter_code[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + + struct sock_fprog filter = { + sizeof(filter_code) / sizeof(filter_code[0]), + filter_code, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter))) + error(1, errno, "setsockopt attach filter"); +} + +static int raw_open(void) +{ + int fd; + + fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP); + if (fd == -1) + error(1, errno, "socket raw (udp)"); + + if (cfg_do_filter) + sk_set_filter(fd); + + return fd; +} + +static void tun_write(int fd) +{ + const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 }; + const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }; + struct tun_pi pi = {0}; + struct ipv6hdr ip6h = {0}; + struct udphdr uh = {0}; + struct ethhdr eth = {0}; + uint32_t payload; + struct iovec iov[5]; + int ret; + + pi.proto = htons(ETH_P_IPV6); + + memcpy(eth.h_source, eth_src, sizeof(eth_src)); + memcpy(eth.h_dest, eth_dst, sizeof(eth_dst)); + eth.h_proto = htons(ETH_P_IPV6); + + ip6h.version = 6; + ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t)); + ip6h.nexthdr = IPPROTO_UDP; + ip6h.hop_limit = 8; + if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1) + error(1, errno, "inet_pton src"); + if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1) + error(1, errno, "inet_pton src"); + + uh.source = htons(8000); + uh.dest = htons(cfg_dst_port); + uh.len = ip6h.payload_len; + uh.check = 0; + + payload = htonl(0xABABABAB); /* Covered in IPv6 length */ + + iov[0].iov_base = π + iov[0].iov_len = sizeof(pi); + iov[1].iov_base = ð + iov[1].iov_len = sizeof(eth); + iov[2].iov_base = &ip6h; + iov[2].iov_len = sizeof(ip6h); + iov[3].iov_base = &uh; + iov[3].iov_len = sizeof(uh); + iov[4].iov_base = &payload; + iov[4].iov_len = sizeof(payload); + + ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0])); + if (ret <= 0) + error(1, errno, "writev"); +} + +static void raw_read(int fd) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + struct msghdr msg = {0}; + struct iovec iov[2]; + struct udphdr uh; + uint32_t payload[2]; + int ret; + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcvtimeo udp"); + + iov[0].iov_base = &uh; + iov[0].iov_len = sizeof(uh); + + iov[1].iov_base = payload; + iov[1].iov_len = sizeof(payload); + + msg.msg_iov = iov; + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); + + ret = recvmsg(fd, &msg, 0); + if (ret <= 0) + error(1, errno, "read raw"); + if (ret != sizeof(uh) + sizeof(payload[0])) + error(1, errno, "read raw: len=%d\n", ret); + + fprintf(stderr, "raw recv: 0x%x\n", payload[0]); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "fFi:")) != -1) { + switch (c) { + case 'f': + cfg_do_filter = true; + printf("bpf filter enabled\n"); + break; + case 'F': + cfg_do_frags = true; + printf("napi frags mode enabled\n"); + break; + case 'i': + cfg_ifname = optarg; + break; + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!cfg_ifname) + error(1, 0, "must specify tap interface name (-i)"); +} + +int main(int argc, char **argv) +{ + int fdt, fdr; + + parse_opts(argc, argv); + + fdr = raw_open(); + fdt = tun_open(cfg_ifname); + + tun_write(fdt); + raw_read(fdr); + + if (close(fdt)) + error(1, errno, "close tun"); + if (close(fdr)) + error(1, errno, "close udp"); + + fprintf(stderr, "OK\n"); + return 0; +} + diff --git a/tools/testing/selftests/net/skf_net_off.sh b/tools/testing/selftests/net/skf_net_off.sh new file mode 100755 index 000000000000..5da5066fb465 --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly NS="ns-$(mktemp -u XXXXXX)" + +cleanup() { + ip netns del $NS +} + +ip netns add $NS +trap cleanup EXIT + +ip -netns $NS link set lo up +ip -netns $NS tuntap add name tap1 mode tap +ip -netns $NS link set tap1 up +ip -netns $NS link set dev tap1 addr 02:00:00:00:00:01 +ip -netns $NS -6 addr add fdab::1 peer fdab::2 dev tap1 nodad +ip netns exec $NS ethtool -K tap1 gro off + +# disable early demux, else udp_v6_early_demux pulls udp header into linear +ip netns exec $NS sysctl -w net.ipv4.ip_early_demux=0 + +echo "no filter" +ip netns exec $NS ./skf_net_off -i tap1 + +echo "filter, linear skb (-f)" +ip netns exec $NS ./skf_net_off -i tap1 -f + +echo "filter, fragmented skb (-f) (-F)" +ip netns exec $NS ./skf_net_off -i tap1 -f -F diff --git a/tools/testing/selftests/net/so_rcv_listener.c b/tools/testing/selftests/net/so_rcv_listener.c new file mode 100644 index 000000000000..bc5841192aa6 --- /dev/null +++ b/tools/testing/selftests/net/so_rcv_listener.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <netdb.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#ifndef SO_RCVPRIORITY +#define SO_RCVPRIORITY 82 +#endif + +struct options { + __u32 val; + int name; + int rcvname; + const char *host; + const char *service; +} opt; + +static void __attribute__((noreturn)) usage(const char *bin) +{ + printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin); + printf("Options:\n" + "\t\t-M val Test SO_RCVMARK\n" + "\t\t-P val Test SO_RCVPRIORITY\n" + ""); + exit(EXIT_FAILURE); +} + +static void parse_args(int argc, char *argv[]) +{ + int o; + + while ((o = getopt(argc, argv, "M:P:")) != -1) { + switch (o) { + case 'M': + opt.val = atoi(optarg); + opt.name = SO_MARK; + opt.rcvname = SO_RCVMARK; + break; + case 'P': + opt.val = atoi(optarg); + opt.name = SO_PRIORITY; + opt.rcvname = SO_RCVPRIORITY; + break; + default: + usage(argv[0]); + break; + } + } + + if (optind != argc - 2) + usage(argv[0]); + + opt.host = argv[optind]; + opt.service = argv[optind + 1]; +} + +int main(int argc, char *argv[]) +{ + int err = 0; + int recv_fd = -1; + int ret_value = 0; + __u32 recv_val; + struct cmsghdr *cmsg; + char cbuf[CMSG_SPACE(sizeof(__u32))]; + char recv_buf[CMSG_SPACE(sizeof(__u32))]; + struct iovec iov[1]; + struct msghdr msg; + struct sockaddr_in recv_addr4; + struct sockaddr_in6 recv_addr6; + + parse_args(argc, argv); + + int family = strchr(opt.host, ':') ? AF_INET6 : AF_INET; + + recv_fd = socket(family, SOCK_DGRAM, IPPROTO_UDP); + if (recv_fd < 0) { + perror("Can't open recv socket"); + ret_value = -errno; + goto cleanup; + } + + err = setsockopt(recv_fd, SOL_SOCKET, opt.rcvname, &opt.val, sizeof(opt.val)); + if (err < 0) { + perror("Recv setsockopt error"); + ret_value = -errno; + goto cleanup; + } + + if (family == AF_INET) { + memset(&recv_addr4, 0, sizeof(recv_addr4)); + recv_addr4.sin_family = family; + recv_addr4.sin_port = htons(atoi(opt.service)); + + if (inet_pton(family, opt.host, &recv_addr4.sin_addr) <= 0) { + perror("Invalid IPV4 address"); + ret_value = -errno; + goto cleanup; + } + + err = bind(recv_fd, (struct sockaddr *)&recv_addr4, sizeof(recv_addr4)); + } else { + memset(&recv_addr6, 0, sizeof(recv_addr6)); + recv_addr6.sin6_family = family; + recv_addr6.sin6_port = htons(atoi(opt.service)); + + if (inet_pton(family, opt.host, &recv_addr6.sin6_addr) <= 0) { + perror("Invalid IPV6 address"); + ret_value = -errno; + goto cleanup; + } + + err = bind(recv_fd, (struct sockaddr *)&recv_addr6, sizeof(recv_addr6)); + } + + if (err < 0) { + perror("Recv bind error"); + ret_value = -errno; + goto cleanup; + } + + iov[0].iov_base = recv_buf; + iov[0].iov_len = sizeof(recv_buf); + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + + err = recvmsg(recv_fd, &msg, 0); + if (err < 0) { + perror("Message receive error"); + ret_value = -errno; + goto cleanup; + } + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == opt.name) { + recv_val = *(__u32 *)CMSG_DATA(cmsg); + printf("Received value: %u\n", recv_val); + + if (recv_val != opt.val) { + fprintf(stderr, "Error: expected value: %u, got: %u\n", + opt.val, recv_val); + ret_value = -EINVAL; + } + goto cleanup; + } + } + + fprintf(stderr, "Error: No matching cmsg received\n"); + ret_value = -ENOMSG; + +cleanup: + if (recv_fd >= 0) + close(recv_fd); + + return ret_value; +} diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh index 02d617040793..a5e959a080bb 100755 --- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh @@ -285,11 +285,6 @@ setup_hs() ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh index 79fb81e63c59..a649dba3cb77 100755 --- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh @@ -250,11 +250,6 @@ setup_hs() eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0 diff --git a/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh new file mode 100755 index 000000000000..e23210aa547f --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh @@ -0,0 +1,335 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Jianguo Wu <wujianguo@chinatelecom.cn> +# +# Mostly copied from tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh. +# +# This script is designed for testing the support of netfilter hooks for +# SRv6 End.DX4 behavior. +# +# Hereafter a network diagram is shown, where one tenants (named 100) offer +# IPv4 L3 VPN services allowing hosts to communicate with each other across +# an IPv6 network. +# +# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DX4 behavior. +# +# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-1 pings +# the host hs-2. +# +# First of all, L2 reachability of the host hs-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-1 sends an IPv4 packet destined to hs-2, the router rt-1 +# receives the packet on the internal veth-t100 interface, rt-1 contains the +# SRv6 Encap route for encapsulating the IPv4 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and +# routs the packet to the specified nexthop. Afterwards, the packet is sent to +# the host hs-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a +# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING +# hooks. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-1 netns | | hs-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | 10.0.0.11/24 | +----------+ | | +----------+ | 10.0.0.22/24 | | +# | +-------+-------+ | route | | | | route | +-------+-------- | +# | | table | | | | table | | +# | +----------+ | | +----------+ | +# | +--------------+ | | +--------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | | +# | +--------------+ | | +--------------+ | +# | | | | +# | rt-1 netns | | rt-2 netns | +# | | | | +# +-----------------------------------+ +-----------------------------------+ +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table +# +----------------------------------------------------------------+ +# |SID |Action | +# +----------------------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DX4 nh4 10.0.0.1 dev veth-t100 | +# +----------------------------------------------------------------+ +# +# rt-1: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table +# +---------------------------------------------------------------+ +# |SID |Action | +# +---------------------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DX4 nh4 10.0.0.2 dev veth-t100| +# +---------------------------------------------------------------+ +# +# rt-2: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +readonly IPv6_RT_NETWORK=2001:11 +readonly IPv4_HS_NETWORK=10.0.0 +readonly SID_LOCATOR=fc00 + +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_rt_netfilter() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1 + ip netns exec ${nsname} iptables -t raw -A PREROUTING -m rpfilter --invert -j DROP +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.${rt}${hs}/24 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004 + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \ + via 2001:11::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \ + encap seg6local action End.DX4 nh4 ${IPv4_HS_NETWORK}.${hsdst} dev veth-t${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 + setup_hs 2 2 100 + + # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})" +} + +host_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +router_netfilter_tests() +{ + log_section "SRv6 VPN connectivity test with netfilter enabled in routers" + setup_rt_netfilter 1 + setup_rt_netfilter 2 + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +host_tests +router_netfilter_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh new file mode 100755 index 000000000000..9e69a2ed5bc3 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh @@ -0,0 +1,340 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Jianguo Wu <wujianguo@chinatelecom.cn> +# +# Mostly copied from tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh. +# +# This script is designed for testing the support of netfilter hooks for +# SRv6 End.DX4 behavior. +# +# Hereafter a network diagram is shown, where one tenants (named 100) offer +# IPv6 L3 VPN services allowing hosts to communicate with each other across +# an IPv6 network. +# +# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DX4 behavior. +# +# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-1 pings +# the host hs-2. +# +# First of all, L2 reachability of the host hs-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-1 sends an IPv6 packet destined to hs-2, the router rt-1 +# receives the packet on the internal veth-t100 interface, rt-1 contains the +# SRv6 Encap route for encapsulating the IPv6 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and +# routs the packet to the specified nexthop. Afterwards, the packet is sent to +# the host hs-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a +# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING +# hooks. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-1 netns | | hs-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::1/64 | | | | cafe::2/64 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | cafe::11/64 | +----------+ | | +----------+ | cafe::22/64 | | +# | +-------+-------+ | route | | | | route | +-------+-------- | +# | | table | | | | table | | +# | +----------+ | | +----------+ | +# | +--------------+ | | +--------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | | +# | +--------------+ | | +--------------+ | +# | | | | +# | rt-1 netns | | rt-2 netns | +# | | | | +# +-----------------------------------+ +-----------------------------------+ +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table +# +----------------------------------------------------------------+ +# |SID |Action | +# +----------------------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DX6 nh6 cafe::1 dev veth-t100 | +# +----------------------------------------------------------------+ +# +# rt-1: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table +# +---------------------------------------------------------------+ +# |SID |Action | +# +---------------------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DX6 nh6 cafe::2 dev veth-t100 | +# +---------------------------------------------------------------+ +# +# rt-2: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +readonly IPv6_RT_NETWORK=2001:11 +readonly IPv6_HS_NETWORK=cafe +readonly SID_LOCATOR=fc00 + +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_rt_netfilter() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1 + ip netns exec ${nsname} ip6tables -t raw -A PREROUTING -m rpfilter --invert -j DROP +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::${rt}${hs}/64 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1 +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local rtveth=veth-t${tid} + local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004 + + ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth} + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \ + via 2001:11::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \ + encap seg6local action End.DX6 nh6 ${IPv6_HS_NETWORK}::${hsdst} dev veth-t${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 + setup_hs 2 2 100 + + # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-${hssrc} ping -6 -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})" +} + +host_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +router_netfilter_tests() +{ + log_section "SRv6 VPN connectivity test with netfilter enabled in routers" + setup_rt_netfilter 1 + setup_rt_netfilter 2 + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +host_tests +router_netfilter_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh index 50563443a4ad..318487eda671 100755 --- a/tools/testing/selftests/net/srv6_end_flavors_test.sh +++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh @@ -399,7 +399,7 @@ __get_srv6_rtcfg_id() # Given the description of a router <id:op> as an input, the function returns # the <op> token which represents the operation (e.g. End behavior with or -# withouth flavors) configured for the node. +# without flavors) configured for the node. # Note that when the operation represents an End behavior with a list of # flavors, the output is the ordered version of that list. @@ -480,7 +480,7 @@ setup_rt_local_sids() # all SIDs start with a common locator. Routes and SRv6 Endpoint - # behavior instaces are grouped together in the 'localsid' table. + # behavior instances are grouped together in the 'localsid' table. ip -netns "${nsname}" -6 rule \ add to "${LOCATOR_SERVICE}::/16" \ lookup "${LOCALSID_TABLE_ID}" prio 999 diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh index 87e414cc417c..ba730655a7bf 100755 --- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh @@ -245,10 +245,8 @@ # that adopted in the use cases already examined (of course, it is necessary to # consider the different SIDs/C-SIDs). -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" @@ -376,32 +374,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -410,8 +394,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -420,28 +403,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, @@ -462,10 +429,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -497,7 +464,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -518,9 +485,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -596,7 +560,7 @@ setup_rt_local_sids() local lcnode_func_prefix local lcblock_prefix - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -668,8 +632,8 @@ __setup_l3vpn() local rtsrc_nsname local rtdst_nsname - rtsrc_nsname="$(get_rtname "${src}")" - rtdst_nsname="$(get_rtname "${dst}")" + eval rtsrc_nsname=\${$(get_rtname "${src}")} + eval rtdst_nsname=\${$(get_rtname "${dst}")} container="${LCBLOCK_ADDR}" @@ -744,8 +708,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -791,11 +755,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -880,7 +839,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -903,7 +862,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -915,7 +874,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 @@ -1025,7 +984,7 @@ rt_x_nextcsid_end_behavior_test() local nsname local ret - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} __nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}" ret="$?" diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh index c79cb8ede17f..4b86040c58c6 100755 --- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh @@ -287,10 +287,8 @@ # packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46) # and sends it to the host hs-1. -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" @@ -418,32 +416,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -452,15 +436,12 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" + setup_ns "${nsname}" - __create_namespace "${nsname}" - + eval nsname=\${$(get_rtname "${rtid}")} ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -470,29 +451,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, # the selftest is considered as "skipped". @@ -512,10 +476,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -547,7 +511,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -631,7 +595,7 @@ set_end_x_nextcsid() local rt="$1" local adj="$2" - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} net_prefix="$(get_network_prefix "${rt}" "${adj}")" lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" @@ -650,7 +614,7 @@ set_underlay_sids_reachability() local rt="$1" local rt_neighs="$2" - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -685,7 +649,7 @@ setup_rt_local_sids() local lcnode_func_prefix local lcblock_prefix - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} set_underlay_sids_reachability "${rt}" "${rt_neighs}" @@ -728,8 +692,8 @@ __setup_l3vpn() local rtsrc_nsname local rtdst_nsname - rtsrc_nsname="$(get_rtname "${src}")" - rtdst_nsname="$(get_rtname "${dst}")" + eval rtsrc_nsname=\${$(get_rtname "${src}")} + eval rtdst_nsname=\${$(get_rtname "${dst}")} container="${LCBLOCK_ADDR}" @@ -804,8 +768,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -851,11 +815,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -947,7 +906,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -970,7 +929,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -982,7 +941,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 @@ -1093,7 +1052,7 @@ rt_x_nextcsid_end_x_behavior_test() local nsname local ret - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} __nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}" ret="$?" diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh index 28a775654b92..3efce1718c5f 100755 --- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -166,10 +166,8 @@ # hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d) # -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" readonly RT2HS_DEVNAME="veth-t${VRF_TID}" @@ -248,32 +246,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -282,8 +266,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -292,29 +275,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, # the selftest is considered as "skipped". @@ -334,10 +300,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -369,7 +335,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -387,9 +353,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -403,7 +366,7 @@ setup_rt_local_sids() local nsname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -469,7 +432,7 @@ __setup_rt_policy() local policy='' local n - nsname="$(get_rtname "${encap_rt}")" + eval nsname=\${$(get_rtname "${encap_rt}")} for n in ${end_rts}; do policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," @@ -516,8 +479,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -555,11 +518,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -656,7 +614,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -679,7 +637,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -691,7 +649,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh index cb4177d41b21..cabc70538ffe 100755 --- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh @@ -116,10 +116,8 @@ # hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b) # -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly RT2HS_DEVNAME="veth-hs" readonly HS_VETH_NAME="veth0" @@ -199,32 +197,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -233,8 +217,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -243,28 +226,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, @@ -285,10 +252,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -320,7 +287,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -341,9 +308,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -357,7 +321,7 @@ setup_rt_local_sids() local nsname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -407,7 +371,7 @@ __setup_rt_policy() local policy='' local n - nsname="$(get_rtname "${encap_rt}")" + eval nsname=\${$(get_rtname "${encap_rt}")} for n in ${end_rts}; do policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," @@ -446,7 +410,7 @@ setup_decap() local rt="$1" local nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} # Local End.DX2 behavior ip -netns "${nsname}" -6 route \ @@ -463,8 +427,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -486,11 +450,6 @@ setup_hs() add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up - - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 } # set an auto-generated mac address @@ -508,7 +467,7 @@ set_mac_address() local ifname="$4" local nsname - nsname=$(get_nodename "${nodename}") + eval nsname=\${${nodename}} ip -netns "${nsname}" link set dev "${ifname}" down @@ -532,7 +491,7 @@ set_host_l2peer() local hssrc_name local ipaddr - hssrc_name="$(get_hsname "${hssrc}")" + eval hssrc_name=\${$(get_hsname "${hssrc}")} if [ "${proto}" -eq 6 ]; then ipaddr="${ipprefix}::${hsdst}" @@ -562,7 +521,7 @@ setup_l2vpn() local rtdst="${hsdst}" # set fixed mac for source node and the neigh MAC address - set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" + set_mac_address "hs_${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6 set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4 @@ -570,7 +529,7 @@ setup_l2vpn() # to the mac address of the remote peer (L2 VPN destination host). # Otherwise, traffic coming from the source host is dropped at the # ingress router. - set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" + set_mac_address "rt_${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" # set the SRv6 Policies at the ingress router setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ @@ -647,7 +606,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -670,7 +629,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -682,7 +641,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 522d991e310e..5b0205c70c39 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -26,12 +26,13 @@ LIB := $(LIBDIR)/libaotst.a LDLIBS += $(LIB) -pthread LIBDEPS := lib/aolib.h Makefile -CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing +CFLAGS += -Wall -O2 -g -fno-strict-aliasing CFLAGS += $(KHDR_INCLUDES) CFLAGS += -iquote ./lib/ -I ../../../../include/ # Library -LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c +LIBSRC := ftrace.c ftrace-tcp.c kconfig.c netlink.c +LIBSRC += proc.c repair.c setup.c sock.c utils.c LIBOBJ := $(LIBSRC:%.c=$(LIBDIR)/%.o) EXTRA_CLEAN += $(LIBOBJ) $(LIB) diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c index a1e6e007c291..6736484996a3 100644 --- a/tools/testing/selftests/net/tcp_ao/bench-lookups.c +++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c @@ -355,6 +355,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(30, server_fn, client_fn); + test_init(31, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config index d3277a9de987..3605e38711cb 100644 --- a/tools/testing/selftests/net/tcp_ao/config +++ b/tools/testing/selftests/net/tcp_ao/config @@ -7,4 +7,5 @@ CONFIG_NET_L3_MASTER_DEV=y CONFIG_NET_VRF=y CONFIG_TCP_AO=y CONFIG_TCP_MD5SIG=y +CONFIG_TRACEPOINTS=y CONFIG_VETH=m diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c index 185a2f6e5ff3..93b61e9a36f1 100644 --- a/tools/testing/selftests/net/tcp_ao/connect-deny.c +++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c @@ -4,6 +4,7 @@ #include "aolib.h" #define fault(type) (inj == FAULT_ ## type) +static volatile int sk_pair; static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen, union tcp_addr in_addr, uint8_t prefix, @@ -34,10 +35,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, const char *cnt_name, test_cnt cnt_expected, fault_t inj) { - struct tcp_ao_counters ao_cnt1, ao_cnt2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; int lsk, err, sk = 0; - time_t timeout; lsk = test_listen_socket(this_ip_addr, port, 1); @@ -46,21 +47,24 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, if (cnt_name) before_cnt = netstat_get_one(cnt_name, NULL); - if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (pwd && test_get_tcp_counters(lsk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - err = test_wait_fd(lsk, timeout, 0); + err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair); if (err == -ETIMEDOUT) { + sk_pair = err; if (!fault(TIMEOUT)) - test_fail("timed out for accept()"); + test_fail("%s: timed out for accept()", tst_name); + } else if (err == -EKEYREJECTED) { + if (!fault(KEYREJECT)) + test_fail("%s: key was rejected", tst_name); } else if (err < 0) { - test_error("test_wait_fd()"); + test_error("test_skpair_wait_poll()"); } else { if (fault(TIMEOUT)) - test_fail("ready to accept"); + test_fail("%s: ready to accept", tst_name); sk = accept(lsk, NULL, NULL); if (sk < 0) { @@ -71,12 +75,14 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, } } - if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); + synchronize_threads(); /* before counter checks */ + if (pwd && test_get_tcp_counters(lsk, &cnt2)) + test_error("test_get_tcp_counters()"); close(lsk); + if (pwd) - test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); if (!cnt_name) goto out; @@ -84,10 +90,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, after_cnt = netstat_get_one(cnt_name, NULL); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } @@ -107,7 +113,7 @@ static void *server_fn(void *arg) try_accept("Non-AO server + AO client", port++, NULL, this_ip_dest, -1, 100, 100, 0, - "TCPAOKeyNotFound", 0, FAULT_TIMEOUT); + "TCPAOKeyNotFound", TEST_CNT_NS_KEY_NOT_FOUND, FAULT_TIMEOUT); try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, @@ -133,8 +139,9 @@ static void *server_fn(void *arg) wrong_addr, -1, 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT); + /* Key rejected by the other side, failing short through skpair */ try_accept("Client: Wrong addr", port++, NULL, - this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT); + this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_KEYREJECT); try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 200, 100, 0, @@ -161,8 +168,7 @@ static void try_connect(const char *tst_name, unsigned int port, uint8_t sndid, uint8_t rcvid, test_cnt cnt_expected, fault_t inj) { - struct tcp_ao_counters ao_cnt1, ao_cnt2; - time_t timeout; + struct tcp_counters cnt1, cnt2; int sk, ret; sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); @@ -172,15 +178,15 @@ static void try_connect(const char *tst_name, unsigned int port, if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid)) test_error("setsockopt(TCP_AO_ADD_KEY)"); - if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (pwd && test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - ret = _test_connect_socket(sk, this_ip_dest, port, timeout); - + ret = test_skpair_connect_poll(sk, this_ip_dest, port, cnt_expected, &sk_pair); + synchronize_threads(); /* before counter checks */ if (ret < 0) { + sk_pair = ret; if (fault(KEYREJECT) && ret == -EKEYREJECTED) { test_ok("%s: connect() was prevented", tst_name); } else if (ret == -ETIMEDOUT && fault(TIMEOUT)) { @@ -199,9 +205,11 @@ static void try_connect(const char *tst_name, unsigned int port, else test_ok("%s: connected", tst_name); if (pwd && ret > 0) { - if (test_get_tcp_ao_counters(sk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); - test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); + } else if (pwd) { + test_tcp_counters_free(&cnt1); } out: synchronize_threads(); /* close() */ @@ -212,30 +220,49 @@ out: static void *client_fn(void *arg) { - union tcp_addr wrong_addr, network_addr; + union tcp_addr wrong_addr, network_addr, addr_any = {}; unsigned int port = test_server_port; if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) test_error("Can't convert ip address %s", TEST_WRONG_IP); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server + Non-AO client", port++, NULL, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + /* + * XXX: The test doesn't increase any counters, see tcp_make_synack(). + * Potentially, it can be speed up by setting sk_pair = -ETIMEDOUT + * but the price would be increased complexity of the tracer thread. + */ + trace_ao_event_sk_expect(TCP_AO_SYNACK_NO_KEY, this_ip_dest, addr_any, + port, 0, 100, 100); try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_WRONG_MACLEN, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); @@ -259,6 +286,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(21, server_fn, client_fn); + test_init(22, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c index 81653b47f303..340f00e979ea 100644 --- a/tools/testing/selftests/net/tcp_ao/connect.c +++ b/tools/testing/selftests/net/tcp_ao/connect.c @@ -35,7 +35,7 @@ static void *client_fn(void *arg) uint64_t before_aogood, after_aogood; const size_t nr_packets = 20; struct netstat *ns_before, *ns_after; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters ao1, ao2; if (sk < 0) test_error("socket()"); @@ -50,41 +50,41 @@ static void *client_fn(void *arg) ns_before = netstat_read(); before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &ao1)) + test_error("test_get_tcp_counters()"); - if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, 100, nr_packets)) { test_fail("verify failed"); return NULL; } ns_after = netstat_read(); after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &ao2)) + test_error("test_get_tcp_counters()"); netstat_print_diff(ns_before, ns_after); netstat_free(ns_before); netstat_free(ns_after); if (nr_packets > (after_aogood - before_aogood)) { - test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)", + test_fail("TCPAOGood counter mismatch: %zu > (%" PRIu64 " - %" PRIu64 ")", nr_packets, after_aogood, before_aogood); return NULL; } - if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD)) + if (test_assert_counters("connect", &ao1, &ao2, TEST_CNT_GOOD)) return NULL; - test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64, - before_aogood, ao1.ao_info_pkt_good, - ao1.key_cnts[0].pkt_good, - after_aogood, ao2.ao_info_pkt_good, - ao2.key_cnts[0].pkt_good, + test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %zu", + before_aogood, ao1.ao.ao_info_pkt_good, + ao1.ao.key_cnts[0].pkt_good, + after_aogood, ao2.ao.ao_info_pkt_good, + ao2.ao.key_cnts[0].pkt_good, nr_packets); return NULL; } int main(int argc, char *argv[]) { - test_init(1, server_fn, client_fn); + test_init(2, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c index d69bcba3c929..85c1a1e958c6 100644 --- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c +++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c @@ -53,7 +53,7 @@ static void serve_interfered(int sk) ssize_t test_quota = packet_size * packets_nr * 10; uint64_t dest_unreach_a, dest_unreach_b; uint64_t icmp_ignored_a, icmp_ignored_b; - struct tcp_ao_counters ao_cnt1, ao_cnt2; + struct tcp_counters cnt1, cnt2; bool counter_not_found; struct netstat *ns_after, *ns_before; ssize_t bytes; @@ -61,16 +61,16 @@ static void serve_interfered(int sk) ns_before = netstat_read(); dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL); icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL); - if (test_get_tcp_ao_counters(sk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); bytes = test_server_run(sk, test_quota, 0); ns_after = netstat_read(); netstat_print_diff(ns_before, ns_after); dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL); icmp_ignored_b = netstat_get(ns_after, tcpao_icmps, &counter_not_found); - if (test_get_tcp_ao_counters(sk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); netstat_free(ns_before); netstat_free(ns_after); @@ -91,9 +91,9 @@ static void serve_interfered(int sk) return; } #ifdef TEST_ICMPS_ACCEPT - test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD); #else - test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); #endif if (icmp_ignored_a >= icmp_ignored_b) { test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64, @@ -395,7 +395,6 @@ static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *d static void send_interfered(int sk) { - const unsigned int timeout = TEST_TIMEOUT_SEC; struct sockaddr_in6 src, dst; socklen_t addr_sz; @@ -409,7 +408,7 @@ static void send_interfered(int sk) while (1) { uint32_t rcv_nxt; - if (test_client_verify(sk, packet_size, packets_nr, timeout)) { + if (test_client_verify(sk, packet_size, packets_nr)) { test_fail("client: connection is broken"); return; } @@ -444,6 +443,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(3, server_fn, client_fn); + test_init(4, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index 24e62120b792..69d9a7a05d5c 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -629,11 +629,11 @@ static int key_collection_socket(bool server, unsigned int port) } static void verify_counters(const char *tst_name, bool is_listen_sk, bool server, - struct tcp_ao_counters *a, struct tcp_ao_counters *b) + struct tcp_counters *a, struct tcp_counters *b) { unsigned int i; - __test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD); + test_assert_counters_sk(tst_name, a, b, TEST_CNT_GOOD); for (i = 0; i < collection.nr_keys; i++) { struct test_key *key = &collection.keys[i]; @@ -652,12 +652,12 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server rx_cnt_expected = key->used_on_server_tx; } - test_tcp_ao_key_counters_cmp(tst_name, a, b, - rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0, - sndid, rcvid); + test_assert_counters_key(tst_name, &a->ao, &b->ao, + rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0, + sndid, rcvid); } - test_tcp_ao_counters_free(a); - test_tcp_ao_counters_free(b); + test_tcp_counters_free(a); + test_tcp_counters_free(b); test_ok("%s: passed counters checks", tst_name); } @@ -791,17 +791,17 @@ out: } static int start_server(const char *tst_name, unsigned int port, size_t quota, - struct tcp_ao_counters *begin, + struct tcp_counters *begin, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters lsk_c1, lsk_c2; + struct tcp_counters lsk_c1, lsk_c2; ssize_t bytes; int sk, lsk; synchronize_threads(); /* 1: key collection initialized */ lsk = key_collection_socket(true, port); - if (test_get_tcp_ao_counters(lsk, &lsk_c1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &lsk_c1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 2: MKTs added => connect() */ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) test_error("test_wait_fd()"); @@ -809,12 +809,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota, sk = accept(lsk, NULL, NULL); if (sk < 0) test_error("accept()"); - if (test_get_tcp_ao_counters(sk, begin)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, begin)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 3: accepted => send data */ - if (test_get_tcp_ao_counters(lsk, &lsk_c2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &lsk_c2)) + test_error("test_get_tcp_counters()"); verify_keys(tst_name, lsk, true, true); close(lsk); @@ -830,12 +830,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota, } static void end_server(const char *tst_name, int sk, - struct tcp_ao_counters *begin) + struct tcp_counters *begin) { - struct tcp_ao_counters end; + struct tcp_counters end; - if (test_get_tcp_ao_counters(sk, &end)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &end)) + test_error("test_get_tcp_counters()"); verify_keys(tst_name, sk, false, true); synchronize_threads(); /* 4: verified => closed */ @@ -848,7 +848,7 @@ static void end_server(const char *tst_name, int sk, static void try_server_run(const char *tst_name, unsigned int port, size_t quota, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; int sk; sk = start_server(tst_name, port, quota, &tmp, @@ -860,7 +860,7 @@ static void server_rotations(const char *tst_name, unsigned int port, size_t quota, unsigned int rotations, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; unsigned int i; int sk; @@ -886,7 +886,7 @@ static void server_rotations(const char *tst_name, unsigned int port, static int run_client(const char *tst_name, unsigned int port, unsigned int nr_keys, int current_index, int rnext_index, - struct tcp_ao_counters *before, + struct tcp_counters *before, const size_t msg_sz, const size_t msg_nr) { int sk; @@ -904,8 +904,8 @@ static int run_client(const char *tst_name, unsigned int port, if (test_set_key(sk, sndid, rcvid)) test_error("failed to set current/rnext keys"); } - if (before && test_get_tcp_ao_counters(sk, before)) - test_error("test_get_tcp_ao_counters()"); + if (before && test_get_tcp_counters(sk, before)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 2: MKTs added => connect() */ if (test_connect_socket(sk, this_ip_dest, port++) <= 0) @@ -918,11 +918,11 @@ static int run_client(const char *tst_name, unsigned int port, collection.keys[rnext_index].used_on_server_tx = 1; synchronize_threads(); /* 3: accepted => send data */ - if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, msg_sz, msg_nr)) { test_fail("verify failed"); close(sk); if (before) - test_tcp_ao_counters_free(before); + test_tcp_counters_free(before); return -1; } @@ -931,7 +931,7 @@ static int run_client(const char *tst_name, unsigned int port, static int start_client(const char *tst_name, unsigned int port, unsigned int nr_keys, int current_index, int rnext_index, - struct tcp_ao_counters *before, + struct tcp_counters *before, const size_t msg_sz, const size_t msg_nr) { if (init_default_key_collection(nr_keys, true)) @@ -943,9 +943,9 @@ static int start_client(const char *tst_name, unsigned int port, static void end_client(const char *tst_name, int sk, unsigned int nr_keys, int current_index, int rnext_index, - struct tcp_ao_counters *start) + struct tcp_counters *start) { - struct tcp_ao_counters end; + struct tcp_counters end; /* Some application may become dependent on this kernel choice */ if (current_index < 0) @@ -955,8 +955,8 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys, verify_current_rnext(tst_name, sk, collection.keys[current_index].client_keyid, collection.keys[rnext_index].server_keyid); - if (start && test_get_tcp_ao_counters(sk, &end)) - test_error("test_get_tcp_ao_counters()"); + if (start && test_get_tcp_counters(sk, &end)) + test_error("test_get_tcp_counters()"); verify_keys(tst_name, sk, false, false); synchronize_threads(); /* 4: verify => closed */ close(sk); @@ -965,7 +965,7 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys, synchronize_threads(); /* 5: counters */ } -static void try_unmatched_keys(int sk, int *rnext_index) +static void try_unmatched_keys(int sk, int *rnext_index, unsigned int port) { struct test_key *key; unsigned int i = 0; @@ -1013,7 +1013,10 @@ static void try_unmatched_keys(int sk, int *rnext_index) test_error("all keys on server match the client"); if (test_set_key(sk, -1, key->server_keyid)) test_error("Can't change the current key"); - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, + -1, key->server_keyid, -1); + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("verify failed"); *rnext_index = i; } @@ -1045,7 +1048,7 @@ static void check_current_back(const char *tst_name, unsigned int port, unsigned int current_index, unsigned int rnext_index, unsigned int rotate_to_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; int sk; sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, @@ -1054,7 +1057,11 @@ static void check_current_back(const char *tst_name, unsigned int port, return; if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1)) test_error("Can't change the current key"); - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, + collection.keys[rotate_to_index].client_keyid, + collection.keys[current_index].client_keyid, -1); + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("verify failed"); /* There is a race here: between setting the current_key with * setsockopt(TCP_AO_INFO) and starting to send some data - there @@ -1074,7 +1081,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port, unsigned int nr_keys, unsigned int rotations, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; unsigned int i; int sk; @@ -1085,12 +1092,17 @@ static void roll_over_keys(const char *tst_name, unsigned int port, for (i = rnext_index + 1; rotations > 0; i++, rotations--) { if (i >= collection.nr_keys) i = 0; + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, + this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, + i == 0 ? -1 : collection.keys[i - 1].server_keyid, + collection.keys[i].server_keyid, -1); if (test_set_key(sk, -1, collection.keys[i].server_keyid)) test_error("Can't change the Rnext key"); - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, msg_len, nr_packets)) { test_fail("verify failed"); close(sk); - test_tcp_ao_counters_free(&tmp); + test_tcp_counters_free(&tmp); return; } verify_current_rnext(tst_name, sk, -1, @@ -1104,7 +1116,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port, static void try_client_run(const char *tst_name, unsigned int port, unsigned int nr_keys, int current_index, int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; int sk; sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, @@ -1124,7 +1136,7 @@ static void try_client_match(const char *tst_name, unsigned int port, rnext_index, msg_len, nr_packets); if (sk < 0) return; - try_unmatched_keys(sk, &rnext_index); + try_unmatched_keys(sk, &rnext_index, port); end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL); } @@ -1181,6 +1193,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(120, server_fn, client_fn); + test_init(121, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h index fbc7f6111815..ebb2899c12fe 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h +++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h @@ -37,17 +37,59 @@ extern void __test_xfail(const char *buf); extern void __test_error(const char *buf); extern void __test_skip(const char *buf); -__attribute__((__format__(__printf__, 2, 3))) -static inline void __test_print(void (*fn)(const char *), const char *fmt, ...) +static inline char *test_snprintf(const char *fmt, va_list vargs) { -#define TEST_MSG_BUFFER_SIZE 4096 - char buf[TEST_MSG_BUFFER_SIZE]; - va_list arg; - - va_start(arg, fmt); - vsnprintf(buf, sizeof(buf), fmt, arg); - va_end(arg); - fn(buf); + char *ret = NULL; + size_t size = 0; + va_list tmp; + int n = 0; + + va_copy(tmp, vargs); + n = vsnprintf(ret, size, fmt, tmp); + va_end(tmp); + if (n < 0) + return NULL; + + size = n + 1; + ret = malloc(size); + if (!ret) + return NULL; + + n = vsnprintf(ret, size, fmt, vargs); + if (n < 0 || n > size - 1) { + free(ret); + return NULL; + } + return ret; +} + +static __printf(1, 2) inline char *test_sprintf(const char *fmt, ...) +{ + va_list vargs; + char *ret; + + va_start(vargs, fmt); + ret = test_snprintf(fmt, vargs); + va_end(vargs); + + return ret; +} + +static __printf(2, 3) inline void __test_print(void (*fn)(const char *), + const char *fmt, ...) +{ + va_list vargs; + char *msg; + + va_start(vargs, fmt); + msg = test_snprintf(fmt, vargs); + va_end(vargs); + + if (!msg) + return; + + fn(msg); + free(msg); } #define test_print(fmt, ...) \ @@ -103,6 +145,7 @@ enum test_needs_kconfig { KCONFIG_TCP_AO, /* required */ KCONFIG_TCP_MD5, /* optional, for TCP-MD5 features */ KCONFIG_NET_VRF, /* optional, for L3/VRF testing */ + KCONFIG_FTRACE, /* optional, for tracepoints checks */ __KCONFIG_LAST__ }; extern bool kernel_config_has(enum test_needs_kconfig k); @@ -142,6 +185,8 @@ static inline void test_init2(unsigned int ntests, __test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2); } extern void test_add_destructor(void (*d)(void)); +extern void test_init_ftrace(int nsfd1, int nsfd2); +extern int test_setup_tracing(void); /* To adjust optmem socket limit, approximately estimate a number, * that is bigger than sizeof(struct tcp_ao_key). @@ -216,12 +261,17 @@ static inline void test_init(unsigned int ntests, } extern void synchronize_threads(void); extern void switch_ns(int fd); +extern int switch_save_ns(int fd); +extern void switch_close_ns(int fd); extern __thread union tcp_addr this_ip_addr; extern __thread union tcp_addr this_ip_dest; extern int test_family; extern void randomize_buffer(void *buf, size_t buflen); +extern __printf(3, 4) int test_echo(const char *fname, bool append, + const char *fmt, ...); + extern int open_netns(void); extern int unshare_open_netns(void); extern const char veth_name[]; @@ -239,7 +289,7 @@ extern int link_set_up(const char *intf); extern const unsigned int test_server_port; extern int test_wait_fd(int sk, time_t sec, bool write); extern int __test_connect_socket(int sk, const char *device, - void *addr, size_t addr_sz, time_t timeout); + void *addr, size_t addr_sz, bool async); extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz); static inline int test_listen_socket(const union tcp_addr taddr, @@ -281,25 +331,26 @@ static inline int test_listen_socket(const union tcp_addr taddr, * If set to 0 - kernel will try to retransmit SYN number of times, set in * /proc/sys/net/ipv4/tcp_syn_retries * By default set to 1 to make tests pass faster on non-busy machine. + * [in process of removal, don't use in new tests] */ #ifndef TEST_RETRANSMIT_SEC #define TEST_RETRANSMIT_SEC 1 #endif static inline int _test_connect_socket(int sk, const union tcp_addr taddr, - unsigned int port, time_t timeout) + unsigned int port, bool async) { sockaddr_af addr; tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); return __test_connect_socket(sk, veth_name, - (void *)&addr, sizeof(addr), timeout); + (void *)&addr, sizeof(addr), async); } static inline int test_connect_socket(int sk, const union tcp_addr taddr, unsigned int port) { - return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC); + return _test_connect_socket(sk, taddr, port, false); } extern int __test_set_md5(int sk, void *addr, size_t addr_sz, @@ -433,10 +484,7 @@ static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps) } extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec); -extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, - const size_t msg_len, time_t timeout_sec); -extern int test_client_verify(int sk, const size_t msg_len, const size_t nr, - time_t timeout_sec); +extern int test_client_verify(int sk, const size_t msg_len, const size_t nr); struct tcp_ao_key_counters { uint8_t sndid; @@ -462,7 +510,15 @@ struct tcp_ao_counters { size_t nr_keys; struct tcp_ao_key_counters *key_cnts; }; -extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out); + +struct tcp_counters { + struct tcp_ao_counters ao; + uint64_t netns_md5_notfound; + uint64_t netns_md5_unexpected; + uint64_t netns_md5_failure; +}; + +extern int test_get_tcp_counters(int sk, struct tcp_counters *out); #define TEST_CNT_KEY_GOOD BIT(0) #define TEST_CNT_KEY_BAD BIT(1) @@ -476,8 +532,31 @@ extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out); #define TEST_CNT_NS_KEY_NOT_FOUND BIT(9) #define TEST_CNT_NS_AO_REQUIRED BIT(10) #define TEST_CNT_NS_DROPPED_ICMP BIT(11) +#define TEST_CNT_NS_MD5_NOT_FOUND BIT(12) +#define TEST_CNT_NS_MD5_UNEXPECTED BIT(13) +#define TEST_CNT_NS_MD5_FAILURE BIT(14) typedef uint16_t test_cnt; +#define _for_each_counter(f) \ +do { \ + /* per-netns */ \ + f(ao.netns_ao_good, TEST_CNT_NS_GOOD); \ + f(ao.netns_ao_bad, TEST_CNT_NS_BAD); \ + f(ao.netns_ao_key_not_found, TEST_CNT_NS_KEY_NOT_FOUND); \ + f(ao.netns_ao_required, TEST_CNT_NS_AO_REQUIRED); \ + f(ao.netns_ao_dropped_icmp, TEST_CNT_NS_DROPPED_ICMP); \ + /* per-socket */ \ + f(ao.ao_info_pkt_good, TEST_CNT_SOCK_GOOD); \ + f(ao.ao_info_pkt_bad, TEST_CNT_SOCK_BAD); \ + f(ao.ao_info_pkt_key_not_found, TEST_CNT_SOCK_KEY_NOT_FOUND); \ + f(ao.ao_info_pkt_ao_required, TEST_CNT_SOCK_AO_REQUIRED); \ + f(ao.ao_info_pkt_dropped_icmp, TEST_CNT_SOCK_DROPPED_ICMP); \ + /* non-AO */ \ + f(netns_md5_notfound, TEST_CNT_NS_MD5_NOT_FOUND); \ + f(netns_md5_unexpected, TEST_CNT_NS_MD5_UNEXPECTED); \ + f(netns_md5_failure, TEST_CNT_NS_MD5_FAILURE); \ +} while (0) + #define TEST_CNT_AO_GOOD (TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD) #define TEST_CNT_AO_BAD (TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD) #define TEST_CNT_AO_KEY_NOT_FOUND (TEST_CNT_SOCK_KEY_NOT_FOUND | \ @@ -489,34 +568,71 @@ typedef uint16_t test_cnt; #define TEST_CNT_GOOD (TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD) #define TEST_CNT_BAD (TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD) -extern int __test_tcp_ao_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, struct tcp_ao_counters *after, +extern test_cnt test_cmp_counters(struct tcp_counters *before, + struct tcp_counters *after); +extern int test_assert_counters_sk(const char *tst_name, + struct tcp_counters *before, struct tcp_counters *after, test_cnt expected); -extern int test_tcp_ao_key_counters_cmp(const char *tst_name, +extern int test_assert_counters_key(const char *tst_name, struct tcp_ao_counters *before, struct tcp_ao_counters *after, test_cnt expected, int sndid, int rcvid); -extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts); +extern void test_tcp_counters_free(struct tcp_counters *cnts); + +/* + * Polling for netns and socket counters during select()/connect() and also + * client/server messaging. Instead of constant timeout on underlying select(), + * check the counters and return early. This allows to pass the tests where + * timeout is expected without waiting for that fixing timeout (tests speed-up). + * Previously shorter timeouts were used for tests expecting to time out, + * but that leaded to sporadic false positives on counter checks failures, + * as one second timeouts aren't enough for TCP retransmit. + * + * Two sides of the socketpair (client/server) should synchronize failures + * using a shared variable *err, so that they can detect the other side's + * failure. + */ +extern int test_skpair_wait_poll(int sk, bool write, test_cnt cond, + volatile int *err); +extern int _test_skpair_connect_poll(int sk, const char *device, + void *addr, size_t addr_sz, + test_cnt cond, volatile int *err); +static inline int test_skpair_connect_poll(int sk, const union tcp_addr taddr, + unsigned int port, + test_cnt cond, volatile int *err) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); + return _test_skpair_connect_poll(sk, veth_name, + (void *)&addr, sizeof(addr), cond, err); +} + +extern int test_skpair_client(int sk, const size_t msg_len, const size_t nr, + test_cnt cond, volatile int *err); +extern int test_skpair_server(int sk, ssize_t quota, + test_cnt cond, volatile int *err); + /* - * Frees buffers allocated in test_get_tcp_ao_counters(). + * Frees buffers allocated in test_get_tcp_counters(). * The function doesn't expect new keys or keys removed between calls - * to test_get_tcp_ao_counters(). Check key counters manually if they + * to test_get_tcp_counters(). Check key counters manually if they * may change. */ -static inline int test_tcp_ao_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, - struct tcp_ao_counters *after, - test_cnt expected) +static inline int test_assert_counters(const char *tst_name, + struct tcp_counters *before, + struct tcp_counters *after, + test_cnt expected) { int ret; - ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected); + ret = test_assert_counters_sk(tst_name, before, after, expected); if (ret) goto out; - ret = test_tcp_ao_key_counters_cmp(tst_name, before, after, - expected, -1, -1); + ret = test_assert_counters_key(tst_name, &before->ao, &after->ao, + expected, -1, -1); out: - test_tcp_ao_counters_free(before); - test_tcp_ao_counters_free(after); + test_tcp_counters_free(before); + test_tcp_counters_free(after); return ret; } @@ -602,4 +718,115 @@ static inline int test_add_repaired_key(int sk, return test_verify_socket_key(sk, &tmp); } +#define DEFAULT_FTRACE_BUFFER_KB 10000 +#define DEFAULT_TRACER_LINES_ARR 200 +struct test_ftracer; +extern uint64_t ns_cookie1, ns_cookie2; + +enum ftracer_op { + FTRACER_LINE_DISCARD = 0, + FTRACER_LINE_PRESERVE, + FTRACER_EXIT, +}; + +extern struct test_ftracer *create_ftracer(const char *name, + enum ftracer_op (*process_line)(const char *line), + void (*destructor)(struct test_ftracer *tracer), + bool (*expecting_more)(void), + size_t lines_buf_sz, size_t buffer_size_kb); +extern int setup_trace_event(struct test_ftracer *tracer, + const char *event, const char *filter); +extern void destroy_ftracer(struct test_ftracer *tracer); +extern const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer); +extern const char **tracer_get_savedlines(struct test_ftracer *tracer); + +enum trace_events { + /* TCP_HASH_EVENT */ + TCP_HASH_BAD_HEADER = 0, + TCP_HASH_MD5_REQUIRED, + TCP_HASH_MD5_UNEXPECTED, + TCP_HASH_MD5_MISMATCH, + TCP_HASH_AO_REQUIRED, + /* TCP_AO_EVENT */ + TCP_AO_HANDSHAKE_FAILURE, + TCP_AO_WRONG_MACLEN, + TCP_AO_MISMATCH, + TCP_AO_KEY_NOT_FOUND, + TCP_AO_RNEXT_REQUEST, + /* TCP_AO_EVENT_SK */ + TCP_AO_SYNACK_NO_KEY, + /* TCP_AO_EVENT_SNE */ + TCP_AO_SND_SNE_UPDATE, + TCP_AO_RCV_SNE_UPDATE, + __MAX_TRACE_EVENTS +}; + +extern int __trace_event_expect(enum trace_events type, int family, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack, + int keyid, int rnext, int maclen, int sne); + +static inline void trace_hash_event_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, L3index, + fin, syn, rst, psh, ack, + -1, -1, -1, -1); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +static inline void trace_ao_event_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack, + int keyid, int rnext, int maclen) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, L3index, + fin, syn, rst, psh, ack, + keyid, rnext, maclen, -1); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +static inline void trace_ao_event_sk_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, + int keyid, int rnext) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, -1, + -1, -1, -1, -1, -1, + keyid, rnext, -1, -1); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +static inline void trace_ao_event_sne_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int sne) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, -1, + -1, -1, -1, -1, -1, + -1, -1, -1, sne); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +extern int setup_aolib_ftracer(void); + #endif /* _AOLIB_H_ */ diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c new file mode 100644 index 000000000000..27403f875054 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c @@ -0,0 +1,556 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> +#include <pthread.h> +#include "aolib.h" + +static const char *trace_event_names[__MAX_TRACE_EVENTS] = { + /* TCP_HASH_EVENT */ + "tcp_hash_bad_header", + "tcp_hash_md5_required", + "tcp_hash_md5_unexpected", + "tcp_hash_md5_mismatch", + "tcp_hash_ao_required", + /* TCP_AO_EVENT */ + "tcp_ao_handshake_failure", + "tcp_ao_wrong_maclen", + "tcp_ao_mismatch", + "tcp_ao_key_not_found", + "tcp_ao_rnext_request", + /* TCP_AO_EVENT_SK */ + "tcp_ao_synack_no_key", + /* TCP_AO_EVENT_SNE */ + "tcp_ao_snd_sne_update", + "tcp_ao_rcv_sne_update" +}; + +struct expected_trace_point { + /* required */ + enum trace_events type; + int family; + union tcp_addr src; + union tcp_addr dst; + + /* optional */ + int src_port; + int dst_port; + int L3index; + + int fin; + int syn; + int rst; + int psh; + int ack; + + int keyid; + int rnext; + int maclen; + int sne; + + size_t matched; +}; + +static struct expected_trace_point *exp_tps; +static size_t exp_tps_nr; +static size_t exp_tps_size; +static pthread_mutex_t exp_tps_mutex = PTHREAD_MUTEX_INITIALIZER; + +int __trace_event_expect(enum trace_events type, int family, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack, + int keyid, int rnext, int maclen, int sne) +{ + struct expected_trace_point new_tp = { + .type = type, + .family = family, + .src = src, + .dst = dst, + .src_port = src_port, + .dst_port = dst_port, + .L3index = L3index, + .fin = fin, + .syn = syn, + .rst = rst, + .psh = psh, + .ack = ack, + .keyid = keyid, + .rnext = rnext, + .maclen = maclen, + .sne = sne, + .matched = 0, + }; + int ret = 0; + + if (!kernel_config_has(KCONFIG_FTRACE)) + return 0; + + pthread_mutex_lock(&exp_tps_mutex); + if (exp_tps_nr == exp_tps_size) { + struct expected_trace_point *tmp; + + if (exp_tps_size == 0) + exp_tps_size = 10; + else + exp_tps_size = exp_tps_size * 1.6; + + tmp = reallocarray(exp_tps, exp_tps_size, sizeof(exp_tps[0])); + if (!tmp) { + ret = -ENOMEM; + goto out; + } + exp_tps = tmp; + } + exp_tps[exp_tps_nr] = new_tp; + exp_tps_nr++; +out: + pthread_mutex_unlock(&exp_tps_mutex); + return ret; +} + +static void free_expected_events(void) +{ + /* We're from the process destructor - not taking the mutex */ + exp_tps_size = 0; + exp_tps = NULL; + free(exp_tps); +} + +struct trace_point { + int family; + union tcp_addr src; + union tcp_addr dst; + unsigned int src_port; + unsigned int dst_port; + int L3index; + unsigned int fin:1, + syn:1, + rst:1, + psh:1, + ack:1; + + unsigned int keyid; + unsigned int rnext; + unsigned int maclen; + + unsigned int sne; +}; + +static bool lookup_expected_event(int event_type, struct trace_point *e) +{ + size_t i; + + pthread_mutex_lock(&exp_tps_mutex); + for (i = 0; i < exp_tps_nr; i++) { + struct expected_trace_point *p = &exp_tps[i]; + size_t sk_size; + + if (p->type != event_type) + continue; + if (p->family != e->family) + continue; + if (p->family == AF_INET) + sk_size = sizeof(p->src.a4); + else + sk_size = sizeof(p->src.a6); + if (memcmp(&p->src, &e->src, sk_size)) + continue; + if (memcmp(&p->dst, &e->dst, sk_size)) + continue; + if (p->src_port >= 0 && p->src_port != e->src_port) + continue; + if (p->dst_port >= 0 && p->dst_port != e->dst_port) + continue; + if (p->L3index >= 0 && p->L3index != e->L3index) + continue; + + if (p->fin >= 0 && p->fin != e->fin) + continue; + if (p->syn >= 0 && p->syn != e->syn) + continue; + if (p->rst >= 0 && p->rst != e->rst) + continue; + if (p->psh >= 0 && p->psh != e->psh) + continue; + if (p->ack >= 0 && p->ack != e->ack) + continue; + + if (p->keyid >= 0 && p->keyid != e->keyid) + continue; + if (p->rnext >= 0 && p->rnext != e->rnext) + continue; + if (p->maclen >= 0 && p->maclen != e->maclen) + continue; + if (p->sne >= 0 && p->sne != e->sne) + continue; + p->matched++; + pthread_mutex_unlock(&exp_tps_mutex); + return true; + } + pthread_mutex_unlock(&exp_tps_mutex); + return false; +} + +static int check_event_type(const char *line) +{ + size_t i; + + /* + * This should have been a set or hashmap, but it's a selftest, + * so... KISS. + */ + for (i = 0; i < __MAX_TRACE_EVENTS; i++) { + if (!strncmp(trace_event_names[i], line, strlen(trace_event_names[i]))) + return i; + } + return -1; +} + +static bool event_has_flags(enum trace_events event) +{ + switch (event) { + case TCP_HASH_BAD_HEADER: + case TCP_HASH_MD5_REQUIRED: + case TCP_HASH_MD5_UNEXPECTED: + case TCP_HASH_MD5_MISMATCH: + case TCP_HASH_AO_REQUIRED: + case TCP_AO_HANDSHAKE_FAILURE: + case TCP_AO_WRONG_MACLEN: + case TCP_AO_MISMATCH: + case TCP_AO_KEY_NOT_FOUND: + case TCP_AO_RNEXT_REQUEST: + return true; + default: + return false; + } +} + +static int tracer_ip_split(int family, char *src, char **addr, char **port) +{ + char *p; + + if (family == AF_INET) { + /* fomat is <addr>:port, i.e.: 10.0.254.1:7015 */ + *addr = src; + p = strchr(src, ':'); + if (!p) { + test_print("Couldn't parse trace event addr:port %s", src); + return -EINVAL; + } + *p++ = '\0'; + *port = p; + return 0; + } + if (family != AF_INET6) + return -EAFNOSUPPORT; + + /* format is [<addr>]:port, i.e.: [2001:db8:254::1]:7013 */ + *addr = strchr(src, '['); + p = strchr(src, ']'); + + if (!p || !*addr) { + test_print("Couldn't parse trace event [addr]:port %s", src); + return -EINVAL; + } + + *addr = *addr + 1; /* '[' */ + *p++ = '\0'; /* ']' */ + if (*p != ':') { + test_print("Couldn't parse trace event :port %s", p); + return -EINVAL; + } + *p++ = '\0'; /* ':' */ + *port = p; + return 0; +} + +static int tracer_scan_address(int family, char *src, + union tcp_addr *dst, unsigned int *port) +{ + char *addr, *port_str; + int ret; + + ret = tracer_ip_split(family, src, &addr, &port_str); + if (ret) + return ret; + + if (inet_pton(family, addr, dst) != 1) { + test_print("Couldn't parse trace event addr %s", addr); + return -EINVAL; + } + errno = 0; + *port = (unsigned int)strtoul(port_str, NULL, 10); + if (errno != 0) { + test_print("Couldn't parse trace event port %s", port_str); + return -errno; + } + return 0; +} + +static int tracer_scan_event(const char *line, enum trace_events event, + struct trace_point *out) +{ + char *src = NULL, *dst = NULL, *family = NULL; + char fin, syn, rst, psh, ack; + int nr_matched, ret = 0; + uint64_t netns_cookie; + + switch (event) { + case TCP_HASH_BAD_HEADER: + case TCP_HASH_MD5_REQUIRED: + case TCP_HASH_MD5_UNEXPECTED: + case TCP_HASH_MD5_MISMATCH: + case TCP_HASH_AO_REQUIRED: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c]", + &netns_cookie, &family, + &src, &dst, &out->L3index, + &fin, &syn, &rst, &psh, &ack); + if (nr_matched != 10) + test_print("Couldn't parse trace event, matched = %d/10", + nr_matched); + break; + } + case TCP_AO_HANDSHAKE_FAILURE: + case TCP_AO_WRONG_MACLEN: + case TCP_AO_MISMATCH: + case TCP_AO_KEY_NOT_FOUND: + case TCP_AO_RNEXT_REQUEST: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u", + &netns_cookie, &family, + &src, &dst, &out->L3index, + &fin, &syn, &rst, &psh, &ack, + &out->keyid, &out->rnext, &out->maclen); + if (nr_matched != 13) + test_print("Couldn't parse trace event, matched = %d/13", + nr_matched); + break; + } + case TCP_AO_SYNACK_NO_KEY: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms keyid=%u rnext=%u", + &netns_cookie, &family, + &src, &dst, &out->keyid, &out->rnext); + if (nr_matched != 6) + test_print("Couldn't parse trace event, matched = %d/6", + nr_matched); + break; + } + case TCP_AO_SND_SNE_UPDATE: + case TCP_AO_RCV_SNE_UPDATE: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms sne=%u", + &netns_cookie, &family, + &src, &dst, &out->sne); + if (nr_matched != 5) + test_print("Couldn't parse trace event, matched = %d/5", + nr_matched); + break; + } + default: + return -1; + } + + if (family) { + if (!strcmp(family, "AF_INET")) { + out->family = AF_INET; + } else if (!strcmp(family, "AF_INET6")) { + out->family = AF_INET6; + } else { + test_print("Couldn't parse trace event family %s", family); + ret = -EINVAL; + goto out_free; + } + } + + if (event_has_flags(event)) { + out->fin = (fin == 'F'); + out->syn = (syn == 'S'); + out->rst = (rst == 'R'); + out->psh = (psh == 'P'); + out->ack = (ack == '.'); + + if ((fin != 'F' && fin != ' ') || + (syn != 'S' && syn != ' ') || + (rst != 'R' && rst != ' ') || + (psh != 'P' && psh != ' ') || + (ack != '.' && ack != ' ')) { + test_print("Couldn't parse trace event flags %c%c%c%c%c", + fin, syn, rst, psh, ack); + ret = -EINVAL; + goto out_free; + } + } + + if (src && tracer_scan_address(out->family, src, &out->src, &out->src_port)) { + ret = -EINVAL; + goto out_free; + } + + if (dst && tracer_scan_address(out->family, dst, &out->dst, &out->dst_port)) { + ret = -EINVAL; + goto out_free; + } + + if (netns_cookie != ns_cookie1 && netns_cookie != ns_cookie2) { + test_print("Net namespace filter for trace event didn't work: %" PRIu64 " != %" PRIu64 " OR %" PRIu64, + netns_cookie, ns_cookie1, ns_cookie2); + ret = -EINVAL; + } + +out_free: + free(src); + free(dst); + free(family); + return ret; +} + +static enum ftracer_op aolib_tracer_process_event(const char *line) +{ + int event_type = check_event_type(line); + struct trace_point tmp = {}; + + if (event_type < 0) + return FTRACER_LINE_PRESERVE; + + if (tracer_scan_event(line, event_type, &tmp)) + return FTRACER_LINE_PRESERVE; + + return lookup_expected_event(event_type, &tmp) ? + FTRACER_LINE_DISCARD : FTRACER_LINE_PRESERVE; +} + +static void dump_trace_event(struct expected_trace_point *e) +{ + char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; + + if (!inet_ntop(e->family, &e->src, src, INET6_ADDRSTRLEN)) + test_error("inet_ntop()"); + if (!inet_ntop(e->family, &e->dst, dst, INET6_ADDRSTRLEN)) + test_error("inet_ntop()"); + test_print("trace event filter %s [%s:%d => %s:%d, L3index %d, flags: %s%s%s%s%s, keyid: %d, rnext: %d, maclen: %d, sne: %d] = %zu", + trace_event_names[e->type], + src, e->src_port, dst, e->dst_port, e->L3index, + e->fin ? "F" : "", e->syn ? "S" : "", e->rst ? "R" : "", + e->psh ? "P" : "", e->ack ? "." : "", + e->keyid, e->rnext, e->maclen, e->sne, e->matched); +} + +static void print_match_stats(bool unexpected_events) +{ + size_t matches_per_type[__MAX_TRACE_EVENTS] = {}; + bool expected_but_none = false; + size_t i, total_matched = 0; + char *stat_line = NULL; + + for (i = 0; i < exp_tps_nr; i++) { + struct expected_trace_point *e = &exp_tps[i]; + + total_matched += e->matched; + matches_per_type[e->type] += e->matched; + if (!e->matched) + expected_but_none = true; + } + for (i = 0; i < __MAX_TRACE_EVENTS; i++) { + if (!matches_per_type[i]) + continue; + stat_line = test_sprintf("%s%s[%zu] ", stat_line ?: "", + trace_event_names[i], + matches_per_type[i]); + if (!stat_line) + test_error("test_sprintf()"); + } + + if (unexpected_events || expected_but_none) { + for (i = 0; i < exp_tps_nr; i++) + dump_trace_event(&exp_tps[i]); + } + + if (unexpected_events) + return; + + if (expected_but_none) + test_fail("Some trace events were expected, but didn't occur"); + else if (total_matched) + test_ok("Trace events matched expectations: %zu %s", + total_matched, stat_line); + else + test_ok("No unexpected trace events during the test run"); +} + +#define dump_events(fmt, ...) \ + __test_print(__test_msg, fmt, ##__VA_ARGS__) +static void check_free_events(struct test_ftracer *tracer) +{ + const char **lines; + size_t nr; + + if (!kernel_config_has(KCONFIG_FTRACE)) { + test_skip("kernel config doesn't have ftrace - no checks"); + return; + } + + nr = tracer_get_savedlines_nr(tracer); + lines = tracer_get_savedlines(tracer); + print_match_stats(!!nr); + if (!nr) + return; + + errno = 0; + test_xfail("Trace events [%zu] were not expected:", nr); + while (nr) + dump_events("\t%s", lines[--nr]); +} + +static int setup_tcp_trace_events(struct test_ftracer *tracer) +{ + char *filter; + size_t i; + int ret; + + filter = test_sprintf("net_cookie == %zu || net_cookie == %zu", + ns_cookie1, ns_cookie2); + if (!filter) + return -ENOMEM; + + for (i = 0; i < __MAX_TRACE_EVENTS; i++) { + char *event_name = test_sprintf("tcp/%s", trace_event_names[i]); + + if (!event_name) { + ret = -ENOMEM; + break; + } + ret = setup_trace_event(tracer, event_name, filter); + free(event_name); + if (ret) + break; + } + + free(filter); + return ret; +} + +static void aolib_tracer_destroy(struct test_ftracer *tracer) +{ + check_free_events(tracer); + free_expected_events(); +} + +static bool aolib_tracer_expecting_more(void) +{ + size_t i; + + for (i = 0; i < exp_tps_nr; i++) + if (!exp_tps[i].matched) + return true; + return false; +} + +int setup_aolib_ftracer(void) +{ + struct test_ftracer *f; + + f = create_ftracer("aolib", aolib_tracer_process_event, + aolib_tracer_destroy, aolib_tracer_expecting_more, + DEFAULT_FTRACE_BUFFER_KB, DEFAULT_TRACER_LINES_ARR); + if (!f) + return -1; + + return setup_tcp_trace_events(f); +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c new file mode 100644 index 000000000000..e4d0b173bc94 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c @@ -0,0 +1,543 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> +#include <pthread.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <sys/time.h> +#include <unistd.h> +#include "../../../../../include/linux/kernel.h" +#include "aolib.h" + +static char ftrace_path[] = "ksft-ftrace-XXXXXX"; +static bool ftrace_mounted; +uint64_t ns_cookie1, ns_cookie2; + +struct test_ftracer { + pthread_t tracer_thread; + int error; + char *instance_path; + FILE *trace_pipe; + + enum ftracer_op (*process_line)(const char *line); + void (*destructor)(struct test_ftracer *tracer); + bool (*expecting_more)(void); + + char **saved_lines; + size_t saved_lines_size; + size_t next_line_ind; + + pthread_cond_t met_all_expected; + pthread_mutex_t met_all_expected_lock; + + struct test_ftracer *next; +}; + +static struct test_ftracer *ftracers; +static pthread_mutex_t ftracers_lock = PTHREAD_MUTEX_INITIALIZER; + +static int mount_ftrace(void) +{ + if (!mkdtemp(ftrace_path)) + test_error("Can't create temp dir"); + + if (mount("tracefs", ftrace_path, "tracefs", 0, "rw")) + return -errno; + + ftrace_mounted = true; + + return 0; +} + +static void unmount_ftrace(void) +{ + if (ftrace_mounted && umount(ftrace_path)) + test_print("Failed on cleanup: can't unmount tracefs: %m"); + + if (rmdir(ftrace_path)) + test_error("Failed on cleanup: can't remove ftrace dir %s", + ftrace_path); +} + +struct opts_list_t { + char *opt_name; + struct opts_list_t *next; +}; + +static int disable_trace_options(const char *ftrace_path) +{ + struct opts_list_t *opts_list = NULL; + char *fopts, *line = NULL; + size_t buf_len = 0; + ssize_t line_len; + int ret = 0; + FILE *opts; + + fopts = test_sprintf("%s/%s", ftrace_path, "trace_options"); + if (!fopts) + return -ENOMEM; + + opts = fopen(fopts, "r+"); + if (!opts) { + ret = -errno; + goto out_free; + } + + while ((line_len = getline(&line, &buf_len, opts)) != -1) { + struct opts_list_t *tmp; + + if (!strncmp(line, "no", 2)) + continue; + + tmp = malloc(sizeof(*tmp)); + if (!tmp) { + ret = -ENOMEM; + goto out_free_opts_list; + } + tmp->next = opts_list; + tmp->opt_name = test_sprintf("no%s", line); + if (!tmp->opt_name) { + ret = -ENOMEM; + free(tmp); + goto out_free_opts_list; + } + opts_list = tmp; + } + + while (opts_list) { + struct opts_list_t *tmp = opts_list; + + fseek(opts, 0, SEEK_SET); + fwrite(tmp->opt_name, 1, strlen(tmp->opt_name), opts); + + opts_list = opts_list->next; + free(tmp->opt_name); + free(tmp); + } + +out_free_opts_list: + while (opts_list) { + struct opts_list_t *tmp = opts_list; + + opts_list = opts_list->next; + free(tmp->opt_name); + free(tmp); + } + free(line); + fclose(opts); +out_free: + free(fopts); + return ret; +} + +static int setup_buffer_size(const char *ftrace_path, size_t sz) +{ + char *fbuf_size = test_sprintf("%s/buffer_size_kb", ftrace_path); + int ret; + + if (!fbuf_size) + return -1; + + ret = test_echo(fbuf_size, 0, "%zu", sz); + free(fbuf_size); + return ret; +} + +static int setup_ftrace_instance(struct test_ftracer *tracer, const char *name) +{ + char *tmp; + + tmp = test_sprintf("%s/instances/ksft-%s-XXXXXX", ftrace_path, name); + if (!tmp) + return -ENOMEM; + + tracer->instance_path = mkdtemp(tmp); + if (!tracer->instance_path) { + free(tmp); + return -errno; + } + + return 0; +} + +static void remove_ftrace_instance(struct test_ftracer *tracer) +{ + if (rmdir(tracer->instance_path)) + test_print("Failed on cleanup: can't remove ftrace instance %s", + tracer->instance_path); + free(tracer->instance_path); +} + +static void tracer_cleanup(void *arg) +{ + struct test_ftracer *tracer = arg; + + fclose(tracer->trace_pipe); +} + +static void tracer_set_error(struct test_ftracer *tracer, int error) +{ + if (!tracer->error) + tracer->error = error; +} + +const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer) +{ + return tracer->next_line_ind; +} + +const char **tracer_get_savedlines(struct test_ftracer *tracer) +{ + return (const char **)tracer->saved_lines; +} + +static void *tracer_thread_func(void *arg) +{ + struct test_ftracer *tracer = arg; + + pthread_cleanup_push(tracer_cleanup, arg); + + while (tracer->next_line_ind < tracer->saved_lines_size) { + char **lp = &tracer->saved_lines[tracer->next_line_ind]; + enum ftracer_op op; + size_t buf_len = 0; + ssize_t line_len; + + line_len = getline(lp, &buf_len, tracer->trace_pipe); + if (line_len == -1) + break; + + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); + op = tracer->process_line(*lp); + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + + if (tracer->expecting_more) { + pthread_mutex_lock(&tracer->met_all_expected_lock); + if (!tracer->expecting_more()) + pthread_cond_signal(&tracer->met_all_expected); + pthread_mutex_unlock(&tracer->met_all_expected_lock); + } + + if (op == FTRACER_LINE_DISCARD) + continue; + if (op == FTRACER_EXIT) + break; + if (op != FTRACER_LINE_PRESERVE) + test_error("unexpected tracer command %d", op); + + tracer->next_line_ind++; + buf_len = 0; + } + test_print("too many lines in ftracer buffer %zu, exiting tracer", + tracer->next_line_ind); + + pthread_cleanup_pop(1); + return NULL; +} + +static int setup_trace_thread(struct test_ftracer *tracer) +{ + int ret = 0; + char *path; + + path = test_sprintf("%s/trace_pipe", tracer->instance_path); + if (!path) + return -ENOMEM; + + tracer->trace_pipe = fopen(path, "r"); + if (!tracer->trace_pipe) { + ret = -errno; + goto out_free; + } + + if (pthread_create(&tracer->tracer_thread, NULL, + tracer_thread_func, (void *)tracer)) { + ret = -errno; + fclose(tracer->trace_pipe); + } + +out_free: + free(path); + return ret; +} + +static void stop_trace_thread(struct test_ftracer *tracer) +{ + void *res; + + if (pthread_cancel(tracer->tracer_thread)) { + test_print("Can't stop tracer pthread: %m"); + tracer_set_error(tracer, -errno); + } + if (pthread_join(tracer->tracer_thread, &res)) { + test_print("Can't join tracer pthread: %m"); + tracer_set_error(tracer, -errno); + } + if (res != PTHREAD_CANCELED) { + test_print("Tracer thread wasn't canceled"); + tracer_set_error(tracer, -errno); + } + if (tracer->error) + test_fail("tracer errored by %s", strerror(tracer->error)); +} + +static void final_wait_for_events(struct test_ftracer *tracer, + unsigned timeout_sec) +{ + struct timespec timeout; + struct timeval now; + int ret = 0; + + if (!tracer->expecting_more) + return; + + pthread_mutex_lock(&tracer->met_all_expected_lock); + gettimeofday(&now, NULL); + timeout.tv_sec = now.tv_sec + timeout_sec; + timeout.tv_nsec = now.tv_usec * 1000; + + while (tracer->expecting_more() && ret != ETIMEDOUT) + ret = pthread_cond_timedwait(&tracer->met_all_expected, + &tracer->met_all_expected_lock, &timeout); + pthread_mutex_unlock(&tracer->met_all_expected_lock); +} + +int setup_trace_event(struct test_ftracer *tracer, + const char *event, const char *filter) +{ + char *enable_path, *filter_path, *instance = tracer->instance_path; + int ret; + + enable_path = test_sprintf("%s/events/%s/enable", instance, event); + if (!enable_path) + return -ENOMEM; + + filter_path = test_sprintf("%s/events/%s/filter", instance, event); + if (!filter_path) { + ret = -ENOMEM; + goto out_free; + } + + ret = test_echo(filter_path, 0, "%s", filter); + if (!ret) + ret = test_echo(enable_path, 0, "1"); + +out_free: + free(filter_path); + free(enable_path); + return ret; +} + +struct test_ftracer *create_ftracer(const char *name, + enum ftracer_op (*process_line)(const char *line), + void (*destructor)(struct test_ftracer *tracer), + bool (*expecting_more)(void), + size_t lines_buf_sz, size_t buffer_size_kb) +{ + struct test_ftracer *tracer; + int err; + + /* XXX: separate __create_ftracer() helper and do here + * if (!kernel_config_has(KCONFIG_FTRACE)) + * return NULL; + */ + + tracer = malloc(sizeof(*tracer)); + if (!tracer) { + test_print("malloc()"); + return NULL; + } + + memset(tracer, 0, sizeof(*tracer)); + + err = setup_ftrace_instance(tracer, name); + if (err) { + test_print("setup_ftrace_instance(): %d", err); + goto err_free; + } + + err = disable_trace_options(tracer->instance_path); + if (err) { + test_print("disable_trace_options(): %d", err); + goto err_remove; + } + + err = setup_buffer_size(tracer->instance_path, buffer_size_kb); + if (err) { + test_print("disable_trace_options(): %d", err); + goto err_remove; + } + + tracer->saved_lines = calloc(lines_buf_sz, sizeof(tracer->saved_lines[0])); + if (!tracer->saved_lines) { + test_print("calloc()"); + goto err_remove; + } + tracer->saved_lines_size = lines_buf_sz; + + tracer->process_line = process_line; + tracer->destructor = destructor; + tracer->expecting_more = expecting_more; + + err = pthread_cond_init(&tracer->met_all_expected, NULL); + if (err) { + test_print("pthread_cond_init(): %d", err); + goto err_free_lines; + } + + err = pthread_mutex_init(&tracer->met_all_expected_lock, NULL); + if (err) { + test_print("pthread_mutex_init(): %d", err); + goto err_cond_destroy; + } + + err = setup_trace_thread(tracer); + if (err) { + test_print("setup_trace_thread(): %d", err); + goto err_mutex_destroy; + } + + pthread_mutex_lock(&ftracers_lock); + tracer->next = ftracers; + ftracers = tracer; + pthread_mutex_unlock(&ftracers_lock); + + return tracer; + +err_mutex_destroy: + pthread_mutex_destroy(&tracer->met_all_expected_lock); +err_cond_destroy: + pthread_cond_destroy(&tracer->met_all_expected); +err_free_lines: + free(tracer->saved_lines); +err_remove: + remove_ftrace_instance(tracer); +err_free: + free(tracer); + return NULL; +} + +static void __destroy_ftracer(struct test_ftracer *tracer) +{ + size_t i; + + final_wait_for_events(tracer, TEST_TIMEOUT_SEC); + stop_trace_thread(tracer); + remove_ftrace_instance(tracer); + if (tracer->destructor) + tracer->destructor(tracer); + for (i = 0; i < tracer->saved_lines_size; i++) + free(tracer->saved_lines[i]); + pthread_cond_destroy(&tracer->met_all_expected); + pthread_mutex_destroy(&tracer->met_all_expected_lock); + free(tracer); +} + +void destroy_ftracer(struct test_ftracer *tracer) +{ + pthread_mutex_lock(&ftracers_lock); + if (tracer == ftracers) { + ftracers = tracer->next; + } else { + struct test_ftracer *f = ftracers; + + while (f->next != tracer) { + if (!f->next) + test_error("tracers list corruption or double free %p", tracer); + f = f->next; + } + f->next = tracer->next; + } + tracer->next = NULL; + pthread_mutex_unlock(&ftracers_lock); + __destroy_ftracer(tracer); +} + +static void destroy_all_ftracers(void) +{ + struct test_ftracer *f; + + pthread_mutex_lock(&ftracers_lock); + f = ftracers; + ftracers = NULL; + pthread_mutex_unlock(&ftracers_lock); + + while (f) { + struct test_ftracer *n = f->next; + + f->next = NULL; + __destroy_ftracer(f); + f = n; + } +} + +static void test_unset_tracing(void) +{ + destroy_all_ftracers(); + unmount_ftrace(); +} + +int test_setup_tracing(void) +{ + /* + * Just a basic protection - this should be called only once from + * lib/kconfig. Not thread safe, which is fine as it's early, before + * threads are created. + */ + static int already_set; + int err; + + if (already_set) + return -1; + + /* Needs net-namespace cookies for filters */ + if (ns_cookie1 == ns_cookie2) { + test_print("net-namespace cookies: %" PRIu64 " == %" PRIu64 ", can't set up tracing", + ns_cookie1, ns_cookie2); + return -1; + } + + already_set = 1; + + test_add_destructor(test_unset_tracing); + + err = mount_ftrace(); + if (err) { + test_print("failed to mount_ftrace(): %d", err); + return err; + } + + return setup_aolib_ftracer(); +} + +static int get_ns_cookie(int nsfd, uint64_t *out) +{ + int old_ns = switch_save_ns(nsfd); + socklen_t size = sizeof(*out); + int sk; + + sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + test_print("socket(): %m"); + return -errno; + } + + if (getsockopt(sk, SOL_SOCKET, SO_NETNS_COOKIE, out, &size)) { + test_print("getsockopt(SO_NETNS_COOKIE): %m"); + close(sk); + return -errno; + } + + close(sk); + switch_close_ns(old_ns); + return 0; +} + +void test_init_ftrace(int nsfd1, int nsfd2) +{ + get_ns_cookie(nsfd1, &ns_cookie1); + get_ns_cookie(nsfd2, &ns_cookie2); + /* Populate kernel config state */ + kernel_config_has(KCONFIG_FTRACE); +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c index f279ffc3843b..9f1c175846f8 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c +++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c @@ -6,7 +6,7 @@ #include "aolib.h" struct kconfig_t { - int _errno; /* the returned error if not supported */ + int _error; /* negative errno if not supported */ int (*check_kconfig)(int *error); }; @@ -62,7 +62,7 @@ static int has_tcp_ao(int *err) memcpy(&tmp.addr, &addr, sizeof(addr)); *err = 0; if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) { - *err = errno; + *err = -errno; if (errno != ENOPROTOOPT) ret = -errno; } @@ -87,7 +87,7 @@ static int has_tcp_md5(int *err) */ *err = 0; if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) { - *err = errno; + *err = -errno; if (errno != ENOPROTOOPT && errno == ENOMEM) { test_print("setsockopt(TCP_MD5SIG_EXT): %m"); ret = -errno; @@ -116,13 +116,21 @@ static int has_vrfs(int *err) return ret; } +static int has_ftrace(int *err) +{ + *err = test_setup_tracing(); + return 0; +} + +#define KCONFIG_UNKNOWN 1 static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER; static struct kconfig_t kconfig[__KCONFIG_LAST__] = { - { -1, has_net_ns }, - { -1, has_veth }, - { -1, has_tcp_ao }, - { -1, has_tcp_md5 }, - { -1, has_vrfs }, + { KCONFIG_UNKNOWN, has_net_ns }, + { KCONFIG_UNKNOWN, has_veth }, + { KCONFIG_UNKNOWN, has_tcp_ao }, + { KCONFIG_UNKNOWN, has_tcp_md5 }, + { KCONFIG_UNKNOWN, has_vrfs }, + { KCONFIG_UNKNOWN, has_ftrace }, }; const char *tests_skip_reason[__KCONFIG_LAST__] = { @@ -131,6 +139,7 @@ const char *tests_skip_reason[__KCONFIG_LAST__] = { "Tests require TCP-AO support (CONFIG_TCP_AO)", "setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)", "VRFs are not supported (CONFIG_NET_VRF)", + "Ftrace points are not supported (CONFIG_TRACEPOINTS)", }; bool kernel_config_has(enum test_needs_kconfig k) @@ -138,11 +147,11 @@ bool kernel_config_has(enum test_needs_kconfig k) bool ret; pthread_mutex_lock(&kconfig_lock); - if (kconfig[k]._errno == -1) { - if (kconfig[k].check_kconfig(&kconfig[k]._errno)) + if (kconfig[k]._error == KCONFIG_UNKNOWN) { + if (kconfig[k].check_kconfig(&kconfig[k]._error)) test_error("Failed to initialize kconfig %u", k); } - ret = kconfig[k]._errno == 0; + ret = kconfig[k]._error == 0; pthread_mutex_unlock(&kconfig_lock); return ret; } diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c index e408b9243b2c..a27cc03c9fbd 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/setup.c +++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c @@ -111,7 +111,7 @@ static void sig_int(int signo) int open_netns(void) { - const char *netns_path = "/proc/self/ns/net"; + const char *netns_path = "/proc/thread-self/ns/net"; int fd; fd = open(netns_path, O_RDONLY); @@ -142,6 +142,13 @@ int switch_save_ns(int new_ns) return ret; } +void switch_close_ns(int fd) +{ + if (setns(fd, CLONE_NEWNET)) + test_error("setns()"); + close(fd); +} + static int nsfd_outside = -1; static int nsfd_parent = -1; static int nsfd_child = -1; @@ -243,9 +250,9 @@ void __test_init(unsigned int ntests, int family, unsigned int prefix, test_print("rand seed %u", (unsigned int)seed); srand(seed); - ksft_print_header(); init_namespaces(); + test_init_ftrace(nsfd_parent, nsfd_child); if (add_veth(veth_name, nsfd_parent, nsfd_child)) test_error("Failed to add veth"); @@ -296,7 +303,7 @@ static bool is_optmem_namespaced(void) int old_ns = switch_save_ns(nsfd_child); optmem_ns = !access(optmem_file, F_OK); - switch_ns(old_ns); + switch_close_ns(old_ns); } return !!optmem_ns; } @@ -317,7 +324,7 @@ size_t test_get_optmem(void) test_error("can't read from %s", optmem_file); fclose(foptmem); if (!is_optmem_namespaced()) - switch_ns(old_ns); + switch_close_ns(old_ns); return ret; } @@ -339,7 +346,7 @@ static void __test_set_optmem(size_t new, size_t *old) test_error("can't write %zu to %s", new, optmem_file); fclose(foptmem); if (!is_optmem_namespaced()) - switch_ns(old_ns); + switch_close_ns(old_ns); } static void test_revert_optmem(void) diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c index 15aeb0963058..ef8e9031d47a 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/sock.c +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -34,10 +34,8 @@ int __test_listen_socket(int backlog, void *addr, size_t addr_sz) return sk; } -int test_wait_fd(int sk, time_t sec, bool write) +static int __test_wait_fd(int sk, struct timeval *tv, bool write) { - struct timeval tv = { .tv_sec = sec }; - struct timeval *ptv = NULL; fd_set fds, efds; int ret; socklen_t slen = sizeof(ret); @@ -47,14 +45,11 @@ int test_wait_fd(int sk, time_t sec, bool write) FD_ZERO(&efds); FD_SET(sk, &efds); - if (sec) - ptv = &tv; - errno = 0; if (write) - ret = select(sk + 1, NULL, &fds, &efds, ptv); + ret = select(sk + 1, NULL, &fds, &efds, tv); else - ret = select(sk + 1, &fds, NULL, &efds, ptv); + ret = select(sk + 1, &fds, NULL, &efds, tv); if (ret < 0) return -errno; if (ret == 0) { @@ -69,8 +64,54 @@ int test_wait_fd(int sk, time_t sec, bool write) return 0; } +int test_wait_fd(int sk, time_t sec, bool write) +{ + struct timeval tv = { .tv_sec = sec, }; + + return __test_wait_fd(sk, sec ? &tv : NULL, write); +} + +static bool __skpair_poll_should_stop(int sk, struct tcp_counters *c, + test_cnt condition) +{ + struct tcp_counters c2; + test_cnt diff; + + if (test_get_tcp_counters(sk, &c2)) + test_error("test_get_tcp_counters()"); + + diff = test_cmp_counters(c, &c2); + test_tcp_counters_free(&c2); + return (diff & condition) == condition; +} + +/* How often wake up and check netns counters & paired (*err) */ +#define POLL_USEC 150 +static int __test_skpair_poll(int sk, bool write, uint64_t timeout, + struct tcp_counters *c, test_cnt cond, + volatile int *err) +{ + uint64_t t; + + for (t = 0; t <= timeout * 1000000; t += POLL_USEC) { + struct timeval tv = { .tv_usec = POLL_USEC, }; + int ret; + + ret = __test_wait_fd(sk, &tv, write); + if (ret != -ETIMEDOUT) + return ret; + if (c && cond && __skpair_poll_should_stop(sk, c, cond)) + break; + if (err && *err) + return *err; + } + if (err) + *err = -ETIMEDOUT; + return -ETIMEDOUT; +} + int __test_connect_socket(int sk, const char *device, - void *addr, size_t addr_sz, time_t timeout) + void *addr, size_t addr_sz, bool async) { long flags; int err; @@ -82,15 +123,6 @@ int __test_connect_socket(int sk, const char *device, test_error("setsockopt(SO_BINDTODEVICE, %s)", device); } - if (!timeout) { - err = connect(sk, addr, addr_sz); - if (err) { - err = -errno; - goto out; - } - return 0; - } - flags = fcntl(sk, F_GETFL); if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) test_error("fcntl()"); @@ -100,9 +132,9 @@ int __test_connect_socket(int sk, const char *device, err = -errno; goto out; } - if (timeout < 0) + if (async) return sk; - err = test_wait_fd(sk, timeout, 1); + err = test_wait_fd(sk, TEST_TIMEOUT_SEC, 1); if (err) goto out; } @@ -113,6 +145,45 @@ out: return err; } +int test_skpair_wait_poll(int sk, bool write, + test_cnt cond, volatile int *err) +{ + struct tcp_counters c; + int ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + + ret = __test_skpair_poll(sk, write, TEST_TIMEOUT_SEC, &c, cond, err); + test_tcp_counters_free(&c); + return ret; +} + +int _test_skpair_connect_poll(int sk, const char *device, + void *addr, size_t addr_sz, + test_cnt condition, volatile int *err) +{ + struct tcp_counters c; + int ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + ret = __test_connect_socket(sk, device, addr, addr_sz, true); + if (ret < 0) { + test_tcp_counters_free(&c); + return (*err = ret); + } + ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, &c, condition, err); + if (ret < 0) + close(sk); + test_tcp_counters_free(&c); + return ret; +} + int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix, int vrf, const char *password) { @@ -333,12 +404,12 @@ do { \ return 0; } -int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) +int test_get_tcp_counters(int sk, struct tcp_counters *out) { struct tcp_ao_getsockopt *key_dump; socklen_t key_dump_sz = sizeof(*key_dump); struct tcp_ao_info_opt info = {}; - bool c1, c2, c3, c4, c5; + bool c1, c2, c3, c4, c5, c6, c7, c8; struct netstat *ns; int err, nr_keys; @@ -346,25 +417,30 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) /* per-netns */ ns = netstat_read(); - out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1); - out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2); - out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3); - out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4); - out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5); + out->ao.netns_ao_good = netstat_get(ns, "TCPAOGood", &c1); + out->ao.netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2); + out->ao.netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3); + out->ao.netns_ao_required = netstat_get(ns, "TCPAORequired", &c4); + out->ao.netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5); + out->netns_md5_notfound = netstat_get(ns, "TCPMD5NotFound", &c6); + out->netns_md5_unexpected = netstat_get(ns, "TCPMD5Unexpected", &c7); + out->netns_md5_failure = netstat_get(ns, "TCPMD5Failure", &c8); netstat_free(ns); - if (c1 || c2 || c3 || c4 || c5) + if (c1 || c2 || c3 || c4 || c5 || c6 || c7 || c8) return -EOPNOTSUPP; err = test_get_ao_info(sk, &info); + if (err == -ENOENT) + return 0; if (err) return err; /* per-socket */ - out->ao_info_pkt_good = info.pkt_good; - out->ao_info_pkt_bad = info.pkt_bad; - out->ao_info_pkt_key_not_found = info.pkt_key_not_found; - out->ao_info_pkt_ao_required = info.pkt_ao_required; - out->ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp; + out->ao.ao_info_pkt_good = info.pkt_good; + out->ao.ao_info_pkt_bad = info.pkt_bad; + out->ao.ao_info_pkt_key_not_found = info.pkt_key_not_found; + out->ao.ao_info_pkt_ao_required = info.pkt_ao_required; + out->ao.ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp; /* per-key */ nr_keys = test_get_ao_keys_nr(sk); @@ -372,14 +448,13 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) return nr_keys; if (nr_keys == 0) test_error("test_get_ao_keys_nr() == 0"); - out->nr_keys = (size_t)nr_keys; + out->ao.nr_keys = (size_t)nr_keys; key_dump = calloc(nr_keys, key_dump_sz); if (!key_dump) return -errno; key_dump[0].nkeys = nr_keys; key_dump[0].get_all = 1; - key_dump[0].get_all = 1; err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, key_dump, &key_dump_sz); if (err) { @@ -387,72 +462,84 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) return -errno; } - out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0])); - if (!out->key_cnts) { + out->ao.key_cnts = calloc(nr_keys, sizeof(out->ao.key_cnts[0])); + if (!out->ao.key_cnts) { free(key_dump); return -errno; } while (nr_keys--) { - out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid; - out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid; - out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good; - out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad; + out->ao.key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid; + out->ao.key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid; + out->ao.key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good; + out->ao.key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad; } free(key_dump); return 0; } -int __test_tcp_ao_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, - struct tcp_ao_counters *after, - test_cnt expected) +test_cnt test_cmp_counters(struct tcp_counters *before, + struct tcp_counters *after) { -#define __cmp_ao(cnt, expecting_inc) \ +#define __cmp(cnt, e_cnt) \ +do { \ + if (before->cnt > after->cnt) \ + test_error("counter " __stringify(cnt) " decreased"); \ + if (before->cnt != after->cnt) \ + ret |= e_cnt; \ +} while (0) + + test_cnt ret = 0; + size_t i; + + if (before->ao.nr_keys != after->ao.nr_keys) + test_error("the number of keys has changed"); + + _for_each_counter(__cmp); + + i = before->ao.nr_keys; + while (i--) { + __cmp(ao.key_cnts[i].pkt_good, TEST_CNT_KEY_GOOD); + __cmp(ao.key_cnts[i].pkt_bad, TEST_CNT_KEY_BAD); + } +#undef __cmp + return ret; +} + +int test_assert_counters_sk(const char *tst_name, + struct tcp_counters *before, + struct tcp_counters *after, + test_cnt expected) +{ +#define __cmp_ao(cnt, e_cnt) \ do { \ if (before->cnt > after->cnt) { \ test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \ - tst_name ?: "", before->cnt, after->cnt); \ + tst_name ?: "", before->cnt, after->cnt); \ return -1; \ } \ - if ((before->cnt != after->cnt) != (expecting_inc)) { \ + if ((before->cnt != after->cnt) != !!(expected & e_cnt)) { \ test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \ - tst_name ?: "", (expecting_inc) ? "" : "not ", \ + tst_name ?: "", (expected & e_cnt) ? "" : "not ", \ before->cnt, after->cnt); \ return -1; \ } \ -} while(0) +} while (0) errno = 0; - /* per-netns */ - __cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD)); - __cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD)); - __cmp_ao(netns_ao_key_not_found, - !!(expected & TEST_CNT_NS_KEY_NOT_FOUND)); - __cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED)); - __cmp_ao(netns_ao_dropped_icmp, - !!(expected & TEST_CNT_NS_DROPPED_ICMP)); - /* per-socket */ - __cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD)); - __cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD)); - __cmp_ao(ao_info_pkt_key_not_found, - !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND)); - __cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED)); - __cmp_ao(ao_info_pkt_dropped_icmp, - !!(expected & TEST_CNT_SOCK_DROPPED_ICMP)); + _for_each_counter(__cmp_ao); return 0; #undef __cmp_ao } -int test_tcp_ao_key_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, - struct tcp_ao_counters *after, - test_cnt expected, - int sndid, int rcvid) +int test_assert_counters_key(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected, int sndid, int rcvid) { size_t i; -#define __cmp_ao(i, cnt, expecting_inc) \ +#define __cmp_ao(i, cnt, e_cnt) \ do { \ if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) { \ test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \ @@ -462,16 +549,16 @@ do { \ before->key_cnts[i].rcvid); \ return -1; \ } \ - if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) { \ + if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != !!(expected & e_cnt)) { \ test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \ - tst_name ?: "", (expecting_inc) ? "" : "not ",\ + tst_name ?: "", (expected & e_cnt) ? "" : "not ",\ before->key_cnts[i].cnt, \ after->key_cnts[i].cnt, \ before->key_cnts[i].sndid, \ before->key_cnts[i].rcvid); \ return -1; \ } \ -} while(0) +} while (0) if (before->nr_keys != after->nr_keys) { test_fail("%s: Keys changed on the socket %zu != %zu", @@ -486,20 +573,22 @@ do { \ continue; if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid) continue; - __cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD)); - __cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD)); + __cmp_ao(i, pkt_good, TEST_CNT_KEY_GOOD); + __cmp_ao(i, pkt_bad, TEST_CNT_KEY_BAD); } return 0; #undef __cmp_ao } -void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts) +void test_tcp_counters_free(struct tcp_counters *cnts) { - free(cnts->key_cnts); + free(cnts->ao.key_cnts); } #define TEST_BUF_SIZE 4096 -ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) +static ssize_t _test_server_run(int sk, ssize_t quota, struct tcp_counters *c, + test_cnt cond, volatile int *err, + time_t timeout_sec) { ssize_t total = 0; @@ -508,7 +597,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) ssize_t bytes, sent; int ret; - ret = test_wait_fd(sk, timeout_sec, 0); + ret = __test_skpair_poll(sk, 0, timeout_sec, c, cond, err); if (ret) return ret; @@ -519,7 +608,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) if (bytes == 0) break; - ret = test_wait_fd(sk, timeout_sec, 1); + ret = __test_skpair_poll(sk, 1, timeout_sec, c, cond, err); if (ret) return ret; @@ -534,13 +623,41 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) return total; } -ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, - const size_t msg_len, time_t timeout_sec) +ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) +{ + return _test_server_run(sk, quota, NULL, 0, NULL, + timeout_sec ?: TEST_TIMEOUT_SEC); +} + +int test_skpair_server(int sk, ssize_t quota, test_cnt cond, volatile int *err) +{ + struct tcp_counters c; + ssize_t ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + + ret = _test_server_run(sk, quota, &c, cond, err, TEST_TIMEOUT_SEC); + test_tcp_counters_free(&c); + return ret; +} + +static ssize_t test_client_loop(int sk, size_t buf_sz, const size_t msg_len, + struct tcp_counters *c, test_cnt cond, + volatile int *err) { char msg[msg_len]; int nodelay = 1; + char *buf; size_t i; + buf = alloca(buf_sz); + if (!buf) + return -ENOMEM; + randomize_buffer(buf, buf_sz); + if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay))) test_error("setsockopt(TCP_NODELAY)"); @@ -548,7 +665,7 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, size_t sent, bytes = min(msg_len, buf_sz - i); int ret; - ret = test_wait_fd(sk, timeout_sec, 1); + ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, c, cond, err); if (ret) return ret; @@ -562,7 +679,8 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, do { ssize_t got; - ret = test_wait_fd(sk, timeout_sec, 0); + ret = __test_skpair_poll(sk, 0, TEST_TIMEOUT_SEC, + c, cond, err); if (ret) return ret; @@ -581,15 +699,31 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, return i; } -int test_client_verify(int sk, const size_t msg_len, const size_t nr, - time_t timeout_sec) +int test_client_verify(int sk, const size_t msg_len, const size_t nr) { size_t buf_sz = msg_len * nr; - char *buf = alloca(buf_sz); ssize_t ret; - randomize_buffer(buf, buf_sz); - ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec); + ret = test_client_loop(sk, buf_sz, msg_len, NULL, 0, NULL); + if (ret < 0) + return (int)ret; + return ret != buf_sz ? -1 : 0; +} + +int test_skpair_client(int sk, const size_t msg_len, const size_t nr, + test_cnt cond, volatile int *err) +{ + struct tcp_counters c; + size_t buf_sz = msg_len * nr; + ssize_t ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + + ret = test_client_loop(sk, buf_sz, msg_len, &c, cond, err); + test_tcp_counters_free(&c); if (ret < 0) return (int)ret; return ret != buf_sz ? -1 : 0; diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c index 372daca525f5..bdf5522c9213 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/utils.c +++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c @@ -21,6 +21,32 @@ void randomize_buffer(void *buf, size_t buflen) } } +__printf(3, 4) int test_echo(const char *fname, bool append, + const char *fmt, ...) +{ + size_t len, written; + va_list vargs; + char *msg; + FILE *f; + + f = fopen(fname, append ? "a" : "w"); + if (!f) + return -errno; + + va_start(vargs, fmt); + msg = test_snprintf(fmt, vargs); + va_end(vargs); + if (!msg) { + fclose(f); + return -1; + } + len = strlen(msg); + written = fwrite(msg, 1, len, f); + fclose(f); + free(msg); + return written == len ? 0 : -1; +} + const struct sockaddr_in6 addr_any6 = { .sin6_family = AF_INET6, }; diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c index 8fdc808df325..9a059b6c4523 100644 --- a/tools/testing/selftests/net/tcp_ao/restore.c +++ b/tools/testing/selftests/net/tcp_ao/restore.c @@ -16,11 +16,11 @@ const size_t quota = nr_packets * msg_len; static void try_server_run(const char *tst_name, unsigned int port, fault_t inj, test_cnt cnt_expected) { + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; const char *cnt_name = "TCPAOGood"; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt, after_cnt; - int sk, lsk; - time_t timeout; + int sk, lsk, dummy; ssize_t bytes; if (fault(TIMEOUT)) @@ -48,11 +48,10 @@ static void try_server_run(const char *tst_name, unsigned int port, } before_cnt = netstat_get_one(cnt_name, NULL); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - bytes = test_server_run(sk, quota, timeout); + bytes = test_skpair_server(sk, quota, poll_cnt, &dummy); if (fault(TIMEOUT)) { if (bytes > 0) test_fail("%s: server served: %zd", tst_name, bytes); @@ -64,17 +63,18 @@ static void try_server_run(const char *tst_name, unsigned int port, else test_ok("%s: server alive", tst_name); } - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + synchronize_threads(); /* 3: counters checks */ + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_cnt = netstat_get_one(cnt_name, NULL); - test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", - tst_name, cnt_name, after_cnt, before_cnt); + test_fail("%s(server): %s counter did not increase: %" PRIu64 " <= %" PRIu64, + tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s(server): counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } @@ -82,7 +82,7 @@ static void try_server_run(const char *tst_name, unsigned int port, * Before close() as that will send FIN and move the peer in TCP_CLOSE * and that will prevent reading AO counters from the peer's socket. */ - synchronize_threads(); /* 3: verified => closed */ + synchronize_threads(); /* 4: verified => closed */ out: close(sk); } @@ -91,16 +91,16 @@ static void *server_fn(void *arg) { unsigned int port = test_server_port; - try_server_run("TCP-AO migrate to another socket", port++, + try_server_run("TCP-AO migrate to another socket (server)", port++, 0, TEST_CNT_GOOD); - try_server_run("TCP-AO with wrong send ISN", port++, + try_server_run("TCP-AO with wrong send ISN (server)", port++, FAULT_TIMEOUT, TEST_CNT_BAD); - try_server_run("TCP-AO with wrong receive ISN", port++, + try_server_run("TCP-AO with wrong receive ISN (server)", port++, FAULT_TIMEOUT, TEST_CNT_BAD); - try_server_run("TCP-AO with wrong send SEQ ext number", port++, + try_server_run("TCP-AO with wrong send SEQ ext number (server)", port++, FAULT_TIMEOUT, TEST_CNT_BAD); - try_server_run("TCP-AO with wrong receive SEQ ext number", port++, - FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); + try_server_run("TCP-AO with wrong receive SEQ ext number (server)", + port++, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); synchronize_threads(); /* don't race to exit: client exits */ return NULL; @@ -124,7 +124,7 @@ static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr, test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("pre-migrate verify failed"); test_enable_repair(sk); @@ -138,11 +138,11 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port, struct tcp_ao_repair *ao_img, fault_t inj, test_cnt cnt_expected) { + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; const char *cnt_name = "TCPAOGood"; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt, after_cnt; - time_t timeout; - int sk; + int sk, dummy; if (fault(TIMEOUT)) cnt_name = "TCPAOBad"; @@ -158,38 +158,39 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port, test_error("setsockopt(TCP_AO_ADD_KEY)"); test_ao_restore(sk, ao_img); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); test_disable_repair(sk); test_sock_state_free(img); - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - if (test_client_verify(sk, msg_len, nr_packets, timeout)) { + if (test_skpair_client(sk, msg_len, nr_packets, poll_cnt, &dummy)) { if (fault(TIMEOUT)) test_ok("%s: post-migrate connection is broken", tst_name); else test_fail("%s: post-migrate connection is working", tst_name); } else { if (fault(TIMEOUT)) - test_fail("%s: post-migrate connection still working", tst_name); + test_fail("%s: post-migrate connection is working", tst_name); else test_ok("%s: post-migrate connection is alive", tst_name); } - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 3: counters checks */ + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_cnt = netstat_get_one(cnt_name, NULL); - test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } - synchronize_threads(); /* 3: verified => closed */ + synchronize_threads(); /* 4: verified => closed */ close(sk); } @@ -201,29 +202,43 @@ static void *client_fn(void *arg) sockaddr_af saddr; test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); - test_sk_restore("TCP-AO migrate to another socket", port++, + test_sk_restore("TCP-AO migrate to another socket (client)", port++, &saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.snt_isn += 1; - test_sk_restore("TCP-AO with wrong send ISN", port++, + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); + test_sk_restore("TCP-AO with wrong send ISN (client)", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.rcv_isn += 1; - test_sk_restore("TCP-AO with wrong receive ISN", port++, + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); + test_sk_restore("TCP-AO with wrong receive ISN (client)", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.snd_sne += 1; - test_sk_restore("TCP-AO with wrong send SEQ ext number", port++, - &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); + /* not expecting server => client mismatches as only snd sne is broken */ + test_sk_restore("TCP-AO with wrong send SEQ ext number (client)", + port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.rcv_sne += 1; - test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++, - &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + /* not expecting client => server mismatches as only rcv sne is broken */ + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); + test_sk_restore("TCP-AO with wrong receive SEQ ext number (client)", + port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_NS_GOOD | TEST_CNT_BAD); return NULL; @@ -231,6 +246,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(20, server_fn, client_fn); + test_init(21, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c index a2fe88d35ac0..883cddf377cf 100644 --- a/tools/testing/selftests/net/tcp_ao/rst.c +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -84,15 +84,15 @@ static void close_forced(int sk) static void test_server_active_rst(unsigned int port) { - struct tcp_ao_counters cnt1, cnt2; + struct tcp_counters cnt1, cnt2; ssize_t bytes; int sk, lsk; lsk = test_listen_socket(this_ip_addr, port, backlog); if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) test_error("setsockopt(TCP_AO_ADD_KEY)"); - if (test_get_tcp_ao_counters(lsk, &cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 1: MKT added */ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) @@ -103,8 +103,8 @@ static void test_server_active_rst(unsigned int port) test_error("accept()"); synchronize_threads(); /* 2: connection accept()ed, another queued */ - if (test_get_tcp_ao_counters(lsk, &cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &cnt2)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 3: close listen socket */ close(lsk); @@ -120,7 +120,7 @@ static void test_server_active_rst(unsigned int port) synchronize_threads(); /* 5: closed active sk */ synchronize_threads(); /* 6: counters checks */ - if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD)) + if (test_assert_counters("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD)) test_fail("MKT counters (server) have not only good packets"); else test_ok("MKT counters are good on server"); @@ -128,7 +128,7 @@ static void test_server_active_rst(unsigned int port) static void test_server_passive_rst(unsigned int port) { - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; int sk, lsk; ssize_t bytes; @@ -147,8 +147,8 @@ static void test_server_passive_rst(unsigned int port) synchronize_threads(); /* 2: accepted => send data */ close(lsk); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); if (bytes != quota) { @@ -160,12 +160,12 @@ static void test_server_passive_rst(unsigned int port) synchronize_threads(); /* 3: checkpoint the client */ synchronize_threads(); /* 4: close the server, creating twsk */ - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); close(sk); synchronize_threads(); /* 5: restore the socket, send more data */ - test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters("passive RST server", &cnt1, &cnt2, TEST_CNT_GOOD); synchronize_threads(); /* 6: server exits */ } @@ -271,8 +271,7 @@ static void test_client_active_rst(unsigned int port) synchronize_threads(); /* 1: MKT added */ for (i = 0; i < last; i++) { - err = _test_connect_socket(sk[i], this_ip_dest, port, - (i == 0) ? TEST_TIMEOUT_SEC : -1); + err = _test_connect_socket(sk[i], this_ip_dest, port, i != 0); if (err < 0) test_error("failed to connect()"); } @@ -283,12 +282,12 @@ static void test_client_active_rst(unsigned int port) test_error("test_wait_fds(): %d", err); /* async connect() with third sk to get into request_sock_queue */ - err = _test_connect_socket(sk[last], this_ip_dest, port, -1); + err = _test_connect_socket(sk[last], this_ip_dest, port, 1); if (err < 0) test_error("failed to connect()"); synchronize_threads(); /* 3: close listen socket */ - if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk[0], packet_sz, quota / packet_sz)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); @@ -323,7 +322,7 @@ static void test_client_active_rst(unsigned int port) static void test_client_passive_rst(unsigned int port) { - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; struct tcp_ao_repair ao_img; struct tcp_sock_state img; sockaddr_af saddr; @@ -341,7 +340,7 @@ static void test_client_passive_rst(unsigned int port) test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, packet_sz, quota / packet_sz)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); @@ -397,8 +396,8 @@ static void test_client_passive_rst(unsigned int port) test_error("setsockopt(TCP_AO_ADD_KEY)"); test_ao_restore(sk, &ao_img); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); test_disable_repair(sk); test_sock_state_free(&img); @@ -417,7 +416,7 @@ static void test_client_passive_rst(unsigned int port) * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0, * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0 */ - err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC); + err = test_client_verify(sk, packet_sz, quota / packet_sz); /* Make sure that the connection was reset, not timeouted */ if (err && err == -ECONNRESET) test_ok("client sock was passively reset post-seq-adjust"); @@ -426,12 +425,12 @@ static void test_client_passive_rst(unsigned int port) else test_fail("client sock is yet connected post-seq-adjust"); - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 6: server exits */ close(sk); - test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters("client passive RST", &cnt1, &cnt2, TEST_CNT_GOOD); } static void *client_fn(void *arg) @@ -455,6 +454,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(14, server_fn, client_fn); + test_init(15, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c index e154d9e198a9..2c73bea698a6 100644 --- a/tools/testing/selftests/net/tcp_ao/self-connect.c +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -16,6 +16,9 @@ static void __setup_lo_intf(const char *lo_intf, if (link_set_up(lo_intf)) test_error("Failed to bring %s up", lo_intf); + + if (ip_route_add(lo_intf, TEST_FAMILY, local_addr, local_addr)) + test_error("Failed to add a local route %s", lo_intf); } static void setup_lo_intf(const char *lo_intf) @@ -30,9 +33,7 @@ static void setup_lo_intf(const char *lo_intf) static void tcp_self_connect(const char *tst, unsigned int port, bool different_keyids, bool check_restore) { - uint64_t before_challenge_ack, after_challenge_ack; - uint64_t before_syn_challenge, after_syn_challenge; - struct tcp_ao_counters before_ao, after_ao; + struct tcp_counters before, after; uint64_t before_aogood, after_aogood; struct netstat *ns_before, *ns_after; const size_t nr_packets = 20; @@ -62,19 +63,17 @@ static void tcp_self_connect(const char *tst, unsigned int port, ns_before = netstat_read(); before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); - before_challenge_ack = netstat_get(ns_before, "TCPChallengeACK", NULL); - before_syn_challenge = netstat_get(ns_before, "TCPSYNChallenge", NULL); - if (test_get_tcp_ao_counters(sk, &before_ao)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &before)) + test_error("test_get_tcp_counters()"); if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr, - sizeof(addr), TEST_TIMEOUT_SEC) < 0) { + sizeof(addr), 0) < 0) { ns_after = netstat_read(); netstat_print_diff(ns_before, ns_after); test_error("failed to connect()"); } - if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, 100, nr_packets)) { test_fail("%s: tcp connection verify failed", tst); close(sk); return; @@ -82,10 +81,8 @@ static void tcp_self_connect(const char *tst, unsigned int port, ns_after = netstat_read(); after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); - after_challenge_ack = netstat_get(ns_after, "TCPChallengeACK", NULL); - after_syn_challenge = netstat_get(ns_after, "TCPSYNChallenge", NULL); - if (test_get_tcp_ao_counters(sk, &after_ao)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &after)) + test_error("test_get_tcp_counters()"); if (!check_restore) { /* to debug: netstat_print_diff(ns_before, ns_after); */ netstat_free(ns_before); @@ -93,25 +90,13 @@ static void tcp_self_connect(const char *tst, unsigned int port, netstat_free(ns_after); if (after_aogood <= before_aogood) { - test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64, tst, after_aogood, before_aogood); close(sk); return; } - if (after_challenge_ack <= before_challenge_ack || - after_syn_challenge <= before_syn_challenge) { - /* - * It's also meant to test simultaneous open, so check - * these counters as well. - */ - test_fail("%s: Didn't challenge SYN or ACK: %zu <= %zu OR %zu <= %zu", - tst, after_challenge_ack, before_challenge_ack, - after_syn_challenge, before_syn_challenge); - close(sk); - return; - } - if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) { + if (test_assert_counters(tst, &before, &after, TEST_CNT_GOOD)) { close(sk); return; } @@ -154,7 +139,7 @@ static void tcp_self_connect(const char *tst, unsigned int port, test_ao_restore(sk, &ao_img); test_disable_repair(sk); test_sock_state_free(&img); - if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, 100, nr_packets)) { test_fail("%s: tcp connection verify failed", tst); close(sk); return; @@ -166,7 +151,7 @@ static void tcp_self_connect(const char *tst, unsigned int port, netstat_free(ns_after); close(sk); if (after_aogood <= before_aogood) { - test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64, tst, after_aogood, before_aogood); return; } @@ -181,17 +166,26 @@ static void *client_fn(void *arg) setup_lo_intf("lo"); tcp_self_connect("self-connect(same keyids)", port++, false, false); + + /* expecting rnext to change based on the first segment RNext != Current */ + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr, + port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1); tcp_self_connect("self-connect(different keyids)", port++, true, false); tcp_self_connect("self-connect(restore)", port, false, true); - port += 2; + port += 2; /* restore test restores over different port */ + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr, + port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1); + /* intentionally on restore they are added to the socket in different order */ + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr, + port + 1, port + 1, 0, -1, -1, -1, -1, -1, 5, 7, -1); tcp_self_connect("self-connect(restore, different keyids)", port, true, true); - port += 2; + port += 2; /* restore test restores over different port */ return NULL; } int main(int argc, char *argv[]) { - test_init(4, client_fn, NULL); + test_init(5, client_fn, NULL); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c index ad4e77d6823e..f00245263b20 100644 --- a/tools/testing/selftests/net/tcp_ao/seq-ext.c +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -40,7 +40,7 @@ static void test_adjust_seqs(struct tcp_sock_state *img, static int test_sk_restore(struct tcp_sock_state *img, struct tcp_ao_repair *ao_img, sockaddr_af *saddr, const union tcp_addr daddr, unsigned int dport, - struct tcp_ao_counters *cnt) + struct tcp_counters *cnt) { int sk; @@ -54,8 +54,8 @@ static int test_sk_restore(struct tcp_sock_state *img, test_error("setsockopt(TCP_AO_ADD_KEY)"); test_ao_restore(sk, ao_img); - if (test_get_tcp_ao_counters(sk, cnt)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, cnt)) + test_error("test_get_tcp_counters()"); test_disable_repair(sk); test_sock_state_free(img); @@ -65,7 +65,7 @@ static int test_sk_restore(struct tcp_sock_state *img, static void *server_fn(void *arg) { uint64_t before_good, after_good, after_bad; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; struct tcp_sock_state img; struct tcp_ao_repair ao_img; sockaddr_af saddr; @@ -114,9 +114,17 @@ static void *server_fn(void *arg) test_adjust_seqs(&img, &ao_img, true); synchronize_threads(); /* 4: dump finished */ sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, - client_new_port, &ao1); - - synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + client_new_port, &cnt1); + + trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_addr, + this_ip_dest, test_server_port + 1, client_new_port, 1); + trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_dest, + this_ip_addr, client_new_port, test_server_port + 1, 1); + trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_addr, + this_ip_dest, test_server_port + 1, client_new_port, 1); + trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_dest, + this_ip_addr, client_new_port, test_server_port + 1, 1); + synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); if (bytes != quota) { if (bytes > 0) @@ -127,22 +135,23 @@ static void *server_fn(void *arg) test_ok("server alive"); } - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + synchronize_threads(); /* 6: verify counters after SEQ-number rollover */ + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_good = netstat_get_one("TCPAOGood", NULL); - test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD); if (after_good <= before_good) { - test_fail("TCPAOGood counter did not increase: %zu <= %zu", + test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64, after_good, before_good); } else { - test_ok("TCPAOGood counter increased %zu => %zu", + test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64, before_good, after_good); } after_bad = netstat_get_one("TCPAOBad", NULL); if (after_bad) - test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad); else test_ok("TCPAOBad counter didn't increase"); test_enable_repair(sk); @@ -164,7 +173,7 @@ out: static void *client_fn(void *arg) { uint64_t before_good, after_good, after_bad; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; struct tcp_sock_state img; struct tcp_ao_repair ao_img; sockaddr_af saddr; @@ -182,7 +191,7 @@ static void *client_fn(void *arg) test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, msg_len, nr_packets)) { test_fail("pre-migrate verify failed"); return NULL; } @@ -204,30 +213,31 @@ static void *client_fn(void *arg) test_adjust_seqs(&img, &ao_img, false); synchronize_threads(); /* 4: dump finished */ sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, - test_server_port + 1, &ao1); + test_server_port + 1, &cnt1); - synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */ + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("post-migrate verify failed"); else test_ok("post-migrate connection alive"); - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + synchronize_threads(); /* 5: verify counters after SEQ-number rollover */ + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_good = netstat_get_one("TCPAOGood", NULL); - test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD); if (after_good <= before_good) { - test_fail("TCPAOGood counter did not increase: %zu <= %zu", + test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64, after_good, before_good); } else { - test_ok("TCPAOGood counter increased %zu => %zu", + test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64, before_good, after_good); } after_bad = netstat_get_one("TCPAOBad", NULL); if (after_bad) - test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad); else test_ok("TCPAOBad counter didn't increase"); @@ -240,6 +250,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(7, server_fn, client_fn); + test_init(8, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c index 517930f9721b..0abb9807d742 100644 --- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -6,6 +6,8 @@ static union tcp_addr tcp_md5_client; +#define FILTER_TEST_NKEYS 16 + static int test_port = 7788; static void make_listen(int sk) { @@ -30,8 +32,8 @@ static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info, #define __cmp_ao(member) \ do { \ if (info->member != tmp.member) { \ - test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu", \ - tst, (size_t)info->member, (size_t)tmp.member); \ + test_fail("%s: getsockopt(): " __stringify(member) " %" PRIu64 " != %" PRIu64, \ + tst, (uint64_t)info->member, (uint64_t)tmp.member); \ return; \ } \ } while(0) @@ -813,23 +815,197 @@ static void duplicate_tests(void) setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: SendID differs"); } +static void fetch_all_keys(int sk, struct tcp_ao_getsockopt *keys) +{ + socklen_t optlen = sizeof(struct tcp_ao_getsockopt); + + memset(keys, 0, sizeof(struct tcp_ao_getsockopt) * FILTER_TEST_NKEYS); + keys[0].get_all = 1; + keys[0].nkeys = FILTER_TEST_NKEYS; + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &keys[0], &optlen)) + test_error("getsockopt"); +} + +static int prepare_test_keys(struct tcp_ao_getsockopt *keys) +{ + const char *test_password = "Test password number "; + struct tcp_ao_add test_ao[FILTER_TEST_NKEYS]; + char test_password_scratch[64] = {}; + u8 rcvid = 100, sndid = 100; + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + for (int i = 0; i < FILTER_TEST_NKEYS; i++) { + snprintf(test_password_scratch, 64, "%s %d", test_password, i); + test_prepare_key(&test_ao[i], DEFAULT_TEST_ALGO, this_ip_dest, + false, false, DEFAULT_TEST_PREFIX, 0, sndid++, + rcvid++, 0, 0, strlen(test_password_scratch), + test_password_scratch); + } + test_ao[0].set_current = 1; + test_ao[1].set_rnext = 1; + /* One key with a different addr and overlapping sndid, rcvid */ + tcp_addr_to_sockaddr_in(&test_ao[2].addr, &this_ip_addr, 0); + test_ao[2].sndid = 100; + test_ao[2].rcvid = 100; + + /* Add keys in a random order */ + for (int i = 0; i < FILTER_TEST_NKEYS; i++) { + int randidx = rand() % (FILTER_TEST_NKEYS - i); + + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, + &test_ao[randidx], sizeof(struct tcp_ao_add))) + test_error("setsockopt()"); + memcpy(&test_ao[randidx], &test_ao[FILTER_TEST_NKEYS - 1 - i], + sizeof(struct tcp_ao_add)); + } + + fetch_all_keys(sk, keys); + + return sk; +} + +/* Assumes passwords are unique */ +static int compare_mkts(struct tcp_ao_getsockopt *expected, int nexpected, + struct tcp_ao_getsockopt *actual, int nactual) +{ + int matches = 0; + + for (int i = 0; i < nexpected; i++) { + for (int j = 0; j < nactual; j++) { + if (memcmp(expected[i].key, actual[j].key, + TCP_AO_MAXKEYLEN) == 0) + matches++; + } + } + return nexpected - matches; +} + +static void filter_keys_checked(int sk, struct tcp_ao_getsockopt *filter, + struct tcp_ao_getsockopt *expected, + unsigned int nexpected, const char *tst) +{ + struct tcp_ao_getsockopt filtered_keys[FILTER_TEST_NKEYS] = {}; + struct tcp_ao_getsockopt all_keys[FILTER_TEST_NKEYS] = {}; + socklen_t len = sizeof(struct tcp_ao_getsockopt); + + fetch_all_keys(sk, all_keys); + memcpy(&filtered_keys[0], filter, sizeof(struct tcp_ao_getsockopt)); + filtered_keys[0].nkeys = FILTER_TEST_NKEYS; + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, filtered_keys, &len)) + test_error("getsockopt"); + if (filtered_keys[0].nkeys != nexpected) { + test_fail("wrong nr of keys, expected %u got %u", nexpected, + filtered_keys[0].nkeys); + goto out_close; + } + if (compare_mkts(expected, nexpected, filtered_keys, + filtered_keys[0].nkeys)) { + test_fail("got wrong keys back"); + goto out_close; + } + test_ok("filter keys: %s", tst); + +out_close: + close(sk); + memset(filter, 0, sizeof(struct tcp_ao_getsockopt)); +} + +static void filter_tests(void) +{ + struct tcp_ao_getsockopt original_keys[FILTER_TEST_NKEYS]; + struct tcp_ao_getsockopt expected_keys[FILTER_TEST_NKEYS]; + struct tcp_ao_getsockopt filter = {}; + int sk, f, nmatches; + socklen_t len; + + f = 2; + sk = prepare_test_keys(original_keys); + filter.rcvid = original_keys[f].rcvid; + filter.sndid = original_keys[f].sndid; + memcpy(&filter.addr, &original_keys[f].addr, + sizeof(original_keys[f].addr)); + filter.prefix = original_keys[f].prefix; + filter_keys_checked(sk, &filter, &original_keys[f], 1, + "by sndid, rcvid, address"); + + f = -1; + sk = prepare_test_keys(original_keys); + for (int i = 0; i < original_keys[0].nkeys; i++) { + if (original_keys[i].is_current) { + f = i; + break; + } + } + if (f < 0) + test_error("No current key after adding one"); + filter.is_current = 1; + filter_keys_checked(sk, &filter, &original_keys[f], 1, "by is_current"); + + f = -1; + sk = prepare_test_keys(original_keys); + for (int i = 0; i < original_keys[0].nkeys; i++) { + if (original_keys[i].is_rnext) { + f = i; + break; + } + } + if (f < 0) + test_error("No rnext key after adding one"); + filter.is_rnext = 1; + filter_keys_checked(sk, &filter, &original_keys[f], 1, "by is_rnext"); + + f = -1; + nmatches = 0; + sk = prepare_test_keys(original_keys); + for (int i = 0; i < original_keys[0].nkeys; i++) { + if (original_keys[i].sndid == 100) { + f = i; + memcpy(&expected_keys[nmatches], &original_keys[i], + sizeof(struct tcp_ao_getsockopt)); + nmatches++; + } + } + if (f < 0) + test_error("No key for sndid 100"); + if (nmatches != 2) + test_error("Should have 2 keys with sndid 100"); + filter.rcvid = original_keys[f].rcvid; + filter.sndid = original_keys[f].sndid; + filter.addr.ss_family = test_family; + filter_keys_checked(sk, &filter, expected_keys, nmatches, + "by sndid, rcvid"); + + sk = prepare_test_keys(original_keys); + filter.get_all = 1; + filter.nkeys = FILTER_TEST_NKEYS / 2; + len = sizeof(struct tcp_ao_getsockopt); + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &filter, &len)) + test_error("getsockopt"); + if (filter.nkeys == FILTER_TEST_NKEYS) + test_ok("filter keys: correct nkeys when in.nkeys < matches"); + else + test_fail("filter keys: wrong nkeys, expected %u got %u", + FILTER_TEST_NKEYS, filter.nkeys); +} + static void *client_fn(void *arg) { if (inet_pton(TEST_FAMILY, __TEST_CLIENT_IP(2), &tcp_md5_client) != 1) test_error("Can't convert ip address"); extend_tests(); einval_tests(); + filter_tests(); duplicate_tests(); - /* - * TODO: check getsockopt(TCP_AO_GET_KEYS) with different filters - * returning proper nr & keys; - */ return NULL; } int main(int argc, char *argv[]) { - test_init(120, client_fn, NULL); + test_init(126, client_fn, NULL); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c index 6b59a652159f..a1467b64390a 100644 --- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -6,6 +6,7 @@ #define fault(type) (inj == FAULT_ ## type) static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver."; static const char *ao_password = DEFAULT_TEST_PASSWORD; +static volatile int sk_pair; static union tcp_addr client2; static union tcp_addr client3; @@ -41,10 +42,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *cnt_name, test_cnt cnt_expected, int needs_tcp_md5, fault_t inj) { - struct tcp_ao_counters ao_cnt1, ao_cnt2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ - int lsk, err, sk = 0; - time_t timeout; + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; + int lsk, err, sk = -1; if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) return; @@ -63,21 +64,25 @@ static void try_accept(const char *tst_name, unsigned int port, if (cnt_name) before_cnt = netstat_get_one(cnt_name, NULL); - if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (ao_addr && test_get_tcp_counters(lsk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - err = test_wait_fd(lsk, timeout, 0); + err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair); + synchronize_threads(); /* connect()/accept() timeouts */ if (err == -ETIMEDOUT) { + sk_pair = err; if (!fault(TIMEOUT)) - test_fail("timed out for accept()"); + test_fail("%s: timed out for accept()", tst_name); + } else if (err == -EKEYREJECTED) { + if (!fault(KEYREJECT)) + test_fail("%s: key was rejected", tst_name); } else if (err < 0) { - test_error("test_wait_fd()"); + test_error("test_skpair_wait_poll()"); } else { if (fault(TIMEOUT)) - test_fail("ready to accept"); + test_fail("%s: ready to accept", tst_name); sk = accept(lsk, NULL, NULL); if (sk < 0) { @@ -88,8 +93,8 @@ static void try_accept(const char *tst_name, unsigned int port, } } - if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (ao_addr && test_get_tcp_counters(lsk, &cnt2)) + test_error("test_get_tcp_counters()"); close(lsk); if (!cnt_name) { @@ -100,18 +105,18 @@ static void try_accept(const char *tst_name, unsigned int port, after_cnt = netstat_get_one(cnt_name, NULL); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } if (ao_addr) - test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); out: synchronize_threads(); /* test_kill_sk() */ - if (sk > 0) + if (sk >= 0) test_kill_sk(sk); } @@ -152,78 +157,82 @@ static void *server_fn(void *arg) server_add_routes(); - try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0, + try_accept("[server] AO server (INADDR_ANY): AO client", port++, NULL, 0, &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0); - try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0, + try_accept("[server] AO server (INADDR_ANY): MD5 client", port++, NULL, 0, &addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected", - 0, 1, FAULT_TIMEOUT); - try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0, + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] AO server (INADDR_ANY): no sign client", port++, NULL, 0, &addr_any, 0, 0, 100, 100, 0, "TCPAORequired", TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); - try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0, + try_accept("[server] AO server (AO_REQUIRED): AO client", port++, NULL, 0, &this_ip_dest, TEST_PREFIX, true, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0); - try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, + try_accept("[server] AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, &this_ip_dest, TEST_PREFIX, true, 100, 100, 0, "TCPAORequired", TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); - try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0, + try_accept("[server] MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0, NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", - 0, 1, FAULT_TIMEOUT); - try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + TEST_CNT_NS_KEY_NOT_FOUND, 1, FAULT_TIMEOUT); + try_accept("[server] MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0); - try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any, + try_accept("[server] MD5 server (INADDR_ANY): no sign client", port++, &addr_any, 0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound", - 0, 1, FAULT_TIMEOUT); + TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("no sign server: AO client", port++, NULL, 0, + try_accept("[server] no sign server: AO client", port++, NULL, 0, NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", - TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT); - try_accept("no sign server: MD5 client", port++, NULL, 0, + TEST_CNT_NS_KEY_NOT_FOUND, 0, FAULT_TIMEOUT); + try_accept("[server] no sign server: MD5 client", port++, NULL, 0, NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected", - 0, 1, FAULT_TIMEOUT); - try_accept("no sign server: no sign client", port++, NULL, 0, + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] no sign server: no sign client", port++, NULL, 0, NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0); - try_accept("AO+MD5 server: AO client (matching)", port++, + try_accept("[server] AO+MD5 server: AO client (matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0); - try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++, + try_accept("[server] AO+MD5 server: AO client (misconfig, matching MD5)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++, + try_accept("[server] AO+MD5 server: AO client (misconfig, non-matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: MD5 client (matching)", port++, + try_accept("[server] AO+MD5 server: MD5 client (matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, NULL, 0, 1, 0); - try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++, + try_accept("[server] AO+MD5 server: MD5 client (misconfig, matching AO)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++, + 100, 100, 0, "TCPMD5Unexpected", + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] AO+MD5 server: MD5 client (misconfig, non-matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: no sign client (unmatched)", port++, + 100, 100, 0, "TCPMD5Unexpected", + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] AO+MD5 server: no sign client (unmatched)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "CurrEstab", 0, 1, 0); - try_accept("AO+MD5 server: no sign client (misconfig, matching AO)", + try_accept("[server] AO+MD5 server: no sign client (misconfig, matching AO)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAORequired", TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)", + try_accept("[server] AO+MD5 server: no sign client (misconfig, matching MD5)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT); + 100, 100, 0, "TCPMD5NotFound", + TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", + /* Key rejected by the other side, failing short through skpair */ + try_accept("[server] AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", + 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT); + try_accept("[server] AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); + 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT); server_add_fail_tests(&port); @@ -258,7 +267,6 @@ static void try_connect(const char *tst_name, unsigned int port, uint8_t sndid, uint8_t rcvid, uint8_t vrf, fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr) { - time_t timeout; int sk, ret; if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) @@ -280,10 +288,10 @@ static void try_connect(const char *tst_name, unsigned int port, synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - ret = _test_connect_socket(sk, this_ip_dest, port, timeout); - + ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair); + synchronize_threads(); /* connect()/accept() timeouts */ if (ret < 0) { + sk_pair = ret; if (fault(KEYREJECT) && ret == -EKEYREJECTED) test_ok("%s: connect() was prevented", tst_name); else if (ret == -ETIMEDOUT && fault(TIMEOUT)) @@ -303,8 +311,7 @@ static void try_connect(const char *tst_name, unsigned int port, out: synchronize_threads(); /* test_kill_sk() */ - /* _test_connect_socket() cleans up on failure */ - if (ret > 0) + if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */ test_kill_sk(sk); } @@ -435,7 +442,6 @@ static void try_to_add(const char *tst_name, unsigned int port, int ao_vrf, uint8_t sndid, uint8_t rcvid, int needs_tcp_md5, fault_t inj) { - time_t timeout; int sk, ret; if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) @@ -448,10 +454,10 @@ static void try_to_add(const char *tst_name, unsigned int port, synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair); - if (ret <= 0) { + synchronize_threads(); /* connect()/accept() timeouts */ + if (ret < 0) { test_error("%s: connect() returned %d", tst_name, ret); goto out; } @@ -487,8 +493,7 @@ static void try_to_add(const char *tst_name, unsigned int port, out: synchronize_threads(); /* test_kill_sk() */ - /* _test_connect_socket() cleans up on failure */ - if (ret > 0) + if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */ test_kill_sk(sk); } @@ -671,24 +676,38 @@ static void *client_fn(void *arg) try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("no sign server: AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("no sign server: MD5 client", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); try_connect("no sign server: no sign client", port++, NULL, 0, @@ -696,25 +715,37 @@ static void *client_fn(void *arg) try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0, &addr_any, 0, 100, 100, 0, 0, 1, &client2); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("AO+MD5 server: AO client (misconfig, matching MD5)", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, client3, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("AO+MD5 server: AO client (misconfig, non-matching)", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client3); try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0, NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client2, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client2); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client3, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client3); try_connect("AO+MD5 server: no sign client (unmatched)", port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: no sign client (misconfig, matching AO)", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client2); + trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); @@ -736,6 +767,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(72, server_fn, client_fn); + test_init(73, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh deleted file mode 100755 index 3119b80e711f..000000000000 --- a/tools/testing/selftests/net/test_blackhole_dev.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Runs blackhole-dev test using blackhole-dev kernel module - -if /sbin/modprobe -q test_blackhole_dev ; then - /sbin/modprobe -q -r test_blackhole_dev; - echo "test_blackhole_dev: ok"; -else - echo "test_blackhole_dev: [FAIL]"; - exit 1; -fi diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh index 8533393a4f18..9067197c9055 100755 --- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh +++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh @@ -51,7 +51,9 @@ ret=0 # All tests in this script. Can be overridden with -t option. TESTS=" neigh_suppress_arp + neigh_suppress_uc_arp neigh_suppress_ns + neigh_suppress_uc_ns neigh_vlan_suppress_arp neigh_vlan_suppress_ns " @@ -154,17 +156,9 @@ setup_topo() setup_topo_ns $ns done - ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns $h1 name eth0 - ip link set dev veth1 netns $sw1 name swp1 - - ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns $sw1 name veth0 - ip link set dev veth1 netns $sw2 name veth0 - - ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns $h2 name eth0 - ip link set dev veth1 netns $sw2 name swp1 + ip -n $h1 link add name eth0 type veth peer name swp1 netns $sw1 + ip -n $sw1 link add name veth0 type veth peer name veth0 netns $sw2 + ip -n $h2 link add name eth0 type veth peer name swp1 netns $sw2 } setup_host_common() @@ -396,6 +390,52 @@ neigh_suppress_arp() neigh_suppress_arp_common $vid $sip $tip } +neigh_suppress_uc_arp_common() +{ + local vid=$1; shift + local sip=$1; shift + local tip=$1; shift + local tmac + + echo + echo "Unicast ARP, per-port ARP suppression - VLAN $vid" + echo "-----------------------------------------------" + + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid" + + run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto arp flower arp_sip $tip arp_op reply action pass" + + run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto arp flower arp_tip $sip arp_op reply action pass" + + run_cmd "ip netns exec $h1 mausezahn eth0.$vid -c 1 -a own -b $tmac -t arp 'request sip=$sip, tip=$tip, tmac=$tmac' -q" + tc_check_packets $h1 "dev eth0.$vid ingress" 101 1 + log_test $? 0 "Unicast ARP, suppression on, h1 filter" + tc_check_packets $h2 "dev eth0.$vid egress" 101 1 + log_test $? 0 "Unicast ARP, suppression on, h2 filter" +} + +neigh_suppress_uc_arp() +{ + local vid=10 + local sip=192.0.2.1 + local tip=192.0.2.2 + + neigh_suppress_uc_arp_common $vid $sip $tip + + vid=20 + sip=192.0.2.17 + tip=192.0.2.18 + neigh_suppress_uc_arp_common $vid $sip $tip +} + neigh_suppress_ns_common() { local vid=$1; shift @@ -502,6 +542,78 @@ neigh_suppress_ns() neigh_suppress_ns_common $vid $saddr $daddr $maddr } +icmpv6_header_get() +{ + local csum=$1; shift + local tip=$1; shift + local type + local p + + # Type 135 (Neighbor Solicitation), hex format + type="87" + p=$(: + )"$type:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"$csum:"$( : ICMPv6.checksum + )"00:00:00:00:"$( : Reserved + )"$tip:"$( : Target Address + ) + echo $p +} + +neigh_suppress_uc_ns_common() +{ + local vid=$1; shift + local sip=$1; shift + local dip=$1; shift + local full_dip=$1; shift + local csum=$1; shift + local tmac + + echo + echo "Unicast NS, per-port NS suppression - VLAN $vid" + echo "---------------------------------------------" + + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 -6 neigh replace $dip lladdr $tmac nud permanent dev br0.$vid" + + run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 src_ip $dip type 136 code 0 action pass" + + run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip $sip type 136 code 0 action pass" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a own -b $tmac -A $sip -B $dip -t ip hop=255,next=58,payload=$(icmpv6_header_get $csum $full_dip) -q" + tc_check_packets $h1 "dev eth0.$vid ingress" 101 1 + log_test $? 0 "Unicast NS, suppression on, h1 filter" + tc_check_packets $h2 "dev eth0.$vid egress" 101 1 + log_test $? 0 "Unicast NS, suppression on, h2 filter" +} + +neigh_suppress_uc_ns() +{ + local vid=10 + local saddr=2001:db8:1::1 + local daddr=2001:db8:1::2 + local full_daddr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02 + local csum="ef:79" + + neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum + + vid=20 + saddr=2001:db8:2::1 + daddr=2001:db8:2::2 + full_daddr=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:02 + csum="ef:76" + + neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum +} + neigh_vlan_suppress_arp() { local vid1=10 @@ -833,6 +945,11 @@ if [ ! -x "$(command -v jq)" ]; then exit $ksft_skip fi +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + bridge link help 2>&1 | grep -q "neigh_vlan_suppress" if [ $? -ne 0 ]; then echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support" diff --git a/tools/testing/selftests/net/test_so_rcv.sh b/tools/testing/selftests/net/test_so_rcv.sh new file mode 100755 index 000000000000..d8aa4362879d --- /dev/null +++ b/tools/testing/selftests/net/test_so_rcv.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +HOSTS=("127.0.0.1" "::1") +PORT=1234 +TOTAL_TESTS=0 +FAILED_TESTS=0 + +declare -A TESTS=( + ["SO_RCVPRIORITY"]="-P 2" + ["SO_RCVMARK"]="-M 3" +) + +check_result() { + ((TOTAL_TESTS++)) + if [ "$1" -ne 0 ]; then + ((FAILED_TESTS++)) + fi +} + +cleanup() +{ + cleanup_ns $NS +} + +trap cleanup EXIT + +setup_ns NS + +for HOST in "${HOSTS[@]}"; do + PROTOCOL="IPv4" + if [[ "$HOST" == "::1" ]]; then + PROTOCOL="IPv6" + fi + for test_name in "${!TESTS[@]}"; do + echo "Running $test_name test, $PROTOCOL" + arg=${TESTS[$test_name]} + + ip netns exec $NS ./so_rcv_listener $arg $HOST $PORT & + LISTENER_PID=$! + + sleep 0.5 + + if ! ip netns exec $NS ./cmsg_sender $arg $HOST $PORT; then + echo "Sender failed for $test_name, $PROTOCOL" + kill "$LISTENER_PID" 2>/dev/null + wait "$LISTENER_PID" + check_result 1 + continue + fi + + wait "$LISTENER_PID" + LISTENER_EXIT_CODE=$? + + if [ "$LISTENER_EXIT_CODE" -eq 0 ]; then + echo "Rcv test OK for $test_name, $PROTOCOL" + check_result 0 + else + echo "Rcv test FAILED for $test_name, $PROTOCOL" + check_result 1 + fi + done +done + +if [ "$FAILED_TESTS" -ne 0 ]; then + echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed" + exit ${KSFT_FAIL} +else + echo "OK - All $TOTAL_TESTS tests passed" + exit ${KSFT_PASS} +fi diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh index 2d442cdab11e..062f957950af 100755 --- a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh +++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh @@ -1,29 +1,114 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# Check FDB default-remote handling across "ip link set". +ALL_TESTS=" + test_set_remote + test_change_mc_remote +" +source lib.sh check_remotes() { local what=$1; shift local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l) - echo -ne "expected two remotes after $what\t" - if [[ $N != 2 ]]; then - echo "[FAIL]" - EXIT_STATUS=1 + ((N == 2)) + check_err $? "expected 2 remotes after $what, got $N" +} + +# Check FDB default-remote handling across "ip link set". +test_set_remote() +{ + RET=0 + + ip_link_add vx up type vxlan id 2000 dstport 4789 + bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent + bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent + check_remotes "fdb append" + + ip link set dev vx type vxlan remote 192.0.2.30 + check_remotes "link set" + + log_test 'FDB default-remote handling across "ip link set"' +} + +fmt_remote() +{ + local addr=$1; shift + + if [[ $addr == 224.* ]]; then + echo "group $addr" else - echo "[ OK ]" + echo "remote $addr" fi } -ip link add name vx up type vxlan id 2000 dstport 4789 -bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent -bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent -check_remotes "fdb append" +change_remote() +{ + local remote=$1; shift + + ip link set dev vx type vxlan $(fmt_remote $remote) dev v1 +} + +check_membership() +{ + local check_vec=("$@") + + local memberships + memberships=$( + netstat -n --groups | + sed -n '/^v1\b/p' | + grep -o '[^ ]*$' + ) + check_err $? "Couldn't obtain group memberships" + + local item + for item in "${check_vec[@]}"; do + eval "local $item" + echo "$memberships" | grep -q "\b$group\b" + check_err_fail $fail $? "$group is_ex reported in IGMP query response" + done +} + +test_change_mc_remote() +{ + check_command netstat || return + + ip_link_add v1 up type veth peer name v2 + ip_link_set_up v2 + + RET=0 + + ip_link_add vx up type vxlan dstport 4789 \ + local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000 + + check_membership "group=224.1.1.1 fail=0" \ + "group=224.1.1.2 fail=1" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after VXLAN creation" + + RET=0 + + change_remote 224.1.1.2 + check_membership "group=224.1.1.1 fail=1" \ + "group=224.1.1.2 fail=0" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after changing VXLAN remote MC->MC" + + RET=0 + + change_remote 192.0.2.2 + check_membership "group=224.1.1.1 fail=1" \ + "group=224.1.1.2 fail=1" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after changing VXLAN remote MC->UC" +} + +trap defer_scopes_cleanup EXIT -ip link set dev vx type vxlan remote 192.0.2.30 -check_remotes "link set" +tests_run -ip link del dev vx exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index f27a12d2a2c9..5ded3b3a7538 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -44,9 +44,11 @@ struct tls_crypto_info_keys { }; static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, - struct tls_crypto_info_keys *tls12) + struct tls_crypto_info_keys *tls12, + char key_generation) { - memset(tls12, 0, sizeof(*tls12)); + memset(tls12, key_generation, sizeof(*tls12)); + memset(tls12, 0, sizeof(struct tls_crypto_info)); switch (cipher_type) { case TLS_CIPHER_CHACHA20_POLY1305: @@ -266,6 +268,25 @@ TEST_F(tls_basic, bad_cipher) EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); } +TEST_F(tls_basic, recseq_wrap) +{ + struct tls_crypto_info_keys tls12; + char const *test_str = "test_read"; + int send_len = 10; + + if (self->notls) + SKIP(return, "no TLS support"); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0); + memset(&tls12.aes128.rec_seq, 0xff, sizeof(tls12.aes128.rec_seq)); + + ASSERT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + ASSERT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + FIXTURE(tls) { int fd, cfd; @@ -372,7 +393,7 @@ FIXTURE_SETUP(tls) SKIP(return, "Unsupported cipher in FIPS mode"); tls_crypto_info_init(variant->tls_version, variant->cipher_type, - &tls12); + &tls12, 0); ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); @@ -1156,7 +1177,7 @@ TEST_F(tls, bidir) struct tls_crypto_info_keys tls12; tls_crypto_info_init(variant->tls_version, variant->cipher_type, - &tls12); + &tls12, 0); ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12, tls12.len); @@ -1595,7 +1616,7 @@ TEST_F(tls, getsockopt) EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type); /* get the full crypto_info */ - tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect, 0); len = expect.len; memrnd(&get, sizeof(get)); EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0); @@ -1649,6 +1670,500 @@ TEST_F(tls, recv_efault) EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0); } +#define TLS_RECORD_TYPE_HANDSHAKE 0x16 +/* key_update, length 1, update_not_requested */ +static const char key_update_msg[] = "\x18\x00\x00\x01\x00"; +static void tls_send_keyupdate(struct __test_metadata *_metadata, int fd) +{ + size_t len = sizeof(key_update_msg); + + EXPECT_EQ(tls_send_cmsg(fd, TLS_RECORD_TYPE_HANDSHAKE, + (char *)key_update_msg, len, 0), + len); +} + +static void tls_recv_keyupdate(struct __test_metadata *_metadata, int fd, int flags) +{ + char buf[100]; + + EXPECT_EQ(tls_recv_cmsg(_metadata, fd, TLS_RECORD_TYPE_HANDSHAKE, buf, sizeof(buf), flags), + sizeof(key_update_msg)); + EXPECT_EQ(memcmp(buf, key_update_msg, sizeof(key_update_msg)), 0); +} + +/* set the key to 0 then 1 for RX, immediately to 1 for TX */ +TEST_F(tls_basic, rekey_rx) +{ + struct tls_crypto_info_keys tls12_0, tls12_1; + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_0, 0); + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_1, 1); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_0, tls12_0.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); + EXPECT_EQ(ret, 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); +} + +/* set the key to 0 then 1 for TX, immediately to 1 for RX */ +TEST_F(tls_basic, rekey_tx) +{ + struct tls_crypto_info_keys tls12_0, tls12_1; + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_0, 0); + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_1, 1); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_0, tls12_0.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); + EXPECT_EQ(ret, 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); +} + +TEST_F(tls_basic, disconnect) +{ + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + struct tls_crypto_info_keys key; + struct sockaddr_in addr; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &key, 0); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &key, key.len); + ASSERT_EQ(ret, 0); + + /* Pre-queue the data so that setsockopt parses it but doesn't + * dequeue it from the TCP socket. recvmsg would dequeue. + */ + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &key, key.len); + ASSERT_EQ(ret, 0); + + addr.sin_family = AF_UNSPEC; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + ret = connect(self->cfd, &addr, sizeof(addr)); + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); +} + +TEST_F(tls, rekey) +{ + char const *test_str_1 = "test_message_before_rekey"; + char const *test_str_2 = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* initial send/recv */ + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* send after rekey */ + send_len = strlen(test_str_2) + 1; + EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* recv blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* recv non-blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + /* recv after rekey */ + EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); +} + +TEST_F(tls, rekey_fail) +{ + char const *test_str_1 = "test_message_before_rekey"; + char const *test_str_2 = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + /* initial send/recv */ + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + + if (variant->tls_version != TLS_1_3_VERSION) { + /* just check that rekey is not supported and return */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EBUSY); + return; + } + + /* successful update */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* invalid update: change of version */ + tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* invalid update (RX socket): change of version */ + tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* invalid update: change of cipher */ + if (variant->cipher_type == TLS_CIPHER_AES_GCM_256) + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_CHACHA20_POLY1305, &tls12, 1); + else + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_256, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* send after rekey, the invalid updates shouldn't have an effect */ + send_len = strlen(test_str_2) + 1; + EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* recv blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* recv non-blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + /* recv after rekey */ + EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); +} + +TEST_F(tls, rekey_peek) +{ + char const *test_str_1 = "test_message_before_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), -1); + + /* peek KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); +} + +TEST_F(tls, splice_rekey) +{ + int send_len = TLS_PAYLOAD_MAX_LEN / 2; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + struct tls_crypto_info_keys tls12; + int p[2]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + memrnd(mem_send, sizeof(mem_send)); + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); + + /* can't splice the KeyUpdate */ + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); + EXPECT_EQ(errno, EINVAL); + + /* peek KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* can't splice before updating the key */ + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); +} + +TEST_F(tls, rekey_peek_splice) +{ + char const *test_str_1 = "test_message_before_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int p[2]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + ASSERT_GE(pipe(p), 0); + + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_recv, test_str_1, send_len), 0); +} + +TEST_F(tls, rekey_getsockopt) +{ + struct tls_crypto_info_keys tls12; + struct tls_crypto_info_keys tls12_get; + socklen_t len; + + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + tls_recv_keyupdate(_metadata, self->cfd, 0); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); +} + +TEST_F(tls, rekey_poll_pending) +{ + char const *test_str = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + struct pollfd pfd = { }; + int send_len; + int ret; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* send immediately after rekey */ + send_len = strlen(test_str) + 1; + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + /* key hasn't been updated, expect cfd to be non-readable */ + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int pid2, status; + + /* wait before installing the new key */ + sleep(1); + + /* update RX key while poll() is sleeping */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5000), 1); + + exit(!__test_passed(_metadata)); + } +} + +TEST_F(tls, rekey_poll_delay) +{ + char const *test_str = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + struct pollfd pfd = { }; + int send_len; + int ret; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int pid2, status; + + /* wait before installing the new key */ + sleep(1); + + /* update RX key while poll() is sleeping */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + sleep(1); + send_len = strlen(test_str) + 1; + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5000), 1); + exit(!__test_passed(_metadata)); + } +} + FIXTURE(tls_err) { int fd, cfd; @@ -1677,7 +2192,7 @@ FIXTURE_SETUP(tls_err) int ret; tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128, - &tls12); + &tls12, 0); ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls); @@ -2099,7 +2614,7 @@ TEST(tls_v6ops) { int sfd, ret, fd; socklen_t len, len2; - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0); addr.sin6_family = AF_INET6; addr.sin6_addr = in6addr_any; @@ -2158,7 +2673,7 @@ TEST(prequeue) { len = sizeof(addr); memrnd(buf, sizeof(buf)); - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12, 0); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index ec60a16c9307..dae91eb97d69 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -77,6 +77,8 @@ static bool cfg_epollet; static bool cfg_do_listen; static uint16_t dest_port = 9000; static bool cfg_print_nsec; +static uint32_t ts_opt_id; +static bool cfg_use_cmsg_opt_id; static struct sockaddr_in daddr; static struct sockaddr_in6 daddr6; @@ -136,12 +138,13 @@ static void validate_key(int tskey, int tstype) /* compare key for each subsequent request * must only test for one type, the first one requested */ - if (saved_tskey == -1) + if (saved_tskey == -1 || cfg_use_cmsg_opt_id) saved_tskey_type = tstype; else if (saved_tskey_type != tstype) return; stepsize = cfg_proto == SOCK_STREAM ? cfg_payload_len : 1; + stepsize = cfg_use_cmsg_opt_id ? 0 : stepsize; if (tskey != saved_tskey + stepsize) { fprintf(stderr, "ERROR: key %d, expected %d\n", tskey, saved_tskey + stepsize); @@ -356,8 +359,12 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) } } - if (batch > 1) + if (batch > 1) { fprintf(stderr, "batched %d timestamps\n", batch); + } else if (!batch) { + fprintf(stderr, "Failed to report timestamps\n"); + test_failed = true; + } } static int recv_errmsg(int fd) @@ -480,7 +487,7 @@ static void fill_header_udp(void *p, bool is_ipv4) static void do_test(int family, unsigned int report_opt) { - char control[CMSG_SPACE(sizeof(uint32_t))]; + char control[2 * CMSG_SPACE(sizeof(uint32_t))]; struct sockaddr_ll laddr; unsigned int sock_opt; struct cmsghdr *cmsg; @@ -620,18 +627,32 @@ static void do_test(int family, unsigned int report_opt) msg.msg_iov = &iov; msg.msg_iovlen = 1; - if (cfg_use_cmsg) { + if (cfg_use_cmsg || cfg_use_cmsg_opt_id) { memset(control, 0, sizeof(control)); msg.msg_control = control; - msg.msg_controllen = sizeof(control); + msg.msg_controllen = cfg_use_cmsg * CMSG_SPACE(sizeof(uint32_t)); + msg.msg_controllen += cfg_use_cmsg_opt_id * CMSG_SPACE(sizeof(uint32_t)); + + cmsg = NULL; + if (cfg_use_cmsg) { + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMPING; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SO_TIMESTAMPING; - cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + *((uint32_t *)CMSG_DATA(cmsg)) = report_opt; + } + if (cfg_use_cmsg_opt_id) { + cmsg = cmsg ? CMSG_NXTHDR(&msg, cmsg) : CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_TS_OPT_ID; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + + *((uint32_t *)CMSG_DATA(cmsg)) = ts_opt_id; + saved_tskey = ts_opt_id; + } - *((uint32_t *) CMSG_DATA(cmsg)) = report_opt; } val = sendmsg(fd, &msg, 0); @@ -681,6 +702,7 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -L listen on hostname and port\n" " -n: set no-payload option\n" " -N: print timestamps and durations in nsec (instead of usec)\n" + " -o N: use SCM_TS_OPT_ID control message to provide N as tskey\n" " -p N: connect to port N\n" " -P: use PF_PACKET\n" " -r: use raw\n" @@ -701,7 +723,7 @@ static void parse_opt(int argc, char **argv) int c; while ((c = getopt(argc, argv, - "46bc:CeEFhIl:LnNp:PrRS:t:uv:V:x")) != -1) { + "46bc:CeEFhIl:LnNo:p:PrRS:t:uv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -742,6 +764,10 @@ static void parse_opt(int argc, char **argv) case 'N': cfg_print_nsec = true; break; + case 'o': + ts_opt_id = strtoul(optarg, NULL, 10); + cfg_use_cmsg_opt_id = true; + break; case 'p': dest_port = strtoul(optarg, NULL, 10); break; @@ -799,6 +825,8 @@ static void parse_opt(int argc, char **argv) error(1, 0, "cannot ask for pktinfo over pf_packet"); if (cfg_busy_poll && cfg_use_epoll) error(1, 0, "pass epoll or busy_poll, not both"); + if (cfg_proto == SOCK_STREAM && cfg_use_cmsg_opt_id) + error(1, 0, "TCP sockets don't support SCM_TS_OPT_ID"); if (optind != argc - 1) error(1, 0, "missing required hostname argument"); diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh index 25baca4b148e..fe4649bb8786 100755 --- a/tools/testing/selftests/net/txtimestamp.sh +++ b/tools/testing/selftests/net/txtimestamp.sh @@ -37,11 +37,13 @@ run_test_v4v6() { run_test_tcpudpraw() { local -r args=$@ - run_test_v4v6 ${args} # tcp - run_test_v4v6 ${args} -u # udp - run_test_v4v6 ${args} -r # raw - run_test_v4v6 ${args} -R # raw (IPPROTO_RAW) - run_test_v4v6 ${args} -P # pf_packet + run_test_v4v6 ${args} # tcp + run_test_v4v6 ${args} -u # udp + run_test_v4v6 ${args} -u -o 42 # udp with fixed tskey + run_test_v4v6 ${args} -r # raw + run_test_v4v6 ${args} -r -o 42 # raw + run_test_v4v6 ${args} -R # raw (IPPROTO_RAW) + run_test_v4v6 ${args} -P # pf_packet } run_test_all() { diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 8802604148dd..1dc337c709f8 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -3,12 +3,10 @@ # # Run a series of udpgro functional tests. -source net_helper.sh +source lib.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.o" - # set global exit status, but never reset nonzero one. check_err() { @@ -38,7 +36,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp + ip netns exec "${PEER_NS}" ethtool -K veth1 gro on } run_one() { @@ -46,17 +44,19 @@ run_one() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & + local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -73,6 +73,7 @@ run_one_nat() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 if [[ ${tx_args} = *-4* ]]; then ipt_cmd=iptables @@ -93,16 +94,17 @@ run_one_nat() { # ... so that GRO will match the UDP_GRO enabled socket, but packets # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & - pid=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \ - echo "ok" || \ - echo "failed"& + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - kill -INT $pid - wait $(jobs -p) + check_err $? + kill -INT ${PID1} + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -111,20 +113,26 @@ run_one_2sock() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 12345 udp ./udpgso_bench_tx ${tx_args} -p 12345 + check_err $? wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -196,11 +204,6 @@ run_all() { return $ret } -if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Run 'make' first" - exit -1 -fi - if [[ $# -eq 0 ]]; then run_all elif [[ $1 == "__subprocess" ]]; then diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 7080eae5312b..54fa4821bc5e 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -3,11 +3,11 @@ # # Run a series of udpgro benchmarks -source net_helper.sh +source lib.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.o" +BPF_FILE="lib/xdp_dummy.bpf.o" cleanup() { local -r jobs="$(jobs -p)" diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index e1ff645bd3d1..9a2cfec1153e 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -3,11 +3,11 @@ # # Run a series of udpgro benchmarks -source net_helper.sh +source lib.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.o" +BPF_FILE="lib/xdp_dummy.bpf.o" cleanup() { local -r jobs="$(jobs -p)" @@ -42,8 +42,8 @@ run_one() { ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp tc -n "${PEER_NS}" qdisc add dev veth1 clsact - tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6 direct-action - tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action + tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.bpf.o section schedcls/ingress6/nat_6 direct-action + tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action echo ${rx_args} ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & @@ -89,7 +89,7 @@ if [ ! -f ${BPF_FILE} ]; then exit -1 fi -if [ ! -f nat6to4.o ]; then +if [ ! -f nat6to4.bpf.o ]; then echo "Missing nat6to4 helper. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 83ed987cff34..a39fdc4aa2ff 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,9 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -source net_helper.sh +source lib.sh -BPF_FILE="xdp_dummy.o" +BPF_FILE="lib/xdp_dummy.bpf.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 85b3baa3f7f3..36ff28af4b19 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -53,6 +53,7 @@ static bool cfg_do_ipv6; static bool cfg_do_connected; static bool cfg_do_connectionless; static bool cfg_do_msgmore; +static bool cfg_do_recv = true; static bool cfg_do_setsockopt; static int cfg_specific_test_id = -1; @@ -66,6 +67,7 @@ struct testcase { int gso_len; /* mss after applying gso */ int r_num_mss; /* recv(): number of calls of full mss */ int r_len_last; /* recv(): size of last non-mss dgram, if any */ + bool v6_ext_hdr; /* send() dgrams with IPv6 extension headers */ }; const struct in6_addr addr6 = { @@ -76,6 +78,8 @@ const struct in_addr addr4 = { __constant_htonl(0x0a000001), /* 10.0.0.1 */ }; +static const char ipv6_hopopts_pad1[8] = { 0 }; + struct testcase testcases_v4[] = { { /* no GSO: send a single byte */ @@ -99,6 +103,19 @@ struct testcase testcases_v4[] = { .r_num_mss = 1, }, { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V4, + .gso_len = CONST_MSS_V4 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V4, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V4 + 1, + .gso_len = CONST_MSS_V4 + 2, + .tfail = true, + }, + { /* send a single MSS + 1B */ .tlen = CONST_MSS_V4 + 1, .gso_len = CONST_MSS_V4, @@ -202,6 +219,19 @@ struct testcase testcases_v6[] = { .r_num_mss = 1, }, { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V6, + .gso_len = CONST_MSS_V6 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V6, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V6 + 1, + .gso_len = CONST_MSS_V6 + 2, + .tfail = true + }, + { /* send a single MSS + 1B */ .tlen = CONST_MSS_V6 + 1, .gso_len = CONST_MSS_V6, @@ -255,6 +285,13 @@ struct testcase testcases_v6[] = { .r_num_mss = 2, }, { + /* send 2 1B segments with extension headers */ + .tlen = 2, + .gso_len = 1, + .r_num_mss = 2, + .v6_ext_hdr = true, + }, + { /* send 2B + 2B + 1B segments */ .tlen = 5, .gso_len = 2, @@ -395,11 +432,18 @@ static void run_one(struct testcase *test, int fdt, int fdr, int i, ret, val, mss; bool sent; - fprintf(stderr, "ipv%d tx:%d gso:%d %s\n", + fprintf(stderr, "ipv%d tx:%d gso:%d %s%s\n", addr->sa_family == AF_INET ? 4 : 6, test->tlen, test->gso_len, + test->v6_ext_hdr ? "ext-hdr " : "", test->tfail ? "(fail)" : ""); + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, + ipv6_hopopts_pad1, sizeof(ipv6_hopopts_pad1))) + error(1, errno, "setsockopt ipv6 hopopts"); + } + val = test->gso_len; if (cfg_do_setsockopt) { if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val))) @@ -411,9 +455,18 @@ static void run_one(struct testcase *test, int fdt, int fdr, error(1, 0, "send succeeded while expecting failure"); if (!sent && !test->tfail) error(1, 0, "send failed while expecting success"); + + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, NULL, 0)) + error(1, errno, "setsockopt ipv6 hopopts clear"); + } + if (!sent) return; + if (!cfg_do_recv) + return; + if (test->gso_len) mss = test->gso_len; else @@ -464,8 +517,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen) if (fdr == -1) error(1, errno, "socket r"); - if (bind(fdr, addr, alen)) - error(1, errno, "bind"); + if (cfg_do_recv) { + if (bind(fdr, addr, alen)) + error(1, errno, "bind"); + } /* Have tests fail quickly instead of hang */ if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) @@ -524,7 +579,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "46cCmst:")) != -1) { + while ((c = getopt(argc, argv, "46cCmRst:")) != -1) { switch (c) { case '4': cfg_do_ipv4 = true; @@ -541,6 +596,9 @@ static void parse_opts(int argc, char **argv) case 'm': cfg_do_msgmore = true; break; + case 'R': + cfg_do_recv = false; + break; case 's': cfg_do_setsockopt = true; break; diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh index 6c63178086b0..85d1fa3c1ff7 100755 --- a/tools/testing/selftests/net/udpgso.sh +++ b/tools/testing/selftests/net/udpgso.sh @@ -27,6 +27,31 @@ test_route_mtu() { ip route add local fd00::1/128 table local dev lo mtu 1500 } +setup_dummy_sink() { + ip link add name sink mtu 1500 type dummy + ip addr add dev sink 10.0.0.0/24 + ip addr add dev sink fd00::2/64 nodad + ip link set dev sink up +} + +test_hw_gso_hw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic on >/dev/null + ethtool -K sink tx-udp-segmentation on >/dev/null +} + +test_sw_gso_hw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic on >/dev/null + ethtool -K sink tx-udp-segmentation off >/dev/null +} + +test_sw_gso_sw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic off >/dev/null + ethtool -K sink tx-udp-segmentation off >/dev/null +} + if [ "$#" -gt 0 ]; then "$1" shift 2 # pop "test_*" arg and "--" delimiter @@ -56,3 +81,21 @@ echo "ipv4 msg_more" echo "ipv6 msg_more" ./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m + +echo "ipv4 hw-gso hw-csum" +./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -4 -C -R + +echo "ipv6 hw-gso hw-csum" +./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -6 -C -R + +echo "ipv4 sw-gso hw-csum" +./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -4 -C -R + +echo "ipv6 sw-gso hw-csum" +./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -6 -C -R + +echo "ipv4 sw-gso sw-csum" +./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -4 -C -R + +echo "ipv6 sw-gso sw-csum" +./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -6 -C -R diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 640bc43452fa..88fa1d53ba2b 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -92,6 +92,9 @@ run_udp() { echo "udp" run_in_netns ${args} + echo "udp sendmmsg" + run_in_netns ${args} -m + echo "udp gso" run_in_netns ${args} -S 0 diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh index f52aa5f7da52..3e751234ccfe 100755 --- a/tools/testing/selftests/net/unicast_extensions.sh +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -30,14 +30,7 @@ source lib.sh -# nettest can be run from PATH or from same directory as this selftest -if ! which nettest >/dev/null; then - PATH=$PWD:$PATH - if ! which nettest >/dev/null; then - echo "'nettest' command not found; skipping tests" - exit $ksft_skip - fi -fi +check_gen_prog "nettest" result=0 diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 3a394b43e274..9709dd067c72 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="xdp_dummy.o" +BPF_FILE="lib/xdp_dummy.bpf.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 @@ -46,8 +46,6 @@ create_ns() { ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad done - echo "#kernel" > $BASE - chmod go-rw $BASE } __chk_flag() { diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh new file mode 100755 index 000000000000..e7cb8c678bde --- /dev/null +++ b/tools/testing/selftests/net/vlan_bridge_binding.sh @@ -0,0 +1,256 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +ALL_TESTS=" + test_binding_on + test_binding_off + test_binding_toggle_on + test_binding_toggle_off + test_binding_toggle_on_when_upper_down + test_binding_toggle_off_when_upper_down + test_binding_toggle_on_when_lower_down + test_binding_toggle_off_when_lower_down +" + +setup_prepare() +{ + local port + + ip_link_add br up type bridge vlan_filtering 1 + + for port in d1 d2 d3; do + ip_link_add $port type veth peer name r$port + ip_link_set_up $port + ip_link_set_up r$port + ip_link_set_master $port br + done + + bridge_vlan_add vid 11 dev br self + bridge_vlan_add vid 11 dev d1 master + + bridge_vlan_add vid 12 dev br self + bridge_vlan_add vid 12 dev d2 master + + bridge_vlan_add vid 13 dev br self + bridge_vlan_add vid 13 dev d1 master + bridge_vlan_add vid 13 dev d2 master + + bridge_vlan_add vid 14 dev br self + bridge_vlan_add vid 14 dev d1 master + bridge_vlan_add vid 14 dev d2 master + bridge_vlan_add vid 14 dev d3 master +} + +operstate_is() +{ + local dev=$1; shift + local expect=$1; shift + + local operstate=$(ip -j link show $dev | jq -r .[].operstate) + if [[ $operstate == UP ]]; then + operstate=1 + elif [[ $operstate == DOWN || $operstate == LOWERLAYERDOWN ]]; then + operstate=0 + fi + echo -n $operstate + [[ $operstate == $expect ]] +} + +check_operstate() +{ + local dev=$1; shift + local expect=$1; shift + local operstate + + operstate=$(busywait 1000 \ + operstate_is "$dev" "$expect") + check_err $? "Got operstate of $operstate, expected $expect" +} + +add_one_vlan() +{ + local link=$1; shift + local id=$1; shift + + ip_link_add $link.$id link $link type vlan id $id "$@" +} + +add_vlans() +{ + add_one_vlan br 11 "$@" + add_one_vlan br 12 "$@" + add_one_vlan br 13 "$@" + add_one_vlan br 14 "$@" +} + +set_vlans() +{ + ip link set dev br.11 "$@" + ip link set dev br.12 "$@" + ip link set dev br.13 "$@" + ip link set dev br.14 "$@" +} + +down_netdevs() +{ + local dev + + for dev in "$@"; do + ip_link_set_down $dev + done +} + +check_operstates() +{ + local opst_11=$1; shift + local opst_12=$1; shift + local opst_13=$1; shift + local opst_14=$1; shift + + check_operstate br.11 $opst_11 + check_operstate br.12 $opst_12 + check_operstate br.13 $opst_13 + check_operstate br.14 $opst_14 +} + +do_test_binding() +{ + local inject=$1; shift + local what=$1; shift + local opsts_d1=$1; shift + local opsts_d2=$1; shift + local opsts_d12=$1; shift + local opsts_d123=$1; shift + + RET=0 + + defer_scope_push + down_netdevs d1 + $inject + check_operstates $opsts_d1 + defer_scope_pop + + defer_scope_push + down_netdevs d2 + $inject + check_operstates $opsts_d2 + defer_scope_pop + + defer_scope_push + down_netdevs d1 d2 + $inject + check_operstates $opsts_d12 + defer_scope_pop + + defer_scope_push + down_netdevs d1 d2 d3 + $inject + check_operstates $opsts_d123 + defer_scope_pop + + log_test "Test bridge_binding $what" +} + +do_test_binding_on() +{ + local inject=$1; shift + local what=$1; shift + + do_test_binding "$inject" "$what" \ + "0 1 1 1" \ + "1 0 1 1" \ + "0 0 0 1" \ + "0 0 0 0" +} + +do_test_binding_off() +{ + local inject=$1; shift + local what=$1; shift + + do_test_binding "$inject" "$what" \ + "1 1 1 1" \ + "1 1 1 1" \ + "1 1 1 1" \ + "0 0 0 0" +} + +test_binding_on() +{ + add_vlans bridge_binding on + set_vlans up + do_test_binding_on : "on" +} + +test_binding_off() +{ + add_vlans bridge_binding off + set_vlans up + do_test_binding_off : "off" +} + +test_binding_toggle_on() +{ + add_vlans bridge_binding off + set_vlans up + set_vlans type vlan bridge_binding on + do_test_binding_on : "off->on" +} + +test_binding_toggle_off() +{ + add_vlans bridge_binding on + set_vlans up + set_vlans type vlan bridge_binding off + do_test_binding_off : "on->off" +} + +dfr_set_binding_on() +{ + set_vlans type vlan bridge_binding on + defer set_vlans type vlan bridge_binding off +} + +dfr_set_binding_off() +{ + set_vlans type vlan bridge_binding off + defer set_vlans type vlan bridge_binding on +} + +test_binding_toggle_on_when_lower_down() +{ + add_vlans bridge_binding off + set_vlans up + do_test_binding_on dfr_set_binding_on "off->on when lower down" +} + +test_binding_toggle_off_when_lower_down() +{ + add_vlans bridge_binding on + set_vlans up + do_test_binding_off dfr_set_binding_off "on->off when lower down" +} + +test_binding_toggle_on_when_upper_down() +{ + add_vlans bridge_binding off + set_vlans type vlan bridge_binding on + set_vlans up + do_test_binding_on : "off->on when upper down" +} + +test_binding_toggle_off_when_upper_down() +{ + add_vlans bridge_binding on + set_vlans type vlan bridge_binding off + set_vlans up + do_test_binding_off : "on->off when upper down" +} + +trap defer_scopes_cleanup EXIT +setup_prepare +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index 2da32f4c479b..e9c2f71da207 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -533,6 +533,86 @@ ipv6_ping_frag_asym() ipv6_ping_frag asym } +ipv4_ping_local() +{ + log_section "IPv4 (sym route): VRF ICMP local error route lookup ping" + + setup_sym + + check_connectivity || return + + run_cmd ip netns exec $r1 ip vrf exec blue ping -c1 -w1 ${H2_N2_IP} + log_test $? 0 "VRF ICMP local IPv4" +} + +ipv4_tcp_local() +{ + log_section "IPv4 (sym route): VRF tcp local connection" + + setup_sym + + check_connectivity || return + + run_cmd nettest -s -O "$h2" -l ${H2_N2_IP} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -N "$r1" -d blue -r ${H2_N2_IP} + log_test $? 0 "VRF tcp local connection IPv4" +} + +ipv4_udp_local() +{ + log_section "IPv4 (sym route): VRF udp local connection" + + setup_sym + + check_connectivity || return + + run_cmd nettest -s -D -O "$h2" -l ${H2_N2_IP} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -D -N "$r1" -d blue -r ${H2_N2_IP} + log_test $? 0 "VRF udp local connection IPv4" +} + +ipv6_ping_local() +{ + log_section "IPv6 (sym route): VRF ICMP local error route lookup ping" + + setup_sym + + check_connectivity6 || return + + run_cmd ip netns exec $r1 ip vrf exec blue ${ping6} -c1 -w1 ${H2_N2_IP6} + log_test $? 0 "VRF ICMP local IPv6" +} + +ipv6_tcp_local() +{ + log_section "IPv6 (sym route): VRF tcp local connection" + + setup_sym + + check_connectivity6 || return + + run_cmd nettest -s -6 -O "$h2" -l ${H2_N2_IP6} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -6 -N "$r1" -d blue -r ${H2_N2_IP6} + log_test $? 0 "VRF tcp local connection IPv6" +} + +ipv6_udp_local() +{ + log_section "IPv6 (sym route): VRF udp local connection" + + setup_sym + + check_connectivity6 || return + + run_cmd nettest -s -6 -D -O "$h2" -l ${H2_N2_IP6} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -6 -D -N "$r1" -d blue -r ${H2_N2_IP6} + log_test $? 0 "VRF udp local connection IPv6" +} + ################################################################################ # usage @@ -555,8 +635,12 @@ EOF # Some systems don't have a ping6 binary anymore command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping) -TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym" -TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_ttl_asym ipv6_traceroute_asym" +check_gen_prog "nettest" + +TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_local ipv4_tcp_local +ipv4_udp_local ipv4_ping_ttl_asym ipv4_traceroute_asym" +TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_local ipv6_tcp_local ipv6_udp_local +ipv6_ping_ttl_asym ipv6_traceroute_asym" ret=0 nsuccess=0 @@ -594,12 +678,18 @@ do ipv4_traceroute|traceroute) ipv4_traceroute;;& ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;& ipv4_ping_frag|ping) ipv4_ping_frag;;& + ipv4_ping_local|ping) ipv4_ping_local;;& + ipv4_tcp_local) ipv4_tcp_local;;& + ipv4_udp_local) ipv4_udp_local;;& ipv6_ping_ttl|ping) ipv6_ping_ttl;;& ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;& ipv6_traceroute|traceroute) ipv6_traceroute;;& ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;& ipv6_ping_frag|ping) ipv6_ping_frag;;& + ipv6_ping_local|ping) ipv6_ping_local;;& + ipv6_tcp_local) ipv6_tcp_local;;& + ipv6_udp_local) ipv6_udp_local;;& # setup namespaces and config, but do not run any tests setup_sym|setup) setup_sym; exit 0;; diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 457789530645..3eeeeffb4005 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -293,7 +293,7 @@ check_random_order() local ns=$1 local log=$2 - for i in $(seq 100); do + for i in $(seq 50); do ip -net $ns xfrm policy flush for j in $(seq 0 16 255 | sort -R); do ip -net $ns xfrm policy add dst $j.0.0.0/24 dir out priority 10 action allow @@ -306,7 +306,7 @@ check_random_order() done done - for i in $(seq 100); do + for i in $(seq 50); do ip -net $ns xfrm policy flush for j in $(seq 0 16 255 | sort -R); do local addr=$(printf "e000:0000:%02x00::/56" $j) diff --git a/tools/testing/selftests/net/xfrm_policy_add_speed.sh b/tools/testing/selftests/net/xfrm_policy_add_speed.sh new file mode 100755 index 000000000000..2fab29d3cb91 --- /dev/null +++ b/tools/testing/selftests/net/xfrm_policy_add_speed.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +source lib.sh + +timeout=4m +ret=0 +tmp=$(mktemp) +cleanup() { + cleanup_all_ns + rm -f "$tmp" +} + +trap cleanup EXIT + +maxpolicies=100000 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && maxpolicies=10000 + +do_dummies4() { + local dir="$1" + local max="$2" + + local policies + local pfx + pfx=30 + policies=0 + + ip netns exec "$ns" ip xfrm policy flush + + for i in $(seq 1 100);do + local s + local d + for j in $(seq 1 255);do + s=$((i+0)) + d=$((i+100)) + + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.0/30 dst 10.$d.$j.$a/$pfx dir $dir action block + done + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.$a/30 dst 10.$d.$j.0/$pfx dir $dir action block + done + done + done +} + +setup_ns ns + +do_bench() +{ + local max="$1" + + start=$(date +%s%3N) + do_dummies4 "out" "$max" > "$tmp" + if ! timeout "$timeout" ip netns exec "$ns" ip -batch "$tmp";then + echo "WARNING: policy insertion cancelled after $timeout" + ret=1 + fi + stop=$(date +%s%3N) + + result=$((stop-start)) + + policies=$(wc -l < "$tmp") + printf "Inserted %-06s policies in $result ms\n" $policies + + have=$(ip netns exec "$ns" ip xfrm policy show | grep "action block" | wc -l) + if [ "$have" -ne "$policies" ]; then + echo "WARNING: mismatch, have $have policies, expected $policies" + ret=1 + fi +} + +p=100 +while [ $p -le "$maxpolicies" ]; do + do_bench "$p" + p="${p}0" +done + +exit $ret diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk new file mode 100644 index 000000000000..e907c2751956 --- /dev/null +++ b/tools/testing/selftests/net/ynl.mk @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0 + +# YNL selftest build snippet + +# Inputs: +# +# YNL_GENS: families we need in the selftests +# YNL_PROGS: TEST_PROGS which need YNL (TODO, none exist, yet) +# YNL_GEN_FILES: TEST_GEN_FILES which need YNL + +YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) +YNL_SPECS := \ + $(patsubst %,$(top_srcdir)/Documentation/netlink/specs/%.yaml,$(YNL_GENS)) + +$(YNL_OUTPUTS): $(OUTPUT)/libynl.a +$(YNL_OUTPUTS): CFLAGS += \ + -I$(top_srcdir)/usr/include/ $(KHDR_INCLUDES) \ + -I$(top_srcdir)/tools/net/ynl/lib/ \ + -I$(top_srcdir)/tools/net/ynl/generated/ + +# Make sure we rebuild libynl if user added a new family. We can't easily +# depend on the contents of a variable so create a fake file with a hash. +YNL_GENS_HASH := $(shell echo $(YNL_GENS) | sha1sum | cut -c1-8) +$(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig: + $(Q)rm -f $(OUTPUT)/.libynl-*.sig + $(Q)touch $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig + +$(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig + $(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a + $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \ + GENS="$(YNL_GENS)" RSTS="" libynl.a + $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a + +EXTRA_CLEAN += \ + $(top_srcdir)/tools/net/ynl/pyynl/__pycache__ \ + $(top_srcdir)/tools/net/ynl/pyynl/lib/__pycache__ \ + $(top_srcdir)/tools/net/ynl/lib/*.[ado] \ + $(OUTPUT)/.libynl-*.sig \ + $(OUTPUT)/libynl.a |