diff options
Diffstat (limited to 'tools/testing/selftests/drivers')
243 files changed, 49486 insertions, 0 deletions
diff --git a/tools/testing/selftests/drivers/.gitignore b/tools/testing/selftests/drivers/.gitignore new file mode 100644 index 000000000000..09e23b5afa96 --- /dev/null +++ b/tools/testing/selftests/drivers/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +/dma-buf/udmabuf +/s390x/uvdevice/test_uvdevice diff --git a/tools/testing/selftests/drivers/dma-buf/Makefile b/tools/testing/selftests/drivers/dma-buf/Makefile new file mode 100644 index 000000000000..441407bb0e80 --- /dev/null +++ b/tools/testing/selftests/drivers/dma-buf/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS += $(KHDR_INCLUDES) + +TEST_GEN_PROGS := udmabuf + +top_srcdir ?=../../../../.. + +include ../../lib.mk diff --git a/tools/testing/selftests/drivers/dma-buf/config b/tools/testing/selftests/drivers/dma-buf/config new file mode 100644 index 000000000000..d708515cff1b --- /dev/null +++ b/tools/testing/selftests/drivers/dma-buf/config @@ -0,0 +1 @@ +CONFIG_UDMABUF=y diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c new file mode 100644 index 000000000000..d78aec662586 --- /dev/null +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> +#include <malloc.h> +#include <stdbool.h> + +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <linux/memfd.h> +#include <linux/udmabuf.h> +#include "kselftest.h" + +#define TEST_PREFIX "drivers/dma-buf/udmabuf" +#define NUM_PAGES 4 +#define NUM_ENTRIES 4 +#define MEMFD_SIZE 1024 /* in pages */ + +static unsigned int page_size; + +static int create_memfd_with_seals(off64_t size, bool hpage) +{ + int memfd, ret; + unsigned int flags = MFD_ALLOW_SEALING; + + if (hpage) + flags |= MFD_HUGETLB; + + memfd = memfd_create("udmabuf-test", flags); + if (memfd < 0) { + ksft_print_msg("%s: [skip,no-memfd]\n", TEST_PREFIX); + exit(KSFT_SKIP); + } + + ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) { + ksft_print_msg("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + exit(KSFT_SKIP); + } + + ret = ftruncate(memfd, size); + if (ret == -1) { + ksft_print_msg("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + exit(KSFT_FAIL); + } + + return memfd; +} + +static int create_udmabuf_list(int devfd, int memfd, off64_t memfd_size) +{ + struct udmabuf_create_list *list; + int ubuf_fd, i; + + list = malloc(sizeof(struct udmabuf_create_list) + + sizeof(struct udmabuf_create_item) * NUM_ENTRIES); + if (!list) { + ksft_print_msg("%s: [FAIL, udmabuf-malloc]\n", TEST_PREFIX); + exit(KSFT_FAIL); + } + + for (i = 0; i < NUM_ENTRIES; i++) { + list->list[i].memfd = memfd; + list->list[i].offset = i * (memfd_size / NUM_ENTRIES); + list->list[i].size = getpagesize() * NUM_PAGES; + } + + list->count = NUM_ENTRIES; + list->flags = UDMABUF_FLAGS_CLOEXEC; + ubuf_fd = ioctl(devfd, UDMABUF_CREATE_LIST, list); + free(list); + if (ubuf_fd < 0) { + ksft_print_msg("%s: [FAIL, udmabuf-create]\n", TEST_PREFIX); + exit(KSFT_FAIL); + } + + return ubuf_fd; +} + +static void write_to_memfd(void *addr, off64_t size, char chr) +{ + int i; + + for (i = 0; i < size / page_size; i++) { + *((char *)addr + (i * page_size)) = chr; + } +} + +static void *mmap_fd(int fd, off64_t size) +{ + void *addr; + + addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + ksft_print_msg("%s: ubuf_fd mmap fail\n", TEST_PREFIX); + exit(KSFT_FAIL); + } + + return addr; +} + +static int compare_chunks(void *addr1, void *addr2, off64_t memfd_size) +{ + off64_t off; + int i = 0, j, k = 0, ret = 0; + char char1, char2; + + while (i < NUM_ENTRIES) { + off = i * (memfd_size / NUM_ENTRIES); + for (j = 0; j < NUM_PAGES; j++, k++) { + char1 = *((char *)addr1 + off + (j * getpagesize())); + char2 = *((char *)addr2 + (k * getpagesize())); + if (char1 != char2) { + ret = -1; + goto err; + } + } + i++; + } +err: + munmap(addr1, memfd_size); + munmap(addr2, NUM_ENTRIES * NUM_PAGES * getpagesize()); + return ret; +} + +int main(int argc, char *argv[]) +{ + struct udmabuf_create create; + int devfd, memfd, buf, ret; + off64_t size; + void *addr1, *addr2; + + ksft_print_header(); + ksft_set_plan(7); + + devfd = open("/dev/udmabuf", O_RDWR); + if (devfd < 0) { + ksft_print_msg( + "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", + TEST_PREFIX); + exit(KSFT_SKIP); + } + + memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); + if (memfd < 0) { + ksft_print_msg("%s: [skip,no-memfd]\n", TEST_PREFIX); + exit(KSFT_SKIP); + } + + ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) { + ksft_print_msg("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + exit(KSFT_SKIP); + } + + size = getpagesize() * NUM_PAGES; + ret = ftruncate(memfd, size); + if (ret == -1) { + ksft_print_msg("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + exit(KSFT_FAIL); + } + + memset(&create, 0, sizeof(create)); + + /* should fail (offset not page aligned) */ + create.memfd = memfd; + create.offset = getpagesize()/2; + create.size = getpagesize(); + buf = ioctl(devfd, UDMABUF_CREATE, &create); + if (buf >= 0) + ksft_test_result_fail("%s: [FAIL,test-1]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-1]\n", TEST_PREFIX); + + /* should fail (size not multiple of page) */ + create.memfd = memfd; + create.offset = 0; + create.size = getpagesize()/2; + buf = ioctl(devfd, UDMABUF_CREATE, &create); + if (buf >= 0) + ksft_test_result_fail("%s: [FAIL,test-2]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-2]\n", TEST_PREFIX); + + /* should fail (not memfd) */ + create.memfd = 0; /* stdin */ + create.offset = 0; + create.size = size; + buf = ioctl(devfd, UDMABUF_CREATE, &create); + if (buf >= 0) + ksft_test_result_fail("%s: [FAIL,test-3]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-3]\n", TEST_PREFIX); + + /* should work */ + page_size = getpagesize(); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); + create.memfd = memfd; + create.offset = 0; + create.size = size; + buf = ioctl(devfd, UDMABUF_CREATE, &create); + if (buf < 0) + ksft_test_result_fail("%s: [FAIL,test-4]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-4]\n", TEST_PREFIX); + + munmap(addr1, size); + close(buf); + close(memfd); + + /* should work (migration of 4k size pages)*/ + size = MEMFD_SIZE * page_size; + memfd = create_memfd_with_seals(size, false); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); + buf = create_udmabuf_list(devfd, memfd, size); + addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize()); + write_to_memfd(addr1, size, 'b'); + ret = compare_chunks(addr1, addr2, size); + if (ret < 0) + ksft_test_result_fail("%s: [FAIL,test-5]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-5]\n", TEST_PREFIX); + + close(buf); + close(memfd); + + /* should work (migration of 2MB size huge pages)*/ + page_size = getpagesize() * 512; /* 2 MB */ + size = MEMFD_SIZE * page_size; + memfd = create_memfd_with_seals(size, true); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); + buf = create_udmabuf_list(devfd, memfd, size); + addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize()); + write_to_memfd(addr1, size, 'b'); + ret = compare_chunks(addr1, addr2, size); + if (ret < 0) + ksft_test_result_fail("%s: [FAIL,test-6]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-6]\n", TEST_PREFIX); + + close(buf); + close(memfd); + + /* same test as above but we pin first before writing to memfd */ + page_size = getpagesize() * 512; /* 2 MB */ + size = MEMFD_SIZE * page_size; + memfd = create_memfd_with_seals(size, true); + buf = create_udmabuf_list(devfd, memfd, size); + addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize()); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); + write_to_memfd(addr1, size, 'b'); + ret = compare_chunks(addr1, addr2, size); + if (ret < 0) + ksft_test_result_fail("%s: [FAIL,test-7]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-7]\n", TEST_PREFIX); + + close(buf); + close(memfd); + close(devfd); + + ksft_print_msg("%s: ok\n", TEST_PREFIX); + ksft_print_cnts(); + + return 0; +} diff --git a/tools/testing/selftests/drivers/gpu/drm_mm.sh b/tools/testing/selftests/drivers/gpu/drm_mm.sh new file mode 100755 index 000000000000..09c76cd7661d --- /dev/null +++ b/tools/testing/selftests/drivers/gpu/drm_mm.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# Runs API tests for struct drm_mm (DRM range manager) + +if ! /sbin/modprobe -n -q test-drm_mm; then + echo "drivers/gpu/drm_mm: module test-drm_mm is not found in /lib/modules/`uname -r` [skip]" + exit 77 +fi + +if /sbin/modprobe -q test-drm_mm; then + /sbin/modprobe -q -r test-drm_mm + echo "drivers/gpu/drm_mm: ok" +else + echo "drivers/gpu/drm_mm: module test-drm_mm could not be removed [FAIL]" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/gpu/i915.sh b/tools/testing/selftests/drivers/gpu/i915.sh new file mode 100755 index 000000000000..d3895bc714b7 --- /dev/null +++ b/tools/testing/selftests/drivers/gpu/i915.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# Runs hardware independent tests for i915 (drivers/gpu/drm/i915) + +if ! /sbin/modprobe -q -r i915; then + echo "drivers/gpu/i915: [SKIP]" + exit 77 +fi + +if /sbin/modprobe -q i915 mock_selftests=-1; then + /sbin/modprobe -q -r i915 + echo "drivers/gpu/i915: ok" +else + echo "drivers/gpu/i915: [FAIL]" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore new file mode 100644 index 000000000000..3633c7a3ed65 --- /dev/null +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +gro +napi_id_helper +psp_responder diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile new file mode 100644 index 000000000000..f5c71d993750 --- /dev/null +++ b/tools/testing/selftests/drivers/net/Makefile @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += $(KHDR_INCLUDES) + +TEST_INCLUDES := $(wildcard lib/py/*.py) \ + $(wildcard lib/sh/*.sh) \ + ../../net/lib.sh \ + +TEST_GEN_FILES := \ + gro \ + napi_id_helper \ +# end of TEST_GEN_FILES + +TEST_PROGS := \ + gro.py \ + hds.py \ + napi_id.py \ + napi_threaded.py \ + netcons_basic.sh \ + netcons_cmdline.sh \ + netcons_fragmented_msg.sh \ + netcons_overflow.sh \ + netcons_sysdata.sh \ + netcons_torture.sh \ + netpoll_basic.py \ + ping.py \ + psp.py \ + queues.py \ + ring_reconfig.py \ + shaper.py \ + stats.py \ + xdp.py \ +# end of TEST_PROGS + +# YNL files, must be before "include ..lib.mk" +YNL_GEN_FILES := psp_responder +TEST_GEN_FILES += $(YNL_GEN_FILES) + +include ../../lib.mk + +# YNL build +YNL_GENS := psp + +include ../../net/ynl.mk diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst new file mode 100644 index 000000000000..eb838ae94844 --- /dev/null +++ b/tools/testing/selftests/drivers/net/README.rst @@ -0,0 +1,136 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Running driver tests +==================== + +Networking driver tests are executed within kselftest framework like any +other tests. They support testing both real device drivers and emulated / +software drivers (latter mostly to test the core parts of the stack). + +SW mode +~~~~~~~ + +By default, when no extra parameters are set or exported, tests execute +against software drivers such as netdevsim. No extra preparation is required +the software devices are created and destroyed as part of the test. +In this mode the tests are indistinguishable from other selftests and +(for example) can be run under ``virtme-ng`` like the core networking selftests. + +HW mode +~~~~~~~ + +Executing tests against a real device requires external preparation. +The netdevice against which tests will be run must exist, be running +(in UP state) and be configured with an IP address. + +Refer to list of :ref:`Variables` later in this file to set up running +the tests against a real device. + +Both modes required +~~~~~~~~~~~~~~~~~~~ + +All tests in drivers/net must support running both against a software device +and a real device. SW-only tests should instead be placed in net/ or +drivers/net/netdevsim, HW-only tests in drivers/net/hw. + +Variables +========= + +The variables can be set in the environment or by creating a net.config +file in the same directory as this README file. Example:: + + $ NETIF=eth0 ./some_test.sh + +or:: + + $ cat tools/testing/selftests/drivers/net/net.config + # Variable set in a file + NETIF=eth0 + +Local test (which don't require endpoint for sending / receiving traffic) +need only the ``NETIF`` variable. Remaining variables define the endpoint +and communication method. + +NETIF +~~~~~ + +Name of the netdevice against which the test should be executed. +When empty or not set software devices will be used. + +LOCAL_V4, LOCAL_V6, REMOTE_V4, REMOTE_V6 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Local and remote endpoint IP addresses. + +REMOTE_TYPE +~~~~~~~~~~~ + +Communication method used to run commands on the remote endpoint. +Test framework has built-in support for ``netns`` and ``ssh`` channels. +``netns`` assumes the "remote" interface is part of the same +host, just moved to the specified netns. +``ssh`` communicates with remote endpoint over ``ssh`` and ``scp``. +Using persistent SSH connections is strongly encouraged to avoid +the latency of SSH connection setup on every command. + +Communication methods are defined by classes in ``lib/py/remote_{name}.py``. +It should be possible to add a new method without modifying any of +the framework, by simply adding an appropriately named file to ``lib/py``. + +REMOTE_ARGS +~~~~~~~~~~~ + +Arguments used to construct the communication channel. +Communication channel dependent:: + + for netns - name of the "remote" namespace + for ssh - name/address of the remote host + +Example +======= + +Build the selftests:: + + # make -C tools/testing/selftests/ TARGETS="drivers/net drivers/net/hw" + +"Install" the tests and copy them over to the target machine:: + + # make -C tools/testing/selftests/ TARGETS="drivers/net drivers/net/hw" \ + install INSTALL_PATH=/tmp/ksft-net-drv + + # rsync -ra --delete /tmp/ksft-net-drv root@192.168.1.1:/root/ + +On the target machine, running the tests will use netdevsim by default:: + + [/root] # ./ksft-net-drv/run_kselftest.sh -t drivers/net:ping.py + TAP version 13 + 1..1 + # timeout set to 45 + # selftests: drivers/net: ping.py + # TAP version 13 + # 1..3 + # ok 1 ping.test_v4 + # ok 2 ping.test_v6 + # ok 3 ping.test_tcp + # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 + ok 1 selftests: drivers/net: ping.py + +Create a config with remote info:: + + [/root] # cat > ./ksft-net-drv/drivers/net/net.config <<EOF + NETIF=eth0 + LOCAL_V4=192.168.1.1 + REMOTE_V4=192.168.1.2 + REMOTE_TYPE=ssh + REMOTE_ARGS=root@192.168.1.2 + EOF + +Run the test:: + + [/root] # ./ksft-net-drv/drivers/net/ping.py + TAP version 13 + 1..3 + ok 1 ping.test_v4 + ok 2 ping.test_v6 # SKIP Test requires IPv6 connectivity + ok 3 ping.test_tcp + # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:1 error:0 diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile new file mode 100644 index 000000000000..6c5c60adb5e8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for net selftests + +TEST_PROGS := \ + bond-arp-interval-causes-panic.sh \ + bond-break-lacpdu-tx.sh \ + bond-eth-type-change.sh \ + bond-lladdr-target.sh \ + bond_ipsec_offload.sh \ + bond_lacp_prio.sh \ + bond_macvlan_ipvlan.sh \ + bond_options.sh \ + bond_passive_lacp.sh \ + dev_addr_lists.sh \ + mode-1-recovery-updelay.sh \ + mode-2-recovery-updelay.sh \ + netcons_over_bonding.sh \ +# end of TEST_PROGS + +TEST_FILES := \ + bond_topo_2d1c.sh \ + bond_topo_3d1c.sh \ + lag_lib.sh \ +# end of TEST_FILES + +TEST_INCLUDES := \ + ../../../net/lib.sh \ + ../lib/sh/lib_netcons.sh \ + ../../../net/forwarding/lib.sh \ +# end of TEST_INCLUDES + +include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh new file mode 100755 index 000000000000..5667febee328 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh @@ -0,0 +1,46 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# cause kernel oops in bond_rr_gen_slave_id +DEBUG=${DEBUG:-0} + +set -e +test ${DEBUG} -ne 0 && set -x + +finish() +{ + ip netns delete server || true + ip netns delete client || true +} + +trap finish EXIT + +client_ip4=192.168.1.198 +server_ip4=192.168.1.254 + +# setup kernel so it reboots after causing the panic +echo 180 >/proc/sys/kernel/panic + +# build namespaces +ip netns add "server" +ip netns add "client" +ip -n client link add eth0 type veth peer name eth0 netns server +ip netns exec server ip link set dev eth0 up +ip netns exec server ip addr add ${server_ip4}/24 dev eth0 + +ip netns exec client ip link add dev bond0 down type bond mode 1 \ + miimon 100 all_slaves_active 1 +ip netns exec client ip link set dev eth0 master bond0 +ip netns exec client ip link set dev bond0 up +ip netns exec client ip addr add ${client_ip4}/24 dev bond0 +ip netns exec client ping -c 5 $server_ip4 >/dev/null + +ip netns exec client ip link set dev eth0 nomaster +ip netns exec client ip link set dev bond0 down +ip netns exec client ip link set dev bond0 type bond mode 0 \ + arp_interval 1000 arp_ip_target "+${server_ip4}" +ip netns exec client ip link set dev eth0 master bond0 +ip netns exec client ip link set dev bond0 up +ip netns exec client ping -c 5 $server_ip4 >/dev/null + +exit 0 diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh new file mode 100755 index 000000000000..1ec7f59db7f4 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh @@ -0,0 +1,80 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Regression Test: +# Verify LACPDUs get transmitted after setting the MAC address of +# the bond. +# +# https://bugzilla.redhat.com/show_bug.cgi?id=2020773 +# +# +---------+ +# | fab-br0 | +# +---------+ +# | +# +---------+ +# | fbond | +# +---------+ +# | | +# +------+ +------+ +# |veth1 | |veth2 | +# +------+ +------+ +# +# We use veths instead of physical interfaces +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +set -e +cleanup() { + ip link del fab-br0 >/dev/null 2>&1 || : + ip link del fbond >/dev/null 2>&1 || : + ip link del veth1-bond >/dev/null 2>&1 || : + ip link del veth2-bond >/dev/null 2>&1 || : +} + +trap cleanup 0 1 2 +cleanup + +# create the bridge +ip link add fab-br0 address 52:54:00:3B:7C:A6 mtu 1500 type bridge \ + forward_delay 15 + +# create the bond +ip link add fbond type bond mode 4 miimon 200 xmit_hash_policy 1 \ + ad_actor_sys_prio 65535 lacp_rate fast + +# set bond address +ip link set fbond address 52:54:00:3B:7C:A6 +ip link set fbond up + +# set again bond sysfs parameters +ip link set fbond type bond ad_actor_sys_prio 65535 + +# create veths +ip link add name veth1-bond type veth peer name veth1-end +ip link add name veth2-bond type veth peer name veth2-end + +# add ports +ip link set fbond master fab-br0 +ip link set veth1-bond master fbond +ip link set veth2-bond master fbond + +# bring up +ip link set veth1-end up +ip link set veth2-end up +ip link set fab-br0 up +ip link set fbond up +ip addr add dev fab-br0 10.0.0.3 + +rc=0 +tc qdisc add dev veth1-end clsact +tc filter add dev veth1-end ingress protocol 0x8809 pref 1 handle 101 flower skip_hw action pass +if slowwait_for_counter 15 2 \ + tc_rule_handle_stats_get "dev veth1-end ingress" 101 ".packets" "" &> /dev/null; then + echo "PASS, captured 2" +else + echo "FAIL" + rc=1 +fi +exit $rc diff --git a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh new file mode 100755 index 000000000000..8293dbc7c18f --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bond device ether type changing +# + +ALL_TESTS=" + bond_test_unsuccessful_enslave_type_change + bond_test_successful_enslave_type_change +" +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +bond_check_flags() +{ + local bonddev=$1 + + ip -d l sh dev "$bonddev" | grep -q "MASTER" + check_err $? "MASTER flag is missing from the bond device" + + ip -d l sh dev "$bonddev" | grep -q "SLAVE" + check_err $? "SLAVE flag is missing from the bond device" +} + +# test enslaved bond dev type change from ARPHRD_ETHER and back +# this allows us to test both MASTER and SLAVE flags at once +bond_test_enslave_type_change() +{ + local test_success=$1 + local devbond0="test-bond0" + local devbond1="test-bond1" + local devbond2="test-bond2" + local nonethdev="test-noneth0" + + # create a non-ARPHRD_ETHER device for testing (e.g. nlmon type) + ip link add name "$nonethdev" type nlmon + check_err $? "could not create a non-ARPHRD_ETHER device (nlmon)" + ip link add name "$devbond0" type bond + if [ $test_success -eq 1 ]; then + # we need devbond0 in active-backup mode to successfully enslave nonethdev + ip link set dev "$devbond0" type bond mode active-backup + check_err $? "could not change bond mode to active-backup" + fi + ip link add name "$devbond1" type bond + ip link add name "$devbond2" type bond + ip link set dev "$devbond0" master "$devbond1" + check_err $? "could not enslave $devbond0 to $devbond1" + # change bond type to non-ARPHRD_ETHER + ip link set dev "$nonethdev" master "$devbond0" 1>/dev/null 2>/dev/null + ip link set dev "$nonethdev" nomaster 1>/dev/null 2>/dev/null + # restore ARPHRD_ETHER type by enslaving such device + ip link set dev "$devbond2" master "$devbond0" + check_err $? "could not enslave $devbond2 to $devbond0" + + bond_check_flags "$devbond0" + + # clean up + ip link del dev "$devbond0" + ip link del dev "$devbond1" + ip link del dev "$devbond2" + ip link del dev "$nonethdev" +} + +bond_test_unsuccessful_enslave_type_change() +{ + RET=0 + + bond_test_enslave_type_change 0 + log_test "Change ether type of an enslaved bond device with unsuccessful enslave" +} + +bond_test_successful_enslave_type_change() +{ + RET=0 + + bond_test_enslave_type_change 1 + log_test "Change ether type of an enslaved bond device with successful enslave" +} + +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh new file mode 100755 index 000000000000..78d3e0fe6604 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh @@ -0,0 +1,80 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Regression Test: +# Verify bond interface could up when set IPv6 link local address target. +# +# +----------------+ +# | br0 | +# | | | sw +# | veth0 veth1 | +# +---+-------+----+ +# | | +# +---+-------+----+ +# | veth0 veth1 | +# | | | host +# | bond0 | +# +----------------+ +# +# We use veths instead of physical interfaces +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +sw="sw-$(mktemp -u XXXXXX)" +host="ns-$(mktemp -u XXXXXX)" + +cleanup() +{ + ip netns del $sw + ip netns del $host +} + +wait_lladdr_dad() +{ + $@ | grep fe80 | grep -qv tentative +} + +wait_bond_up() +{ + $@ | grep -q 'state UP' +} + +trap cleanup 0 1 2 + +ip netns add $sw +ip netns add $host + +ip -n $host link add veth0 type veth peer name veth0 netns $sw +ip -n $host link add veth1 type veth peer name veth1 netns $sw + +ip -n $sw link add br0 type bridge +ip -n $sw link set br0 up +sw_lladdr=$(ip -n $sw addr show br0 | awk '/fe80/{print $2}' | cut -d'/' -f1) +# wait some time to make sure bridge lladdr pass DAD +slowwait 2 wait_lladdr_dad ip -n $sw addr show br0 + +ip -n $host link add bond0 type bond mode 1 ns_ip6_target ${sw_lladdr} \ + arp_validate 3 arp_interval 1000 +# add a lladdr for bond to make sure there is a route to target +ip -n $host addr add fe80::beef/64 dev bond0 +ip -n $host link set bond0 up +ip -n $host link set veth0 master bond0 +ip -n $host link set veth1 master bond0 + +ip -n $sw link set veth0 master br0 +ip -n $sw link set veth1 master br0 +ip -n $sw link set veth0 up +ip -n $sw link set veth1 up + +slowwait 5 wait_bond_up ip -n $host link show bond0 + +rc=0 +if ip -n $host link show bond0 | grep -q LOWER_UP; then + echo "PASS" +else + echo "FAIL" + rc=1 +fi +exit $rc diff --git a/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh new file mode 100755 index 000000000000..f09e100232c7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh @@ -0,0 +1,156 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# IPsec over bonding offload test: +# +# +----------------+ +# | bond0 | +# | | | +# | eth0 eth1 | +# +---+-------+----+ +# +# We use netdevsim instead of physical interfaces +#------------------------------------------------------------------- +# Example commands +# ip x s add proto esp src 192.0.2.1 dst 192.0.2.2 \ +# spi 0x07 mode transport reqid 0x07 replay-window 32 \ +# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \ +# sel src 192.0.2.1/24 dst 192.0.2.2/24 +# offload dev bond0 dir out +# ip x p add dir out src 192.0.2.1/24 dst 192.0.2.2/24 \ +# tmpl proto esp src 192.0.2.1 dst 192.0.2.2 \ +# spi 0x07 mode transport reqid 0x07 +# +#------------------------------------------------------------------- + +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/lib.sh +srcip=192.0.2.1 +dstip=192.0.2.2 +ipsec0=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ipsec +ipsec1=/sys/kernel/debug/netdevsim/netdevsim0/ports/1/ipsec +active_slave="" + +# shellcheck disable=SC2317 +active_slave_changed() +{ + local old_active_slave=$1 + local new_active_slave + + # shellcheck disable=SC2154 + new_active_slave=$(ip -n "${ns}" -d -j link show bond0 | \ + jq -r ".[].linkinfo.info_data.active_slave") + [ "$new_active_slave" != "$old_active_slave" ] && [ "$new_active_slave" != "null" ] +} + +test_offload() +{ + # use ping to exercise the Tx path + ip netns exec "$ns" ping -I bond0 -c 3 -W 1 -i 0 "$dstip" >/dev/null + + active_slave=$(ip -n "${ns}" -d -j link show bond0 | \ + jq -r ".[].linkinfo.info_data.active_slave") + + if [ "$active_slave" = "$nic0" ]; then + sysfs=$ipsec0 + elif [ "$active_slave" = "$nic1" ]; then + sysfs=$ipsec1 + else + check_err 1 "bond_ipsec_offload invalid active_slave $active_slave" + fi + + # The tx/rx order in sysfs may changed after failover + grep -q "SA count=2 tx=3" "$sysfs" && grep -q "tx ipaddr=$dstip" "$sysfs" + check_err $? "incorrect tx count with link ${active_slave}" + + log_test bond_ipsec_offload "active_slave ${active_slave}" +} + +setup_env() +{ + if ! mount | grep -q debugfs; then + mount -t debugfs none /sys/kernel/debug/ &> /dev/null + defer umount /sys/kernel/debug/ + + fi + + # setup netdevsim since dummy/veth dev doesn't have offload support + if [ ! -w /sys/bus/netdevsim/new_device ] ; then + if ! modprobe -q netdevsim; then + echo "SKIP: can't load netdevsim for ipsec offload" + # shellcheck disable=SC2154 + exit "$ksft_skip" + fi + defer modprobe -r netdevsim + fi + + setup_ns ns + defer cleanup_ns "$ns" +} + +setup_bond() +{ + ip -n "$ns" link add bond0 type bond mode active-backup miimon 100 + ip -n "$ns" addr add "$srcip/24" dev bond0 + ip -n "$ns" link set bond0 up + + echo "0 2" | ip netns exec "$ns" tee /sys/bus/netdevsim/new_device >/dev/null + nic0=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | head -n 1) + nic1=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | tail -n 1) + ip -n "$ns" link set "$nic0" master bond0 + ip -n "$ns" link set "$nic1" master bond0 + + # we didn't create a peer, make sure we can Tx by adding a permanent + # neighbour this need to be added after enslave + ip -n "$ns" neigh add "$dstip" dev bond0 lladdr 00:11:22:33:44:55 + + # create offloaded SAs, both in and out + ip -n "$ns" x p add dir out src "$srcip/24" dst "$dstip/24" \ + tmpl proto esp src "$srcip" dst "$dstip" spi 9 \ + mode transport reqid 42 + + ip -n "$ns" x p add dir in src "$dstip/24" dst "$srcip/24" \ + tmpl proto esp src "$dstip" dst "$srcip" spi 9 \ + mode transport reqid 42 + + ip -n "$ns" x s add proto esp src "$srcip" dst "$dstip" spi 9 \ + mode transport reqid 42 aead "rfc4106(gcm(aes))" \ + 0x3132333435363738393031323334353664636261 128 \ + sel src "$srcip/24" dst "$dstip/24" \ + offload dev bond0 dir out + + ip -n "$ns" x s add proto esp src "$dstip" dst "$srcip" spi 9 \ + mode transport reqid 42 aead "rfc4106(gcm(aes))" \ + 0x3132333435363738393031323334353664636261 128 \ + sel src "$dstip/24" dst "$srcip/24" \ + offload dev bond0 dir in + + # does offload show up in ip output + lines=$(ip -n "$ns" x s list | grep -c "crypto offload parameters: dev bond0 dir") + if [ "$lines" -ne 2 ] ; then + check_err 1 "bond_ipsec_offload SA offload missing from list output" + fi +} + +trap defer_scopes_cleanup EXIT +setup_env +setup_bond + +# start Offload testing +test_offload + +# do failover and re-test +ip -n "$ns" link set "$active_slave" down +slowwait 5 active_slave_changed "$active_slave" +test_offload + +# make sure offload get removed from driver +ip -n "$ns" x s flush +ip -n "$ns" x p flush +line0=$(grep -c "SA count=0" "$ipsec0") +line1=$(grep -c "SA count=0" "$ipsec1") +[ "$line0" -ne 1 ] || [ "$line1" -ne 1 ] +check_fail $? "bond_ipsec_offload SA not removed from driver" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh new file mode 100755 index 000000000000..a483d505c6a8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing if bond lacp per port priority works +# +# Switch (s_ns) Backup Switch (b_ns) +# +-------------------------+ +-------------------------+ +# | bond0 | | bond0 | +# | + | | + | +# | eth0 | eth1 | | eth0 | eth1 | +# | +---+---+ | | +---+---+ | +# | | | | | | | | +# +-------------------------+ +-------------------------+ +# | | | | +# +-----------------------------------------------------+ +# | | | | | | +# | +-------+---------+---------+-------+ | +# | eth0 eth1 | eth2 eth3 | +# | + | +# | bond0 | +# +-----------------------------------------------------+ +# Client (c_ns) + +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/lib.sh + +setup_links() +{ + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" + ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth2 type veth peer name eth0 netns "${b_ns}" + ip -n "${c_ns}" link add eth3 type veth peer name eth1 netns "${b_ns}" + + ip -n "${c_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast ad_select actor_port_prio + ip -n "${s_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast + ip -n "${b_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast + + ip -n "${c_ns}" link set eth0 master bond0 + ip -n "${c_ns}" link set eth1 master bond0 + ip -n "${c_ns}" link set eth2 master bond0 + ip -n "${c_ns}" link set eth3 master bond0 + ip -n "${s_ns}" link set eth0 master bond0 + ip -n "${s_ns}" link set eth1 master bond0 + ip -n "${b_ns}" link set eth0 master bond0 + ip -n "${b_ns}" link set eth1 master bond0 + + ip -n "${c_ns}" link set bond0 up + ip -n "${s_ns}" link set bond0 up + ip -n "${b_ns}" link set bond0 up +} + +test_port_prio_setting() +{ + RET=0 + ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 1000 + prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" \ + ".[].linkinfo.info_slave_data.actor_port_prio") + [ "$prio" -ne 1000 ] && RET=1 + ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 10 + prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" \ + ".[].linkinfo.info_slave_data.actor_port_prio") + [ "$prio" -ne 10 ] && RET=1 +} + +test_agg_reselect() +{ + local bond_agg_id slave_agg_id + local expect_slave="$1" + RET=0 + + # Trigger link state change to reselect the aggregator + ip -n "${c_ns}" link set eth1 down + sleep 0.5 + ip -n "${c_ns}" link set eth1 up + sleep 0.5 + + bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" \ + ".[].linkinfo.info_data.ad_info.aggregator") + slave_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show $expect_slave" \ + ".[].linkinfo.info_slave_data.ad_aggregator_id") + # shellcheck disable=SC2034 + [ "${bond_agg_id}" -ne "${slave_agg_id}" ] && \ + RET=1 +} + +trap cleanup_all_ns EXIT +setup_ns c_ns s_ns b_ns +setup_links + +test_port_prio_setting +log_test "bond 802.3ad" "actor_port_prio setting" + +test_agg_reselect eth0 +log_test "bond 802.3ad" "actor_port_prio select" + +# Change the actor port prio and re-test +ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 10 +ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 1000 +test_agg_reselect eth2 +log_test "bond 802.3ad" "actor_port_prio switch" + +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh new file mode 100755 index 000000000000..559f300f965a --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test macvlan/ipvlan over bond + +lib_dir=$(dirname "$0") +source ${lib_dir}/bond_topo_2d1c.sh + +xvlan1_ns="xvlan1-$(mktemp -u XXXXXX)" +xvlan2_ns="xvlan2-$(mktemp -u XXXXXX)" +xvlan1_ip4="192.0.2.11" +xvlan1_ip6="2001:db8::11" +xvlan2_ip4="192.0.2.12" +xvlan2_ip6="2001:db8::12" + +cleanup() +{ + client_destroy + server_destroy + gateway_destroy + + ip netns del ${xvlan1_ns} + ip netns del ${xvlan2_ns} +} + +check_connection() +{ + local ns=${1} + local target=${2} + local message=${3} + RET=0 + + sleep 0.25 + ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null + check_err $? "ping failed" + log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}" +} + +xvlan_over_bond() +{ + local param="$1" + local xvlan_type="$2" + local xvlan_mode="$3" + RET=0 + + # setup new bond mode + bond_reset "${param}" + + ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode} + ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan1_ns} + ip -n ${xvlan1_ns} link set dev ${xvlan_type}0 up + ip -n ${xvlan1_ns} addr add ${xvlan1_ip4}/24 dev ${xvlan_type}0 + ip -n ${xvlan1_ns} addr add ${xvlan1_ip6}/24 dev ${xvlan_type}0 + + ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode} + ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan2_ns} + ip -n ${xvlan2_ns} link set dev ${xvlan_type}0 up + ip -n ${xvlan2_ns} addr add ${xvlan2_ip4}/24 dev ${xvlan_type}0 + ip -n ${xvlan2_ns} addr add ${xvlan2_ip6}/24 dev ${xvlan_type}0 + + sleep 2 + + check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server" + check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server" + check_connection "${c_ns}" "${xvlan1_ip4}" "IPv4: client->${xvlan_type}_1" + check_connection "${c_ns}" "${xvlan1_ip6}" "IPv6: client->${xvlan_type}_1" + check_connection "${c_ns}" "${xvlan2_ip4}" "IPv4: client->${xvlan_type}_2" + check_connection "${c_ns}" "${xvlan2_ip6}" "IPv6: client->${xvlan_type}_2" + check_connection "${xvlan1_ns}" "${xvlan2_ip4}" "IPv4: ${xvlan_type}_1->${xvlan_type}_2" + check_connection "${xvlan1_ns}" "${xvlan2_ip6}" "IPv6: ${xvlan_type}_1->${xvlan_type}_2" + + check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client" + check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client" + check_connection "${xvlan1_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_1->client" + check_connection "${xvlan1_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_1->client" + check_connection "${xvlan2_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_2->client" + check_connection "${xvlan2_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_2->client" + check_connection "${xvlan2_ns}" "${xvlan1_ip4}" "IPv4: ${xvlan_type}_2->${xvlan_type}_1" + check_connection "${xvlan2_ns}" "${xvlan1_ip6}" "IPv6: ${xvlan_type}_2->${xvlan_type}_1" + + ip -n ${c_ns} neigh flush dev eth0 +} + +trap cleanup EXIT + +setup_prepare +ip netns add ${xvlan1_ns} +ip netns add ${xvlan2_ns} + +bond_modes="active-backup balance-tlb balance-alb" + +for bond_mode in ${bond_modes}; do + xvlan_over_bond "mode ${bond_mode}" macvlan bridge + xvlan_over_bond "mode ${bond_mode}" ipvlan l2 +done + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh new file mode 100755 index 000000000000..187b478d0ddf --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -0,0 +1,578 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bonding options with mode 1,5,6 + +ALL_TESTS=" + prio + arp_validate + num_grat_arp + fail_over_mac + vlan_over_bond +" + +lib_dir=$(dirname "$0") +source ${lib_dir}/bond_topo_3d1c.sh +c_maddr="33:33:ff:00:00:10" +g_maddr="33:33:ff:00:02:54" + +skip_prio() +{ + local skip=1 + + # check if iproute support prio option + ip -n ${s_ns} link set eth0 type bond_slave prio 10 + [[ $? -ne 0 ]] && skip=0 + + # check if kernel support prio option + ip -n ${s_ns} -d link show eth0 | grep -q "prio 10" + [[ $? -ne 0 ]] && skip=0 + + return $skip +} + +skip_ns() +{ + local skip=1 + + # check if iproute support ns_ip6_target option + ip -n ${s_ns} link add bond1 type bond ns_ip6_target ${g_ip6} + [[ $? -ne 0 ]] && skip=0 + + # check if kernel support ns_ip6_target option + ip -n ${s_ns} -d link show bond1 | grep -q "ns_ip6_target ${g_ip6}" + [[ $? -ne 0 ]] && skip=0 + + ip -n ${s_ns} link del bond1 + + return $skip +} + +active_slave="" +active_slave_changed() +{ + local old_active_slave=$1 + local new_active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" \ + ".[].linkinfo.info_data.active_slave") + [ "$new_active_slave" != "$old_active_slave" -a "$new_active_slave" != "null" ] +} + +check_active_slave() +{ + local target_active_slave=$1 + slowwait 5 active_slave_changed $active_slave + active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + test "$active_slave" = "$target_active_slave" + check_err $? "Current active slave is $active_slave but not $target_active_slave" +} + +# Test bonding prio option +prio_test() +{ + local param="$1" + RET=0 + + # create bond + bond_reset "${param}" + # set active_slave to primary eth1 specifically + ip -n ${s_ns} link set bond0 type bond active_slave eth1 + + # check bonding member prio value + ip -n ${s_ns} link set eth0 type bond_slave prio 0 + ip -n ${s_ns} link set eth1 type bond_slave prio 10 + ip -n ${s_ns} link set eth2 type bond_slave prio 11 + cmd_jq "ip -n ${s_ns} -d -j link show eth0" \ + ".[].linkinfo.info_slave_data | select (.prio == 0)" "-e" &> /dev/null + check_err $? "eth0 prio is not 0" + cmd_jq "ip -n ${s_ns} -d -j link show eth1" \ + ".[].linkinfo.info_slave_data | select (.prio == 10)" "-e" &> /dev/null + check_err $? "eth1 prio is not 10" + cmd_jq "ip -n ${s_ns} -d -j link show eth2" \ + ".[].linkinfo.info_slave_data | select (.prio == 11)" "-e" &> /dev/null + check_err $? "eth2 prio is not 11" + + bond_check_connection "setup" + + # active slave should be the primary slave + check_active_slave eth1 + + # active slave should be the higher prio slave + ip -n ${s_ns} link set $active_slave down + check_active_slave eth2 + bond_check_connection "fail over" + + # when only 1 slave is up + ip -n ${s_ns} link set $active_slave down + check_active_slave eth0 + bond_check_connection "only 1 slave up" + + # when a higher prio slave change to up + ip -n ${s_ns} link set eth2 up + bond_check_connection "higher prio slave up" + case $primary_reselect in + "0") + check_active_slave "eth2" + ;; + "1") + check_active_slave "eth0" + ;; + "2") + check_active_slave "eth0" + ;; + esac + local pre_active_slave=$active_slave + + # when the primary slave change to up + ip -n ${s_ns} link set eth1 up + bond_check_connection "primary slave up" + case $primary_reselect in + "0") + check_active_slave "eth1" + ;; + "1") + check_active_slave "$pre_active_slave" + ;; + "2") + check_active_slave "$pre_active_slave" + ip -n ${s_ns} link set $active_slave down + bond_check_connection "pre_active slave down" + check_active_slave "eth1" + ;; + esac + + # Test changing bond slave prio + if [[ "$primary_reselect" == "0" ]];then + ip -n ${s_ns} link set eth0 type bond_slave prio 1000000 + ip -n ${s_ns} link set eth1 type bond_slave prio 0 + ip -n ${s_ns} link set eth2 type bond_slave prio -50 + ip -n ${s_ns} -d link show eth0 | grep -q 'prio 1000000' + check_err $? "eth0 prio is not 1000000" + ip -n ${s_ns} -d link show eth1 | grep -q 'prio 0' + check_err $? "eth1 prio is not 0" + ip -n ${s_ns} -d link show eth2 | grep -q 'prio -50' + check_err $? "eth3 prio is not -50" + check_active_slave "eth1" + + ip -n ${s_ns} link set $active_slave down + check_active_slave "eth0" + bond_check_connection "change slave prio" + fi +} + +prio_miimon() +{ + local primary_reselect + local mode=$1 + + for primary_reselect in 0 1 2; do + prio_test "mode $mode miimon 100 primary eth1 primary_reselect $primary_reselect" + log_test "prio" "$mode miimon primary_reselect $primary_reselect" + done +} + +prio_arp() +{ + local primary_reselect + local mode=$1 + + for primary_reselect in 0 1 2; do + prio_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect" + log_test "prio" "$mode arp_ip_target primary_reselect $primary_reselect" + done +} + +prio_ns() +{ + local primary_reselect + local mode=$1 + + if skip_ns; then + log_test_skip "prio ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'." + return 0 + fi + + for primary_reselect in 0 1 2; do + prio_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect" + log_test "prio" "$mode ns_ip6_target primary_reselect $primary_reselect" + done +} + +prio() +{ + local mode modes="active-backup balance-tlb balance-alb" + + if skip_prio; then + log_test_skip "prio" "Current iproute or kernel doesn't support bond option 'prio'." + return 0 + fi + + for mode in $modes; do + prio_miimon $mode + done + prio_arp "active-backup" + prio_ns "active-backup" +} + +wait_mii_up() +{ + for i in $(seq 0 2); do + mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status") + [ ${mii_status} != "UP" ] && return 1 + done + return 0 +} + +arp_validate_test() +{ + local param="$1" + RET=0 + + # create bond + bond_reset "${param}" + + bond_check_connection + [ $RET -ne 0 ] && log_test "arp_validate" "$retmsg" + + # wait for a while to make sure the mii status stable + slowwait 5 wait_mii_up + for i in $(seq 0 2); do + mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status") + if [ ${mii_status} != "UP" ]; then + RET=1 + log_test "arp_validate" "interface eth$i mii_status $mii_status" + fi + done +} + +# Testing correct multicast groups are added to slaves for ns targets +arp_validate_mcast() +{ + RET=0 + local arp_valid=$(cmd_jq "ip -n ${s_ns} -j -d link show bond0" ".[].linkinfo.info_data.arp_validate") + local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + + for i in $(seq 0 2); do + maddr_list=$(ip -n ${s_ns} maddr show dev eth${i}) + + # arp_valid == 0 or active_slave should not join any maddrs + if { [ "$arp_valid" == "null" ] || [ "eth${i}" == ${active_slave} ]; } && \ + echo "$maddr_list" | grep -qE "${c_maddr}|${g_maddr}"; then + RET=1 + check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group" + # arp_valid != 0 and backup_slave should join both maddrs + elif [ "$arp_valid" != "null" ] && [ "eth${i}" != ${active_slave} ] && \ + ( ! echo "$maddr_list" | grep -q "${c_maddr}" || \ + ! echo "$maddr_list" | grep -q "${m_maddr}"); then + RET=1 + check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group" + fi + done + + # Do failover + ip -n ${s_ns} link set ${active_slave} down + # wait for active link change + slowwait 2 active_slave_changed $active_slave + active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + + for i in $(seq 0 2); do + maddr_list=$(ip -n ${s_ns} maddr show dev eth${i}) + + # arp_valid == 0 or active_slave should not join any maddrs + if { [ "$arp_valid" == "null" ] || [ "eth${i}" == ${active_slave} ]; } && \ + echo "$maddr_list" | grep -qE "${c_maddr}|${g_maddr}"; then + RET=1 + check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group" + # arp_valid != 0 and backup_slave should join both maddrs + elif [ "$arp_valid" != "null" ] && [ "eth${i}" != ${active_slave} ] && \ + ( ! echo "$maddr_list" | grep -q "${c_maddr}" || \ + ! echo "$maddr_list" | grep -q "${m_maddr}"); then + RET=1 + check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group" + fi + done +} + +arp_validate_arp() +{ + local mode=$1 + local val + for val in $(seq 0 6); do + arp_validate_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} arp_validate $val" + log_test "arp_validate" "$mode arp_ip_target arp_validate $val" + done +} + +arp_validate_ns() +{ + local mode=$1 + local val + + if skip_ns; then + log_test_skip "arp_validate ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'." + return 0 + fi + + for val in $(seq 0 6); do + arp_validate_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6},${c_ip6} arp_validate $val" + log_test "arp_validate" "$mode ns_ip6_target arp_validate $val" + arp_validate_mcast + log_test "arp_validate" "join mcast group" + done +} + +arp_validate() +{ + arp_validate_arp "active-backup" + arp_validate_ns "active-backup" +} + +garp_test() +{ + local param="$1" + local active_slave exp_num real_num i + RET=0 + + # create bond + bond_reset "${param}" + + bond_check_connection + [ $RET -ne 0 ] && log_test "num_grat_arp" "$retmsg" + + + # Add tc rules to count GARP number + for i in $(seq 0 2); do + tc -n ${g_ns} filter add dev s$i ingress protocol arp pref 1 handle 101 \ + flower skip_hw arp_op request arp_sip ${s_ip4} arp_tip ${s_ip4} action pass + done + + # Do failover + active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + ip -n ${s_ns} link set ${active_slave} down + + # wait for active link change + slowwait 2 active_slave_changed $active_slave + + exp_num=$(echo "${param}" | cut -f6 -d ' ') + active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + slowwait_for_counter $((exp_num + 5)) $exp_num tc_rule_handle_stats_get \ + "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}" &> /dev/null + + # check result + real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}") + if [ "${real_num}" -ne "${exp_num}" ]; then + echo "$real_num garp packets sent on active slave ${active_slave}" + RET=1 + fi + + for i in $(seq 0 2); do + tc -n ${g_ns} filter del dev s$i ingress + done +} + +num_grat_arp() +{ + local val + for val in 10 20 30; do + garp_test "mode active-backup miimon 10 num_grat_arp $val peer_notify_delay 100" + log_test "num_grat_arp" "active-backup miimon num_grat_arp $val" + done +} + +check_all_mac_same() +{ + RET=0 + # all slaves should have same mac address (with the first port's mac) + local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]') + local eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]') + local eth1_mac=$(ip -n "$s_ns" -j link show eth1 | jq -r '.[]["address"]') + local eth2_mac=$(ip -n "$s_ns" -j link show eth2 | jq -r '.[]["address"]') + if [ "$bond_mac" != "${mac[0]}" ] || [ "$eth0_mac" != "$bond_mac" ] || \ + [ "$eth1_mac" != "$bond_mac" ] || [ "$eth2_mac" != "$bond_mac" ]; then + RET=1 + fi +} + +check_bond_mac_same_with_first() +{ + RET=0 + # bond mac address should be same with the first added slave + local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]') + if [ "$bond_mac" != "${mac[0]}" ]; then + RET=1 + fi +} + +check_bond_mac_same_with_active() +{ + RET=0 + # bond mac address should be same with active slave + local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]') + local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + local active_slave_mac=$(ip -n "$s_ns" -j link show "$active_slave" | jq -r '.[]["address"]') + if [ "$bond_mac" != "$active_slave_mac" ]; then + RET=1 + fi +} + +check_backup_slave_mac_not_change() +{ + RET=0 + # backup slave's mac address is not changed + if ip -n "$s_ns" -d -j link show type bond_slave | jq -e '.[] + | select(.linkinfo.info_slave_data.state=="BACKUP") + | select(.address != .linkinfo.info_slave_data.perm_hwaddr)' &> /dev/null; then + RET=1 + fi +} + +check_backup_slave_mac_inherit() +{ + local backup_mac + RET=0 + + # backup slaves should use mac[1] or mac[2] + local backup_macs=$(ip -n "$s_ns" -d -j link show type bond_slave | \ + jq -r '.[] | select(.linkinfo.info_slave_data.state=="BACKUP") | .address') + for backup_mac in $backup_macs; do + if [ "$backup_mac" != "${mac[1]}" ] && [ "$backup_mac" != "${mac[2]}" ]; then + RET=1 + fi + done +} + +check_first_slave_random_mac() +{ + RET=0 + # remove the first added slave and added it back + ip -n "$s_ns" link set eth0 nomaster + ip -n "$s_ns" link set eth0 master bond0 + + # the first slave should use random mac address + eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]') + [ "$eth0_mac" = "${mac[0]}" ] && RET=1 + log_test "bond fail_over_mac follow" "random first slave mac" + + # remove the first slave, the permanent MAC address should be restored back + ip -n "$s_ns" link set eth0 nomaster + eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]') + [ "$eth0_mac" != "${mac[0]}" ] && RET=1 +} + +do_active_backup_failover() +{ + local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + ip -n ${s_ns} link set ${active_slave} down + slowwait 2 active_slave_changed $active_slave + ip -n ${s_ns} link set ${active_slave} up +} + +fail_over_mac() +{ + # Bring down the first interface on the switch to force the bond to + # select another active interface instead of the first one that joined. + ip -n "$g_ns" link set s0 down + + # fail_over_mac none + bond_reset "mode active-backup miimon 100 fail_over_mac 0" + check_all_mac_same + log_test "fail_over_mac 0" "all slaves have same mac" + do_active_backup_failover + check_all_mac_same + log_test "fail_over_mac 0" "failover: all slaves have same mac" + + # fail_over_mac active + bond_reset "mode active-backup miimon 100 fail_over_mac 1" + check_bond_mac_same_with_active + log_test "fail_over_mac 1" "bond mac is same with active slave mac" + check_backup_slave_mac_not_change + log_test "fail_over_mac 1" "backup slave mac is not changed" + do_active_backup_failover + check_bond_mac_same_with_active + log_test "fail_over_mac 1" "failover: bond mac is same with active slave mac" + check_backup_slave_mac_not_change + log_test "fail_over_mac 1" "failover: backup slave mac is not changed" + + # fail_over_mac follow + bond_reset "mode active-backup miimon 100 fail_over_mac 2" + check_bond_mac_same_with_first + log_test "fail_over_mac 2" "bond mac is same with first slave mac" + check_bond_mac_same_with_active + log_test "fail_over_mac 2" "bond mac is same with active slave mac" + check_backup_slave_mac_inherit + log_test "fail_over_mac 2" "backup slave mac inherit" + do_active_backup_failover + check_bond_mac_same_with_first + log_test "fail_over_mac 2" "failover: bond mac is same with first slave mac" + check_bond_mac_same_with_active + log_test "fail_over_mac 2" "failover: bond mac is same with active slave mac" + check_backup_slave_mac_inherit + log_test "fail_over_mac 2" "failover: backup slave mac inherit" + check_first_slave_random_mac + log_test "fail_over_mac 2" "first slave mac random" +} + +vlan_over_bond_arp() +{ + local mode="$1" + RET=0 + + bond_reset "mode $mode arp_interval 100 arp_ip_target 192.0.3.10" + ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3 + ip -n "${s_ns}" link set bond0.3 up + ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3 + ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3 + + slowwait_for_counter 5 5 tc_rule_handle_stats_get \ + "dev eth0.3 ingress" 101 ".packets" "-n ${c_ns}" &> /dev/null || RET=1 + log_test "vlan over bond arp" "$mode" +} + +vlan_over_bond_ns() +{ + local mode="$1" + RET=0 + + if skip_ns; then + log_test_skip "vlan_over_bond ns" "$mode" + return 0 + fi + + bond_reset "mode $mode arp_interval 100 ns_ip6_target 2001:db8::3:10" + ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3 + ip -n "${s_ns}" link set bond0.3 up + ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3 + ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3 + + slowwait_for_counter 5 5 tc_rule_handle_stats_get \ + "dev eth0.3 ingress" 102 ".packets" "-n ${c_ns}" &> /dev/null || RET=1 + log_test "vlan over bond ns" "$mode" +} + +vlan_over_bond() +{ + # add vlan 3 for client + ip -n "${c_ns}" link add eth0.3 link eth0 type vlan id 3 + ip -n "${c_ns}" link set eth0.3 up + ip -n "${c_ns}" addr add 192.0.3.10/24 dev eth0.3 + ip -n "${c_ns}" addr add 2001:db8::3:10/64 dev eth0.3 + + # Add tc rule to check the vlan pkts + tc -n "${c_ns}" qdisc add dev eth0.3 clsact + tc -n "${c_ns}" filter add dev eth0.3 ingress protocol arp \ + handle 101 flower skip_hw arp_op request \ + arp_sip 192.0.3.1 arp_tip 192.0.3.10 action pass + tc -n "${c_ns}" filter add dev eth0.3 ingress protocol ipv6 \ + handle 102 flower skip_hw ip_proto icmpv6 \ + type 135 src_ip 2001:db8::3:1 action pass + + vlan_over_bond_arp "active-backup" + vlan_over_bond_ns "active-backup" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh new file mode 100755 index 000000000000..9c3b089813df --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test if a bond interface works with lacp_active=off. + +# shellcheck disable=SC2034 +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/forwarding/lib.sh + +# shellcheck disable=SC2317 +check_port_state() +{ + local netns=$1 + local port=$2 + local state=$3 + + ip -n "${netns}" -d -j link show "$port" | \ + jq -e ".[].linkinfo.info_slave_data.ad_actor_oper_port_state_str | index(\"${state}\") != null" > /dev/null +} + +check_pkt_count() +{ + RET=0 + local ns="$1" + local iface="$2" + + # wait 65s, one per 30s + slowwait_for_counter 65 2 tc_rule_handle_stats_get \ + "dev ${iface} egress" 101 ".packets" "-n ${ns}" &> /dev/null +} + +setup() { + setup_ns c_ns s_ns + + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" + ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" + + # Add tc filter to count the pkts + tc -n "${c_ns}" qdisc add dev eth0 clsact + tc -n "${c_ns}" filter add dev eth0 egress handle 101 protocol 0x8809 matchall action pass + tc -n "${s_ns}" qdisc add dev eth1 clsact + tc -n "${s_ns}" filter add dev eth1 egress handle 101 protocol 0x8809 matchall action pass + + ip -n "${s_ns}" link add bond0 type bond mode 802.3ad lacp_active on lacp_rate fast + ip -n "${s_ns}" link set eth0 master bond0 + ip -n "${s_ns}" link set eth1 master bond0 + + ip -n "${c_ns}" link add bond0 type bond mode 802.3ad lacp_active off lacp_rate fast + ip -n "${c_ns}" link set eth0 master bond0 + ip -n "${c_ns}" link set eth1 master bond0 + +} + +trap cleanup_all_ns EXIT +setup + +# The bond will send 2 lacpdu pkts during init time, let's wait at least 2s +# after interface up +ip -n "${c_ns}" link set bond0 up +sleep 2 + +# 1. The passive side shouldn't send LACPDU. +check_pkt_count "${c_ns}" "eth0" && RET=1 +log_test "802.3ad lacp_active off" "init port" + +ip -n "${s_ns}" link set bond0 up +# 2. The passive side should not have the 'active' flag. +RET=0 +slowwait 2 check_port_state "${c_ns}" "eth0" "active" && RET=1 +log_test "802.3ad lacp_active off" "port state active" + +# 3. The active side should have the 'active' flag. +RET=0 +slowwait 2 check_port_state "${s_ns}" "eth0" "active" || RET=1 +log_test "802.3ad lacp_active on" "port state active" + +# 4. Make sure the connection is not expired. +RET=0 +slowwait 5 check_port_state "${s_ns}" "eth0" "distributing" +slowwait 10 check_port_state "${s_ns}" "eth0" "expired" && RET=1 +log_test "bond 802.3ad lacp_active off" "port connection" + +# After testing, disconnect one port on each side to check the state. +ip -n "${s_ns}" link set eth0 nomaster +ip -n "${s_ns}" link set eth0 up +ip -n "${c_ns}" link set eth1 nomaster +ip -n "${c_ns}" link set eth1 up +# Due to Periodic Machine and Rx Machine state change, the bond will still +# send lacpdu pkts in a few seconds. sleep at lease 5s to make sure +# negotiation finished +sleep 5 + +# 5. The active side should keep sending LACPDU. +check_pkt_count "${s_ns}" "eth1" || RET=1 +log_test "bond 802.3ad lacp_active on" "port pkt after disconnect" + +# 6. The passive side shouldn't send LACPDU anymore. +check_pkt_count "${c_ns}" "eth0" && RET=1 +log_test "bond 802.3ad lacp_active off" "port pkt after disconnect" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh new file mode 100644 index 000000000000..167aa4a4a12a --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh @@ -0,0 +1,161 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Topology for Bond mode 1,5,6 testing +# +# +-------------------------+ +# | bond0 | Server +# | + | 192.0.2.1/24 +# | eth0 | eth1 | 2001:db8::1/24 +# | +---+---+ | +# | | | | +# +-------------------------+ +# | | +# +-------------------------+ +# | | | | +# | +---+-------+---+ | Gateway +# | | br0 | | 192.0.2.254/24 +# | +-------+-------+ | 2001:db8::254/24 +# | | | +# +-------------------------+ +# | +# +-------------------------+ +# | | | Client +# | + | 192.0.2.10/24 +# | eth0 | 2001:db8::10/24 +# +-------------------------+ + +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +s_ns="s-$(mktemp -u XXXXXX)" +c_ns="c-$(mktemp -u XXXXXX)" +g_ns="g-$(mktemp -u XXXXXX)" +s_ip4="192.0.2.1" +c_ip4="192.0.2.10" +g_ip4="192.0.2.254" +s_ip6="2001:db8::1" +c_ip6="2001:db8::10" +g_ip6="2001:db8::254" +mac[0]="00:0a:0b:0c:0d:01" +mac[1]="00:0a:0b:0c:0d:02" + +gateway_create() +{ + ip netns add ${g_ns} + ip -n ${g_ns} link add br0 type bridge + ip -n ${g_ns} link set br0 up + ip -n ${g_ns} addr add ${g_ip4}/24 dev br0 + ip -n ${g_ns} addr add ${g_ip6}/24 dev br0 +} + +gateway_destroy() +{ + ip -n ${g_ns} link del br0 + ip netns del ${g_ns} +} + +server_create() +{ + ip netns add ${s_ns} + ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100 + + for i in $(seq 0 1); do + ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns} + ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}" + + ip -n ${g_ns} link set s${i} up + ip -n ${g_ns} link set s${i} master br0 + ip -n ${s_ns} link set eth${i} master bond0 + + tc -n ${g_ns} qdisc add dev s${i} clsact + done + + ip -n ${s_ns} link set bond0 up + ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0 + ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0 +} + +# Reset bond with new mode and options +bond_reset() +{ + # Count the eth link number in real-time as this function + # maybe called from other topologies. + local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth") + local param="$1" + link_num=$((link_num -1)) + + ip -n ${s_ns} link set bond0 down + ip -n ${s_ns} link del bond0 + + ip -n ${s_ns} link add bond0 type bond $param + for i in $(seq 0 ${link_num}); do + ip -n ${s_ns} link set eth$i master bond0 + done + + ip -n ${s_ns} link set bond0 up + ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0 + ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0 + # Wait for IPv6 address ready as it needs DAD + slowwait 2 ip netns exec ${s_ns} ping6 ${c_ip6} -c 1 -W 0.1 &> /dev/null +} + +server_destroy() +{ + # Count the eth link number in real-time as this function + # maybe called from other topologies. + local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth") + link_num=$((link_num -1)) + for i in $(seq 0 ${link_num}); do + ip -n ${s_ns} link del eth${i} + done + ip netns del ${s_ns} +} + +client_create() +{ + ip netns add ${c_ns} + ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns} + + ip -n ${g_ns} link set c0 up + ip -n ${g_ns} link set c0 master br0 + + ip -n ${c_ns} link set eth0 up + ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0 + ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0 +} + +client_destroy() +{ + ip -n ${c_ns} link del eth0 + ip netns del ${c_ns} +} + +setup_prepare() +{ + gateway_create + server_create + client_create +} + +cleanup() +{ + pre_cleanup + + client_destroy + server_destroy + gateway_destroy +} + +bond_check_connection() +{ + local msg=${1:-"check connection"} + + slowwait 2 ip netns exec ${s_ns} ping ${c_ip4} -c 1 -W 0.1 &> /dev/null + ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null + check_err $? "${msg}: ping failed" + ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null + check_err $? "${msg}: ping6 failed" +} diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh new file mode 100644 index 000000000000..23a2932301cc --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Topology for Bond mode 1,5,6 testing +# +# +-------------------------------------+ +# | bond0 | +# | + | Server +# | eth0 | eth1 eth2 | 192.0.2.1/24 +# | +-------------------+ | 2001:db8::1/24 +# | | | | | +# +-------------------------------------+ +# | | | +# +-------------------------------------+ +# | | | | | +# | +---+---------+---------+---+ | Gateway +# | | br0 | | 192.0.2.254/24 +# | +-------------+-------------+ | 2001:db8::254/24 +# | | | +# +-------------------------------------+ +# | +# +-------------------------------------+ +# | | | Client +# | + | 192.0.2.10/24 +# | eth0 | 2001:db8::10/24 +# +-------------------------------------+ + +source bond_topo_2d1c.sh +mac[2]="00:0a:0b:0c:0d:03" + +setup_prepare() +{ + gateway_create + server_create + client_create + + # Add the extra device as we use 3 down links for bond0 + local i=2 + ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns} + ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}" + ip -n ${g_ns} link set s${i} up + ip -n ${g_ns} link set s${i} master br0 + ip -n ${s_ns} link set eth${i} master bond0 + tc -n ${g_ns} qdisc add dev s${i} clsact +} diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config new file mode 100644 index 000000000000..991494376223 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -0,0 +1,21 @@ +CONFIG_BONDING=y +CONFIG_BRIDGE=y +CONFIG_CONFIGFS_FS=y +CONFIG_DUMMY=y +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y +CONFIG_IPV6=y +CONFIG_IPVLAN=y +CONFIG_MACVLAN=y +CONFIG_NET_ACT_GACT=y +CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETCONSOLE_EXTENDED_LOG=y +CONFIG_NETDEVSIM=m +CONFIG_NET_SCH_INGRESS=y +CONFIG_NLMON=y +CONFIG_VETH=y +CONFIG_VLAN_8021Q=m +CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh new file mode 100755 index 000000000000..e6fa24eded5b --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh @@ -0,0 +1,109 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bond device handling of addr lists (dev->uc, mc) +# + +ALL_TESTS=" + bond_cleanup_mode1 + bond_cleanup_mode4 + bond_listen_lacpdu_multicast_case_down + bond_listen_lacpdu_multicast_case_up +" + +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +source "$lib_dir"/lag_lib.sh + + +destroy() +{ + local ifnames=(dummy1 dummy2 bond1 mv0) + local ifname + + for ifname in "${ifnames[@]}"; do + ip link del "$ifname" &>/dev/null + done +} + +cleanup() +{ + pre_cleanup + + destroy +} + + +# bond driver control paths vary between modes that have a primary slave +# (bond_uses_primary()) and others. Test both kinds of modes. + +bond_cleanup_mode1() +{ + RET=0 + + test_LAG_cleanup "bonding" "active-backup" +} + +bond_cleanup_mode4() { + RET=0 + + test_LAG_cleanup "bonding" "802.3ad" +} + +bond_listen_lacpdu_multicast() +{ + # Initial state of bond device, up | down + local init_state=$1 + local lacpdu_mc="01:80:c2:00:00:02" + + ip link add dummy1 type dummy + ip link add bond1 "$init_state" type bond mode 802.3ad + ip link set dev dummy1 master bond1 + if [ "$init_state" = "down" ]; then + ip link set dev bond1 up + fi + + grep_bridge_fdb "$lacpdu_mc" bridge fdb show brport dummy1 >/dev/null + check_err $? "LACPDU multicast address not present on slave (1)" + + ip link set dev bond1 down + + not grep_bridge_fdb "$lacpdu_mc" bridge fdb show brport dummy1 >/dev/null + check_err $? "LACPDU multicast address still present on slave" + + ip link set dev bond1 up + + grep_bridge_fdb "$lacpdu_mc" bridge fdb show brport dummy1 >/dev/null + check_err $? "LACPDU multicast address not present on slave (2)" + + cleanup + + log_test "bonding LACPDU multicast address to slave (from bond $init_state)" +} + +# The LACPDU mc addr is added by different paths depending on the initial state +# of the bond when enslaving a device. Test both cases. + +bond_listen_lacpdu_multicast_case_down() +{ + RET=0 + + bond_listen_lacpdu_multicast "down" +} + +bond_listen_lacpdu_multicast_case_up() +{ + RET=0 + + bond_listen_lacpdu_multicast "up" +} + + +trap cleanup EXIT + +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh new file mode 100644 index 000000000000..bf9bcd1b5ec0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NAMESPACES="" + +# Test that a link aggregation device (bonding, team) removes the hardware +# addresses that it adds on its underlying devices. +test_LAG_cleanup() +{ + local driver=$1 + local mode=$2 + local ucaddr="02:00:00:12:34:56" + local addr6="fe80::78:9abc/64" + local mcaddr="33:33:ff:78:9a:bc" + local name + + ip link add dummy1 type dummy + ip link add dummy2 type dummy + if [ "$driver" = "bonding" ]; then + name="bond1" + ip link add "$name" up type bond mode "$mode" + ip link set dev dummy1 master "$name" + ip link set dev dummy2 master "$name" + elif [ "$driver" = "team" ]; then + name="team0" + teamd -d -c ' + { + "device": "'"$name"'", + "runner": { + "name": "'"$mode"'" + }, + "ports": { + "dummy1": + {}, + "dummy2": + {} + } + } + ' + ip link set dev "$name" up + else + check_err 1 + log_test test_LAG_cleanup ": unknown driver \"$driver\"" + return + fi + + # Used to test dev->uc handling + ip link add mv0 link "$name" up address "$ucaddr" type macvlan + # Used to test dev->mc handling + ip address add "$addr6" dev "$name" + + # Check that addresses were added as expected + (grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "macvlan unicast address not found on a slave" + + # mcaddr is added asynchronously by addrconf_dad_work(), use busywait + (busywait 10000 grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "IPv6 solicited-node multicast mac address not found on a slave" + + ip link set dev "$name" down + ip link del "$name" + + not grep_bridge_fdb "$ucaddr" bridge fdb show >/dev/null + check_err $? "macvlan unicast address still present on a slave" + + not grep_bridge_fdb "$mcaddr" bridge fdb show >/dev/null + check_err $? "IPv6 solicited-node multicast mac address still present on a slave" + + cleanup + + log_test "$driver cleanup mode $mode" +} + +# Build a generic 2 node net namespace with 2 connections +# between the namespaces +# +# +-----------+ +-----------+ +# | node1 | | node2 | +# | | | | +# | | | | +# | eth0 +-------+ eth0 | +# | | | | +# | eth1 +-------+ eth1 | +# | | | | +# +-----------+ +-----------+ +lag_setup2x2() +{ + local state=${1:-down} + local namespaces="lag_node1 lag_node2" + + # create namespaces + for n in ${namespaces}; do + ip netns add ${n} + done + + # wire up namespaces + ip link add name lag1 type veth peer name lag1-end + ip link set dev lag1 netns lag_node1 $state name eth0 + ip link set dev lag1-end netns lag_node2 $state name eth0 + + ip link add name lag1 type veth peer name lag1-end + ip link set dev lag1 netns lag_node1 $state name eth1 + ip link set dev lag1-end netns lag_node2 $state name eth1 + + NAMESPACES="${namespaces}" +} + +# cleanup all lag related namespaces +lag_cleanup() +{ + for n in ${NAMESPACES}; do + ip netns delete ${n} >/dev/null 2>&1 || true + done +} + +SWITCH="lag_node1" +CLIENT="lag_node2" +CLIENTIP="172.20.2.1" +SWITCHIP="172.20.2.2" + +lag_setup_network() +{ + lag_setup2x2 "down" + + # create switch + ip netns exec ${SWITCH} ip link add br0 up type bridge + ip netns exec ${SWITCH} ip link set eth0 master br0 up + ip netns exec ${SWITCH} ip link set eth1 master br0 up + ip netns exec ${SWITCH} ip addr add ${SWITCHIP}/24 dev br0 +} + +lag_reset_network() +{ + ip netns exec ${CLIENT} ip link del bond0 + ip netns exec ${SWITCH} ip link set eth0 up + ip netns exec ${SWITCH} ip link set eth1 up +} + +create_bond() +{ + # create client + ip netns exec ${CLIENT} ip link set eth0 down + ip netns exec ${CLIENT} ip link set eth1 down + + ip netns exec ${CLIENT} ip link add bond0 type bond $@ + ip netns exec ${CLIENT} ip link set eth0 master bond0 + ip netns exec ${CLIENT} ip link set eth1 master bond0 + ip netns exec ${CLIENT} ip link set bond0 up + ip netns exec ${CLIENT} ip addr add ${CLIENTIP}/24 dev bond0 +} + +test_bond_recovery() +{ + RET=0 + + create_bond $@ + + # verify connectivity + slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null + check_err $? "No connectivity" + + # force the links of the bond down + ip netns exec ${SWITCH} ip link set eth0 down + sleep 2 + ip netns exec ${SWITCH} ip link set eth0 up + ip netns exec ${SWITCH} ip link set eth1 down + + # re-verify connectivity + slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null + + local rc=$? + check_err $rc "Bond failed to recover" + log_test "$1 ($2) bond recovery" + lag_reset_network +} diff --git a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh new file mode 100755 index 000000000000..9d26ab4cad0b --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Regression Test: +# When the bond is configured with down/updelay and the link state of +# slave members flaps if there are no remaining members up the bond +# should immediately select a member to bring up. (from bonding.txt +# section 13.1 paragraph 4) +# +# +-------------+ +-----------+ +# | client | | switch | +# | | | | +# | +--------| link1 |-----+ | +# | | +-------+ | | +# | | | | | | +# | | +-------+ | | +# | | bond | link2 | Br0 | | +# +-------------+ +-----------+ +# 172.20.2.1 172.20.2.2 + + +REQUIRE_MZ=no +REQUIRE_JQ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh +source "$lib_dir"/lag_lib.sh + +cleanup() +{ + lag_cleanup +} + +trap cleanup 0 1 2 + +lag_setup_network +test_bond_recovery mode 1 miimon 100 updelay 0 +test_bond_recovery mode 1 miimon 100 updelay 200 +test_bond_recovery mode 1 miimon 100 updelay 500 +test_bond_recovery mode 1 miimon 100 updelay 1000 +test_bond_recovery mode 1 miimon 100 updelay 2000 +test_bond_recovery mode 1 miimon 100 updelay 5000 +test_bond_recovery mode 1 miimon 100 updelay 10000 + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh new file mode 100755 index 000000000000..2d275b3e47dd --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Regression Test: +# When the bond is configured with down/updelay and the link state of +# slave members flaps if there are no remaining members up the bond +# should immediately select a member to bring up. (from bonding.txt +# section 13.1 paragraph 4) +# +# +-------------+ +-----------+ +# | client | | switch | +# | | | | +# | +--------| link1 |-----+ | +# | | +-------+ | | +# | | | | | | +# | | +-------+ | | +# | | bond | link2 | Br0 | | +# +-------------+ +-----------+ +# 172.20.2.1 172.20.2.2 + + +REQUIRE_MZ=no +REQUIRE_JQ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh +source "$lib_dir"/lag_lib.sh + +cleanup() +{ + lag_cleanup +} + +trap cleanup 0 1 2 + +lag_setup_network +test_bond_recovery mode 2 miimon 100 updelay 0 +test_bond_recovery mode 2 miimon 100 updelay 200 +test_bond_recovery mode 2 miimon 100 updelay 500 +test_bond_recovery mode 2 miimon 100 updelay 1000 +test_bond_recovery mode 2 miimon 100 updelay 2000 +test_bond_recovery mode 2 miimon 100 updelay 5000 +test_bond_recovery mode 2 miimon 100 updelay 10000 + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh new file mode 100755 index 000000000000..477cc9379500 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh @@ -0,0 +1,361 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 +# +# This selftest exercises trying to have multiple netpoll users at the same +# time. +# +# This selftest has multiple smalls test inside, and the goal is to +# get interfaces with bonding and netconsole in different orders in order +# to catch any possible issue. +# +# The main test composes of four interfaces being created using netdevsim; two +# of them are bonded to serve as the netconsole's transmit interface. The +# remaining two interfaces are similarly bonded and assigned to a separate +# network namespace, which acts as the receive interface, where socat monitors +# for incoming messages. +# +# A netconsole message is then sent to ensure it is properly received across +# this configuration. +# +# Later, run a few other tests, to make sure that bonding and netconsole +# cannot coexist. +# +# The test's objective is to exercise netpoll usage when managed simultaneously +# by multiple subsystems (netconsole and bonding). +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true +modprobe bonding 2> /dev/null || true +modprobe veth 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup_bond EXIT + +FORMAT="extended" +IP_VERSION="ipv4" +VETH0="veth"$(( RANDOM % 256)) +VETH1="veth"$((256 + RANDOM % 256)) +TXNS="" +RXNS="" + +# Create "bond_tx_XX" and "bond_rx_XX" interfaces, and set DSTIF and SRCIF with +# the bonding interfaces +function setup_bonding_ifaces() { + local RAND=$(( RANDOM % 100 )) + BOND_TX_MAIN_IF="bond_tx_$RAND" + BOND_RX_MAIN_IF="bond_rx_$RAND" + + # Setup TX + if ! ip -n "${TXNS}" link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr + then + echo "Failed to create bond TX interface. Is CONFIG_BONDING set?" >&2 + # only clean nsim ifaces and namespace. Nothing else has been + # initialized + cleanup_bond_nsim + trap - EXIT + exit "${ksft_skip}" + fi + + # create_netdevsim() got the interface up, but it needs to be down + # before being enslaved. + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX_MAIN_IF}" up + + # Setup RX + ip -n "${RXNS}" \ + link add "${BOND_RX_MAIN_IF}" type bond mode balance-rr + ip -n "${RXNS}" \ + link set "${BOND_RX1_SLAVE_IF}" down + ip -n "${RXNS}" \ + link set "${BOND_RX2_SLAVE_IF}" down + ip -n "${RXNS}" \ + link set "${BOND_RX1_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" + ip -n "${RXNS}" \ + link set "${BOND_RX2_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" + ip -n "${RXNS}" \ + link set "${BOND_RX_MAIN_IF}" up + + export DSTIF="${BOND_RX_MAIN_IF}" + export SRCIF="${BOND_TX_MAIN_IF}" +} + +# Create 4 netdevsim interfaces. Two of them will be bound to TX bonding iface +# and the other two will be bond to the RX interface (on the other namespace) +function create_ifaces_bond() { + BOND_TX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_1}" "${TXNS}") + BOND_TX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_2}" "${TXNS}") + BOND_RX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_1}" "${RXNS}") + BOND_RX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_2}" "${RXNS}") +} + +# netdevsim link BOND_TX to BOND_RX interfaces +function link_ifaces_bond() { + local BOND_TX1_SLAVE_IFIDX + local BOND_TX2_SLAVE_IFIDX + local BOND_RX1_SLAVE_IFIDX + local BOND_RX2_SLAVE_IFIDX + local TXNS_FD + local RXNS_FD + + BOND_TX1_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ + cat /sys/class/net/"$BOND_TX1_SLAVE_IF"/ifindex) + BOND_TX2_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ + cat /sys/class/net/"$BOND_TX2_SLAVE_IF"/ifindex) + BOND_RX1_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ + cat /sys/class/net/"$BOND_RX1_SLAVE_IF"/ifindex) + BOND_RX2_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ + cat /sys/class/net/"$BOND_RX2_SLAVE_IF"/ifindex) + + exec {TXNS_FD}</var/run/netns/"${TXNS}" + exec {RXNS_FD}</var/run/netns/"${RXNS}" + + # Linking TX ifaces to the RX ones (on the other namespace) + echo "${TXNS_FD}:$BOND_TX1_SLAVE_IFIDX $RXNS_FD:$BOND_RX1_SLAVE_IFIDX" \ + > "$NSIM_DEV_SYS_LINK" + echo "${TXNS_FD}:$BOND_TX2_SLAVE_IFIDX $RXNS_FD:$BOND_RX2_SLAVE_IFIDX" \ + > "$NSIM_DEV_SYS_LINK" + + exec {TXNS_FD}<&- + exec {RXNS_FD}<&- +} + +function create_all_ifaces() { + # setup_ns function is coming from lib.sh + setup_ns TXNS RXNS + export NAMESPACE="${RXNS}" + + # Create two interfaces for RX and two for TX + create_ifaces_bond + # Link netlink ifaces + link_ifaces_bond +} + +# configure DSTIF and SRCIF IPs +function configure_ifaces_ips() { + local IP_VERSION=${1:-"ipv4"} + select_ipv4_or_ipv6 "${IP_VERSION}" + + ip -n "${RXNS}" addr add "${DSTIP}"/24 dev "${DSTIF}" + ip -n "${RXNS}" link set "${DSTIF}" up + + ip -n "${TXNS}" addr add "${SRCIP}"/24 dev "${SRCIF}" + ip -n "${TXNS}" link set "${SRCIF}" up +} + +function test_enable_netpoll_on_enslaved_iface() { + echo 0 > "${NETCONS_PATH}"/enabled + + # At this stage, BOND_TX1_SLAVE_IF is enslaved to BOND_TX_MAIN_IF, and + # linked to BOND_RX1_SLAVE_IF inside the namespace. + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name + + # This should fail with the following message in dmesg: + # netpoll: netconsole: ethX is a slave device, aborting + set +e + enable_netcons_ns 2> /dev/null + set -e + + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] + then + echo "test failed: Bonding and netpoll cannot co-exists." >&2 + exit "${ksft_fail}" + fi +} + +function test_delete_bond_and_reenable_target() { + ip -n "${TXNS}" \ + link delete "${BOND_TX_MAIN_IF}" type bond + + # BOND_TX1_SLAVE_IF is not attached to a bond interface anymore + # netpoll can be plugged in there + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name + + # this should work, since the interface is not enslaved + enable_netcons_ns + + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: Unable to start netpoll on an unbond iface." >&2 + exit "${ksft_fail}" + fi +} + +# Send a netconsole message to the netconsole target +function test_send_netcons_msg_through_bond_iface() { + # Listen for netconsole port inside the namespace and + # destination interface + listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" & + # Wait for socat to start and listen to the port. + wait_for_port "${RXNS}" "${PORT}" "${IP_VERSION}" + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + # kill socat in case it is still running + pkill_socat +} + +# BOND_TX1_SLAVE_IF has netconsole enabled on it, bind it to BOND_TX_MAIN_IF. +# Given BOND_TX_MAIN_IF was deleted, recreate it first +function test_enslave_netcons_enabled_iface { + # netconsole got disabled while the interface was down + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: netconsole expected to be enabled against BOND_TX1_SLAVE_IF" >&2 + exit "${ksft_fail}" + fi + + # recreate the bonding iface. it got deleted by previous + # test (test_delete_bond_and_reenable_target) + ip -n "${TXNS}" \ + link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr + + # sub-interface need to be down before attaching to bonding + # This will also disable netconsole. + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX_MAIN_IF}" up + + # netconsole got disabled while the interface was down + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] + then + echo "test failed: Device is part of a bond iface, cannot have netcons enabled" >&2 + exit "${ksft_fail}" + fi +} + +# Get netconsole enabled on a bonding interface and attach a second +# sub-interface. +function test_enslave_iface_to_bond { + # BOND_TX_MAIN_IF has only BOND_TX1_SLAVE_IF right now + echo "${BOND_TX_MAIN_IF}" > "${NETCONS_PATH}"/dev_name + enable_netcons_ns + + # netcons is attached to bond0 and BOND_TX1_SLAVE_IF is + # part of BOND_TX_MAIN_IF. Attach BOND_TX2_SLAVE_IF to BOND_TX_MAIN_IF. + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: Netconsole should be enabled on bonding interface. Failed" >&2 + exit "${ksft_fail}" + fi +} + +function test_enslave_iff_disabled_netpoll_iface { + local ret + + # Create two interfaces. veth interfaces it known to have + # IFF_DISABLE_NETPOLL set + if ! ip link add "${VETH0}" type veth peer name "${VETH1}" + then + echo "Failed to create veth TX interface. Is CONFIG_VETH set?" >&2 + exit "${ksft_skip}" + fi + set +e + # This will print RTNETLINK answers: Device or resource busy + ip link set "${VETH0}" master "${BOND_TX_MAIN_IF}" 2> /dev/null + ret=$? + set -e + if [[ $ret -eq 0 ]] + then + echo "test failed: veth interface could not be enslaved" + exit "${ksft_fail}" + fi +} + +# Given that netconsole picks the current net namespace, we need to enable it +# from inside the TXNS namespace +function enable_netcons_ns() { + ip netns exec "${TXNS}" sh -c \ + "mount -t configfs configfs /sys/kernel/config && echo 1 > $NETCONS_PATH/enabled" +} + +#################### +# Tests start here # +#################### + +# Create regular interfaces using netdevsim and link them +create_all_ifaces + +# Setup the bonding interfaces +# BOND_RX_MAIN_IF has BOND_RX{1,2}_SLAVE_IF +# BOND_TX_MAIN_IF has BOND_TX{1,2}_SLAVE_IF +setup_bonding_ifaces + +# Configure the ips as BOND_RX1_SLAVE_IF and BOND_TX1_SLAVE_IF +configure_ifaces_ips "${IP_VERSION}" + +_create_dynamic_target "${FORMAT}" "${NETCONS_PATH}" +enable_netcons_ns +set_user_data + +# Test #1 : Create an bonding interface and attach netpoll into +# the bonding interface. Netconsole/netpoll should work on +# the bonding interface. +test_send_netcons_msg_through_bond_iface +echo "test #1: netpoll on bonding interface worked. Test passed" >&2 + +# Test #2: Attach netpoll to an enslaved interface +# Try to attach netpoll to an enslaved sub-interface (while still being part of +# a bonding interface), which shouldn't be allowed +test_enable_netpoll_on_enslaved_iface +echo "test #2: netpoll correctly rejected enslaved interface (expected behavior). Test passed." >&2 + +# Test #3: Unplug the sub-interface from bond and enable netconsole +# Detach the interface from a bonding interface and attach netpoll again +test_delete_bond_and_reenable_target +echo "test #3: Able to attach to an unbound interface. Test passed." >&2 + +# Test #4: Enslave a sub-interface that had netconsole enabled +# Try to enslave an interface that has netconsole/netpoll enabled. +# Previous test has netconsole enabled in BOND_TX1_SLAVE_IF, try to enslave it +test_enslave_netcons_enabled_iface +echo "test #4: Enslaving an interface with netpoll attached. Test passed." >&2 + +# Test #5: Enslave a sub-interface to a bonding interface +# Enslave an interface to a bond interface that has netpoll attached +# At this stage, BOND_TX_MAIN_IF is created and BOND_TX1_SLAVE_IF is part of +# it. Netconsole is currently disabled +test_enslave_iface_to_bond +echo "test #5: Enslaving an interface to bond+netpoll. Test passed." >&2 + +# Test #6: Enslave a IFF_DISABLE_NETPOLL sub-interface to a bonding interface +# At this stage, BOND_TX_MAIN_IF has both sub interface and netconsole is +# enabled. This test will try to enslave an a veth (IFF_DISABLE_NETPOLL) interface +# and it should fail, with netpoll: veth0 doesn't support polling +test_enslave_iff_disabled_netpoll_iface +echo "test #6: Enslaving IFF_DISABLE_NETPOLL ifaces to bond iface is not supported. Test passed." >&2 + +cleanup_bond +trap - EXIT +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings new file mode 100644 index 000000000000..79b65bdf05db --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/settings @@ -0,0 +1 @@ +timeout=1200 diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config new file mode 100644 index 000000000000..77ccf83d87e0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/config @@ -0,0 +1,10 @@ +CONFIG_CONFIGFS_FS=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_BTF_MODULES=n +CONFIG_INET_PSP=y +CONFIG_IPV6=y +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETCONSOLE_EXTENDED_LOG=y +CONFIG_NETDEVSIM=m +CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile new file mode 100644 index 000000000000..7994bd0e5c44 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/Makefile @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +TEST_PROGS := \ + bridge_locked_port.sh \ + bridge_mdb.sh \ + bridge_mld.sh \ + bridge_vlan_aware.sh \ + bridge_vlan_mcast.sh \ + bridge_vlan_unaware.sh \ + local_termination.sh \ + no_forwarding.sh \ + tc_actions.sh \ + test_bridge_fdb_stress.sh \ +# end of TEST_PROGS + +TEST_FILES := \ + forwarding.config \ + run_net_forwarding_test.sh \ +# end of TEST_FILES + +TEST_INCLUDES := \ + ../../../net/forwarding/bridge_locked_port.sh \ + ../../../net/forwarding/bridge_mdb.sh \ + ../../../net/forwarding/bridge_mld.sh \ + ../../../net/forwarding/bridge_vlan_aware.sh \ + ../../../net/forwarding/bridge_vlan_mcast.sh \ + ../../../net/forwarding/bridge_vlan_unaware.sh \ + ../../../net/forwarding/lib.sh \ + ../../../net/forwarding/local_termination.sh \ + ../../../net/forwarding/no_forwarding.sh \ + ../../../net/forwarding/tc_actions.sh \ + ../../../net/forwarding/tc_common.sh \ + ../../../net/lib.sh \ +# end of TEST_INCLUDES + +include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/forwarding.config b/tools/testing/selftests/drivers/net/dsa/forwarding.config new file mode 100644 index 000000000000..7adc1396fae0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/forwarding.config @@ -0,0 +1,2 @@ +NETIF_CREATE=no +STABLE_MAC_ADDRS=yes diff --git a/tools/testing/selftests/drivers/net/dsa/local_termination.sh b/tools/testing/selftests/drivers/net/dsa/local_termination.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/local_termination.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh new file mode 100755 index 000000000000..4106c0a102ea --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +libdir=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +testname=$(basename "${BASH_SOURCE[0]}") + +source "$libdir"/forwarding.config +cd "$libdir"/../../../net/forwarding/ || exit 1 +source "./$testname" "$@" diff --git a/tools/testing/selftests/drivers/net/dsa/tc_actions.sh b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh new file mode 100755 index 000000000000..74682151d04d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Bridge FDB entries can be offloaded to DSA switches without holding the +# rtnl_mutex. Traditionally this mutex has conferred drivers implicit +# serialization, which means their code paths are not well tested in the +# presence of concurrency. +# This test creates a background task that stresses the FDB by adding and +# deleting an entry many times in a row without the rtnl_mutex held. +# It then tests the driver resistance to concurrency by calling .ndo_fdb_dump +# (with rtnl_mutex held) from a foreground task. +# Since either the FDB dump or the additions/removals can fail, but the +# additions and removals are performed in deferred as opposed to process +# context, we cannot simply check for user space error codes. + +WAIT_TIME=1 +NUM_NETIFS=1 +REQUIRE_JQ="no" +REQUIRE_MZ="no" +NETIF_CREATE="no" +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +cleanup() { + echo "Cleaning up" + kill $pid && wait $pid &> /dev/null + ip link del br0 + echo "Please check kernel log for errors" +} +trap 'cleanup' EXIT + +eth=${NETIFS[p1]} + +ip link del br0 2>&1 >/dev/null || : +ip link add br0 type bridge && ip link set $eth master br0 + +(while :; do + bridge fdb add 00:01:02:03:04:05 dev $eth master static + bridge fdb del 00:01:02:03:04:05 dev $eth master static +done) & +pid=$! + +for i in $(seq 1 50); do + bridge fdb show > /dev/null + sleep 3 + echo "$((${i} * 2))% complete..." +done diff --git a/tools/testing/selftests/drivers/net/gro.c b/tools/testing/selftests/drivers/net/gro.c new file mode 100644 index 000000000000..e894037d2e3e --- /dev/null +++ b/tools/testing/selftests/drivers/net/gro.c @@ -0,0 +1,1369 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This testsuite provides conformance testing for GRO coalescing. + * + * Test cases: + * 1.data + * Data packets of the same size and same header setup with correct + * sequence numbers coalesce. The one exception being the last data + * packet coalesced: it can be smaller than the rest and coalesced + * as long as it is in the same flow. + * 2.ack + * Pure ACK does not coalesce. + * 3.flags + * Specific test cases: no packets with PSH, SYN, URG, RST set will + * be coalesced. + * 4.tcp + * Packets with incorrect checksum, non-consecutive seqno and + * different TCP header options shouldn't coalesce. Nit: given that + * some extension headers have paddings, such as timestamp, headers + * that are padding differently would not be coalesced. + * 5.ip: + * Packets with different (ECN, TTL, TOS) header, ip options or + * ip fragments (ipv6) shouldn't coalesce. + * 6.large: + * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce. + * + * MSS is defined as 4096 - header because if it is too small + * (i.e. 1500 MTU - header), it will result in many packets, + * increasing the "large" test case's flakiness. This is because + * due to time sensitivity in the coalescing window, the receiver + * may not coalesce all of the packets. + * + * Note the timing issue applies to all of the test cases, so some + * flakiness is to be expected. + * + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> + +#include "kselftest.h" +#include "../../net/lib/ksft.h" + +#define DPORT 8000 +#define SPORT 1500 +#define PAYLOAD_LEN 100 +#define NUM_PACKETS 4 +#define START_SEQ 100 +#define START_ACK 100 +#define ETH_P_NONE 0 +#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) +#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) +#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) +#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) +#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) +#define MIN_EXTHDR_SIZE 8 +#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00" +#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11" + +#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +static const char *addr6_src = "fdaa::2"; +static const char *addr6_dst = "fdaa::1"; +static const char *addr4_src = "192.168.1.200"; +static const char *addr4_dst = "192.168.1.100"; +static int proto = -1; +static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN]; +static char *testname = "data"; +static char *ifname = "eth0"; +static char *smac = "aa:00:00:00:00:02"; +static char *dmac = "aa:00:00:00:00:01"; +static bool verbose; +static bool tx_socket = true; +static int tcp_offset = -1; +static int total_hdr_len = -1; +static int ethhdr_proto = -1; +static bool ipip; +static const int num_flush_id_cases = 6; + +static void vlog(const char *fmt, ...) +{ + va_list args; + + if (verbose) { + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + } +} + +static void setup_sock_filter(int fd) +{ + const int dport_off = tcp_offset + offsetof(struct tcphdr, dest); + const int ethproto_off = offsetof(struct ethhdr, h_proto); + int optlen = 0; + int ipproto_off, opt_ipproto_off; + int next_off; + + if (ipip) + next_off = sizeof(struct iphdr) + offsetof(struct iphdr, protocol); + else if (proto == PF_INET) + next_off = offsetof(struct iphdr, protocol); + else + next_off = offsetof(struct ipv6hdr, nexthdr); + ipproto_off = ETH_HLEN + next_off; + + /* Overridden later if exthdrs are used: */ + opt_ipproto_off = ipproto_off; + + if (strcmp(testname, "ip") == 0) { + if (proto == PF_INET) + optlen = sizeof(struct ip_timestamp); + else { + BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE); + BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE); + BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE); + + /* same size for HBH and Fragment extension header types */ + optlen = MIN_EXTHDR_SIZE; + opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr) + + offsetof(struct ip6_ext, ip6e_nxt); + } + } + + /* this filter validates the following: + * - packet is IPv4/IPv6 according to the running test. + * - packet is TCP. Also handles the case of one extension header and then TCP. + * - checks the packet tcp dport equals to DPORT. Also handles the case of one + * extension header and then TCP. + */ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1), + BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF), + BPF_STMT(BPF_RET + BPF_K, 0), + }; + + struct sock_fprog bpf = { + .len = ARRAY_SIZE(filter), + .filter = filter, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0) + error(1, errno, "error setting filter"); +} + +static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum) +{ + uint16_t *words = data; + int i; + + for (i = 0; i < len / 2; i++) + sum += words[i]; + if (len & 1) + sum += ((char *)data)[len - 1]; + return sum; +} + +static uint16_t checksum_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum_nofold(data, len, sum); + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + return ~sum; +} + +static uint16_t tcp_checksum(void *buf, int payload_len) +{ + struct pseudo_header6 { + struct in6_addr saddr; + struct in6_addr daddr; + uint16_t protocol; + uint16_t payload_len; + } ph6; + struct pseudo_header4 { + struct in_addr saddr; + struct in_addr daddr; + uint16_t protocol; + uint16_t payload_len; + } ph4; + uint32_t sum = 0; + + if (proto == PF_INET6) { + if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1) + error(1, errno, "inet_pton6 source ip pseudo"); + if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1) + error(1, errno, "inet_pton6 dest ip pseudo"); + ph6.protocol = htons(IPPROTO_TCP); + ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len); + + sum = checksum_nofold(&ph6, sizeof(ph6), 0); + } else if (proto == PF_INET) { + if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1) + error(1, errno, "inet_pton source ip pseudo"); + if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1) + error(1, errno, "inet_pton dest ip pseudo"); + ph4.protocol = htons(IPPROTO_TCP); + ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len); + + sum = checksum_nofold(&ph4, sizeof(ph4), 0); + } + + return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum); +} + +static void read_MAC(uint8_t *mac_addr, char *mac) +{ + if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + &mac_addr[0], &mac_addr[1], &mac_addr[2], + &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) + error(1, 0, "sscanf"); +} + +static void fill_datalinklayer(void *buf) +{ + struct ethhdr *eth = buf; + + memcpy(eth->h_dest, dst_mac, ETH_ALEN); + memcpy(eth->h_source, src_mac, ETH_ALEN); + eth->h_proto = ethhdr_proto; +} + +static void fill_networklayer(void *buf, int payload_len, int protocol) +{ + struct ipv6hdr *ip6h = buf; + struct iphdr *iph = buf; + + if (proto == PF_INET6) { + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); + ip6h->nexthdr = protocol; + ip6h->hop_limit = 8; + if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1) + error(1, errno, "inet_pton source ip6"); + if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1) + error(1, errno, "inet_pton dest ip6"); + } else if (proto == PF_INET) { + memset(iph, 0, sizeof(*iph)); + + iph->version = 4; + iph->ihl = 5; + iph->ttl = 8; + iph->protocol = protocol; + iph->tot_len = htons(sizeof(struct tcphdr) + + payload_len + sizeof(struct iphdr)); + iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ + if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1) + error(1, errno, "inet_pton source ip"); + if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1) + error(1, errno, "inet_pton dest ip"); + iph->check = checksum_fold(buf, sizeof(struct iphdr), 0); + } +} + +static void fill_transportlayer(void *buf, int seq_offset, int ack_offset, + int payload_len, int fin) +{ + struct tcphdr *tcph = buf; + + memset(tcph, 0, sizeof(*tcph)); + + tcph->source = htons(SPORT); + tcph->dest = htons(DPORT); + tcph->seq = ntohl(START_SEQ + seq_offset); + tcph->ack_seq = ntohl(START_ACK + ack_offset); + tcph->ack = 1; + tcph->fin = fin; + tcph->doff = 5; + tcph->window = htons(TCP_MAXWIN); + tcph->urg_ptr = 0; + tcph->check = tcp_checksum(tcph, payload_len); +} + +static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr) +{ + int ret = -1; + + ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr)); + if (ret == -1) + error(1, errno, "sendto failure"); + if (ret != len) + error(1, errno, "sendto wrong length"); +} + +static void create_packet(void *buf, int seq_offset, int ack_offset, + int payload_len, int fin) +{ + memset(buf, 0, total_hdr_len); + memset(buf + total_hdr_len, 'a', payload_len); + + fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset, + payload_len, fin); + + if (ipip) { + fill_networklayer(buf + ETH_HLEN, payload_len + sizeof(struct iphdr), + IPPROTO_IPIP); + fill_networklayer(buf + ETH_HLEN + sizeof(struct iphdr), + payload_len, IPPROTO_TCP); + } else { + fill_networklayer(buf + ETH_HLEN, payload_len, IPPROTO_TCP); + } + + fill_datalinklayer(buf); +} + +/* send one extra flag, not first and not last pkt */ +static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, + int rst, int urg) +{ + static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + int payload_len, pkt_size, flag, i; + struct tcphdr *tcph; + + payload_len = PAYLOAD_LEN * psh; + pkt_size = total_hdr_len + payload_len; + flag = NUM_PACKETS / 2; + + create_packet(flag_buf, flag * payload_len, 0, payload_len, 0); + + tcph = (struct tcphdr *)(flag_buf + tcp_offset); + tcph->psh = psh; + tcph->syn = syn; + tcph->rst = rst; + tcph->urg = urg; + tcph->check = 0; + tcph->check = tcp_checksum(tcph, payload_len); + + for (i = 0; i < NUM_PACKETS + 1; i++) { + if (i == flag) { + write_packet(fd, flag_buf, pkt_size, daddr); + continue; + } + create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); + } +} + +/* Test for data of same length, smaller than previous + * and of different lengths + */ +static void send_data_pkts(int fd, struct sockaddr_ll *daddr, + int payload_len1, int payload_len2) +{ + static char buf[ETH_HLEN + IP_MAXPACKET]; + + create_packet(buf, 0, 0, payload_len1, 0); + write_packet(fd, buf, total_hdr_len + payload_len1, daddr); + create_packet(buf, payload_len1, 0, payload_len2, 0); + write_packet(fd, buf, total_hdr_len + payload_len2, daddr); +} + +/* If incoming segments make tracked segment length exceed + * legal IP datagram length, do not coalesce + */ +static void send_large(int fd, struct sockaddr_ll *daddr, int remainder) +{ + static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS]; + static char last[TOTAL_HDR_LEN + MSS]; + static char new_seg[TOTAL_HDR_LEN + MSS]; + int i; + + for (i = 0; i < NUM_LARGE_PKT; i++) + create_packet(pkts[i], i * MSS, 0, MSS, 0); + create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0); + create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0); + + for (i = 0; i < NUM_LARGE_PKT; i++) + write_packet(fd, pkts[i], total_hdr_len + MSS, daddr); + write_packet(fd, last, total_hdr_len + remainder, daddr); + write_packet(fd, new_seg, total_hdr_len + remainder, daddr); +} + +/* Pure acks and dup acks don't coalesce */ +static void send_ack(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN]; + + create_packet(buf, 0, 0, 0, 0); + write_packet(fd, buf, total_hdr_len, daddr); + write_packet(fd, buf, total_hdr_len, daddr); + create_packet(buf, 0, 1, 0, 0); + write_packet(fd, buf, total_hdr_len, daddr); +} + +static void recompute_packet(char *buf, char *no_ext, int extlen) +{ + struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + memmove(buf, no_ext, total_hdr_len); + memmove(buf + total_hdr_len + extlen, + no_ext + total_hdr_len, PAYLOAD_LEN); + + tcphdr->doff = tcphdr->doff + (extlen / 4); + tcphdr->check = 0; + tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen); + if (proto == PF_INET) { + iph->tot_len = htons(ntohs(iph->tot_len) + extlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + + if (ipip) { + iph += 1; + iph->tot_len = htons(ntohs(iph->tot_len) + extlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } + } else { + ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); + } +} + +static void tcp_write_options(char *buf, int kind, int ts) +{ + struct tcp_option_ts { + uint8_t kind; + uint8_t len; + uint32_t tsval; + uint32_t tsecr; + } *opt_ts = (void *)buf; + struct tcp_option_window { + uint8_t kind; + uint8_t len; + uint8_t shift; + } *opt_window = (void *)buf; + + switch (kind) { + case TCPOPT_NOP: + buf[0] = TCPOPT_NOP; + break; + case TCPOPT_WINDOW: + memset(opt_window, 0, sizeof(struct tcp_option_window)); + opt_window->kind = TCPOPT_WINDOW; + opt_window->len = TCPOLEN_WINDOW; + opt_window->shift = 0; + break; + case TCPOPT_TIMESTAMP: + memset(opt_ts, 0, sizeof(struct tcp_option_ts)); + opt_ts->kind = TCPOPT_TIMESTAMP; + opt_ts->len = TCPOLEN_TIMESTAMP; + opt_ts->tsval = ts; + opt_ts->tsecr = 0; + break; + default: + error(1, 0, "unimplemented TCP option"); + break; + } +} + +/* TCP with options is always a permutation of {TS, NOP, NOP}. + * Implement different orders to verify coalescing stops. + */ +static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order) +{ + switch (order) { + case 0: + tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */, + TCPOPT_TIMESTAMP, ts); + break; + case 1: + tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 1, + TCPOPT_TIMESTAMP, ts); + tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP, + TCPOPT_NOP, 0); + break; + case 2: + tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts); + tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1, + TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2, + TCPOPT_NOP, 0); + break; + default: + error(1, 0, "unknown order"); + break; + } + recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA); +} + +/* Packets with invalid checksum don't coalesce. */ +static void send_changed_checksum(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + tcph->check = tcph->check - 1; + write_packet(fd, buf, pkt_size, daddr); +} + + /* Packets with non-consecutive sequence number don't coalesce.*/ +static void send_changed_seq(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + tcph->seq = ntohl(htonl(tcph->seq) + 1); + tcph->check = 0; + tcph->check = tcp_checksum(tcph, PAYLOAD_LEN); + write_packet(fd, buf, pkt_size, daddr); +} + + /* Packet with different timestamp option or different timestamps + * don't coalesce. + */ +static void send_changed_ts(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; + int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 0, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 0, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 1); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 2); + write_packet(fd, extpkt, pkt_size, daddr); +} + +/* Packet with different tcp options don't coalesce. */ +static void send_diff_opt(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; + static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG]; + int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; + int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt1, buf, 0, 0); + write_packet(fd, extpkt1, extpkt1_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt1, buf, 0, 0); + write_packet(fd, extpkt1, extpkt1_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0); + tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0); + recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1); + write_packet(fd, extpkt2, extpkt2_size, daddr); +} + +static void add_ipv4_ts_option(void *buf, void *optpkt) +{ + struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset); + int optlen = sizeof(struct ip_timestamp); + struct iphdr *iph; + + if (optlen % 4) + error(1, 0, "ipv4 timestamp length is not a multiple of 4B"); + + ts->ipt_code = IPOPT_TS; + ts->ipt_len = optlen; + ts->ipt_ptr = 5; + ts->ipt_flg = IPOPT_TS_TSONLY; + + memcpy(optpkt, buf, tcp_offset); + memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + + iph = (struct iphdr *)(optpkt + ETH_HLEN); + iph->ihl = 5 + (optlen / 4); + iph->tot_len = htons(ntohs(iph->tot_len) + optlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0); +} + +static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload) +{ + struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset); + struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN); + char *exthdr_payload_start = (char *)(exthdr + 1); + + exthdr->hdrlen = 0; + exthdr->nexthdr = IPPROTO_TCP; + + memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr)); + + memcpy(optpkt, buf, tcp_offset); + memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + + iph->nexthdr = exthdr_type; + iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE); +} + +static void fix_ip4_checksum(struct iphdr *iph) +{ + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); +} + +static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) +{ + static char buf1[MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf2[MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf3[MAX_HDR_LEN + PAYLOAD_LEN]; + bool send_three = false; + struct iphdr *iph1; + struct iphdr *iph2; + struct iphdr *iph3; + + iph1 = (struct iphdr *)(buf1 + ETH_HLEN); + iph2 = (struct iphdr *)(buf2 + ETH_HLEN); + iph3 = (struct iphdr *)(buf3 + ETH_HLEN); + + create_packet(buf1, 0, 0, PAYLOAD_LEN, 0); + create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + + switch (tcase) { + case 0: /* DF=1, Incrementing - should coalesce */ + iph1->frag_off |= htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off |= htons(IP_DF); + iph2->id = htons(9); + break; + + case 1: /* DF=1, Fixed - should coalesce */ + iph1->frag_off |= htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off |= htons(IP_DF); + iph2->id = htons(8); + break; + + case 2: /* DF=0, Incrementing - should coalesce */ + iph1->frag_off &= ~htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off &= ~htons(IP_DF); + iph2->id = htons(9); + break; + + case 3: /* DF=0, Fixed - should coalesce */ + iph1->frag_off &= ~htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off &= ~htons(IP_DF); + iph2->id = htons(8); + break; + + case 4: /* DF=1, two packets incrementing, and one fixed - should + * coalesce only the first two packets + */ + iph1->frag_off |= htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off |= htons(IP_DF); + iph2->id = htons(9); + + iph3->frag_off |= htons(IP_DF); + iph3->id = htons(9); + send_three = true; + break; + + case 5: /* DF=1, two packets fixed, and one incrementing - should + * coalesce only the first two packets + */ + iph1->frag_off |= htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off |= htons(IP_DF); + iph2->id = htons(8); + + iph3->frag_off |= htons(IP_DF); + iph3->id = htons(9); + send_three = true; + break; + } + + fix_ip4_checksum(iph1); + fix_ip4_checksum(iph2); + write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr); + write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr); + + if (send_three) { + fix_ip4_checksum(iph3); + write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr); + } +} + +static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt) +{ + for (int i = 0; i < num_flush_id_cases; i++) { + sleep(1); + send_flush_id_case(fd, daddr, i); + sleep(1); + write_packet(fd, fin_pkt, total_hdr_len, daddr); + } +} + +static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE]; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1); + write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); + + create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2); + write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); +} + +/* IPv4 options shouldn't coalesce */ +static void send_ip_options(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)]; + int optlen = sizeof(struct ip_timestamp); + int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); + + create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); + add_ipv4_ts_option(buf, optpkt); + write_packet(fd, optpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); +} + +/* IPv4 fragments shouldn't coalesce */ +static void send_fragment4(int fd, struct sockaddr_ll *daddr) +{ + static char buf[IP_MAXPACKET]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + /* Once fragmented, packet would retain the total_len. + * Tcp header is prepared as if rest of data is in follow-up frags, + * but follow up frags aren't actually sent. + */ + memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2); + fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0); + fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN, IPPROTO_TCP); + fill_datalinklayer(buf); + + iph->frag_off = htons(0x6000); // DF = 1, MF = 1 + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + write_packet(fd, buf, pkt_size, daddr); +} + +/* IPv4 packets with different ttl don't coalesce.*/ +static void send_changed_ttl(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + iph->ttl = 7; + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + write_packet(fd, buf, pkt_size, daddr); +} + +/* Packets with different tos don't coalesce.*/ +static void send_changed_tos(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + if (proto == PF_INET) { + iph->tos = 1; + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else if (proto == PF_INET6) { + ip6h->priority = 0xf; + } + write_packet(fd, buf, pkt_size, daddr); +} + +/* Packets with different ECN don't coalesce.*/ +static void send_changed_ECN(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + if (proto == PF_INET) { + buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10 + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else { + buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10 + } + write_packet(fd, buf, pkt_size, daddr); +} + +/* IPv6 fragments and packets with extensions don't coalesce.*/ +static void send_fragment6(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN + + sizeof(struct ip6_frag)]; + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + struct ip6_frag *frag = (void *)(extpkt + tcp_offset); + int extlen = sizeof(struct ip6_frag); + int bufpkt_len = total_hdr_len + PAYLOAD_LEN; + int extpkt_len = bufpkt_len + extlen; + int i; + + for (i = 0; i < 2; i++) { + create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, bufpkt_len, daddr); + } + sleep(1); + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + memset(extpkt, 0, extpkt_len); + + ip6h->nexthdr = IPPROTO_FRAGMENT; + ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); + frag->ip6f_nxt = IPPROTO_TCP; + + memcpy(extpkt, buf, tcp_offset); + memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + write_packet(fd, extpkt, extpkt_len, daddr); + + create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, bufpkt_len, daddr); +} + +static void bind_packetsocket(int fd) +{ + struct sockaddr_ll daddr = {}; + + daddr.sll_family = AF_PACKET; + daddr.sll_protocol = ethhdr_proto; + daddr.sll_ifindex = if_nametoindex(ifname); + if (daddr.sll_ifindex == 0) + error(1, errno, "if_nametoindex"); + + if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0) + error(1, errno, "could not bind socket"); +} + +static void set_timeout(int fd) +{ + struct timeval timeout; + + timeout.tv_sec = 3; + timeout.tv_usec = 0; + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, + sizeof(timeout)) < 0) + error(1, errno, "cannot set timeout, setsockopt failed"); +} + +static void check_recv_pkts(int fd, int *correct_payload, + int correct_num_pkts) +{ + static char buffer[IP_MAXPACKET + ETH_HLEN + 1]; + struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN); + struct tcphdr *tcph; + bool bad_packet = false; + int tcp_ext_len = 0; + int ip_ext_len = 0; + int pkt_size = -1; + int data_len = 0; + int num_pkt = 0; + int i; + + vlog("Expected {"); + for (i = 0; i < correct_num_pkts; i++) + vlog("%d ", correct_payload[i]); + vlog("}, Total %d packets\nReceived {", correct_num_pkts); + + while (1) { + ip_ext_len = 0; + pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); + if (pkt_size < 0) + error(1, errno, "could not receive"); + + if (iph->version == 4) + ip_ext_len = (iph->ihl - 5) * 4; + else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP) + ip_ext_len = MIN_EXTHDR_SIZE; + + tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); + + if (tcph->fin) + break; + + tcp_ext_len = (tcph->doff - 5) * 4; + data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len; + /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3. + * Ipv4/tcp packets without at least 6 bytes of data will be padded. + * Packet sockets are protocol agnostic, and will not trim the padding. + */ + if (pkt_size == ETH_ZLEN && iph->version == 4) { + data_len = ntohs(iph->tot_len) + - sizeof(struct tcphdr) - sizeof(struct iphdr); + } + vlog("%d ", data_len); + if (data_len != correct_payload[num_pkt]) { + vlog("[!=%d]", correct_payload[num_pkt]); + bad_packet = true; + } + num_pkt++; + } + vlog("}, Total %d packets.\n", num_pkt); + if (num_pkt != correct_num_pkts) + error(1, 0, "incorrect number of packets"); + if (bad_packet) + error(1, 0, "incorrect packet geometry"); + + printf("Test succeeded\n\n"); +} + +static void gro_sender(void) +{ + const int fin_delay_us = 100 * 1000; + static char fin_pkt[MAX_HDR_LEN]; + struct sockaddr_ll daddr = {}; + int txfd = -1; + + txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW); + if (txfd < 0) + error(1, errno, "socket creation"); + + memset(&daddr, 0, sizeof(daddr)); + daddr.sll_ifindex = if_nametoindex(ifname); + if (daddr.sll_ifindex == 0) + error(1, errno, "if_nametoindex"); + daddr.sll_family = AF_PACKET; + memcpy(daddr.sll_addr, dst_mac, ETH_ALEN); + daddr.sll_halen = ETH_ALEN; + create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1); + + if (strcmp(testname, "data") == 0) { + send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ack") == 0) { + send_ack(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "flags") == 0) { + send_flags(txfd, &daddr, 1, 0, 0, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 1, 0, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 0, 1, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 0, 0, 1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "tcp") == 0) { + send_changed_checksum(txfd, &daddr); + /* Adding sleep before sending FIN so that it is not + * received prior to other packets. + */ + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_seq(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_ts(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_diff_opt(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip") == 0) { + send_changed_ECN(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_tos(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + if (proto == PF_INET) { + /* Modified packets may be received out of order. + * Sleep function added to enforce test boundaries + * so that fin pkts are not received prior to other pkts. + */ + sleep(1); + send_changed_ttl(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + send_ip_options(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + send_fragment4(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + test_flush_id(txfd, &daddr, fin_pkt); + } else if (proto == PF_INET6) { + sleep(1); + send_fragment6(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + /* send IPv6 packets with ext header with same payload */ + send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + /* send IPv6 packets with ext header with different payload */ + send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } + } else if (strcmp(testname, "large") == 0) { + /* 20 is the difference between min iphdr size + * and min ipv6hdr size. Like MAX_HDR_SIZE, + * MAX_PAYLOAD is defined with the larger header of the two. + */ + int offset = (proto == PF_INET && !ipip) ? 20 : 0; + int remainder = (MAX_PAYLOAD + offset) % MSS; + + send_large(txfd, &daddr, remainder); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_large(txfd, &daddr, remainder + 1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else { + error(1, 0, "Unknown testcase"); + } + + if (close(txfd)) + error(1, errno, "socket close"); +} + +static void gro_receiver(void) +{ + static int correct_payload[NUM_PACKETS]; + int rxfd = -1; + + rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE)); + if (rxfd < 0) + error(1, 0, "socket creation"); + setup_sock_filter(rxfd); + set_timeout(rxfd); + bind_packetsocket(rxfd); + + ksft_ready(); + + memset(correct_payload, 0, sizeof(correct_payload)); + + if (strcmp(testname, "data") == 0) { + printf("pure data packet of same size: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("large data packets followed by a smaller one: "); + correct_payload[0] = PAYLOAD_LEN * 1.5; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("small data packets followed by a larger one: "); + correct_payload[0] = PAYLOAD_LEN / 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ack") == 0) { + printf("duplicate ack and pure ack: "); + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "flags") == 0) { + correct_payload[0] = PAYLOAD_LEN * 3; + correct_payload[1] = PAYLOAD_LEN * 2; + + printf("psh flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 2); + + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = 0; + correct_payload[2] = PAYLOAD_LEN * 2; + printf("syn flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + + printf("rst flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + + printf("urg flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "tcp") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + correct_payload[3] = PAYLOAD_LEN; + + printf("changed checksum does not coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("Wrong Seq number doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("Different timestamp doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 4); + + printf("Different options doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ip") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + + printf("different ECN doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("different tos doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + if (proto == PF_INET) { + printf("different ttl doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("ip options doesn't coalesce: "); + correct_payload[2] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 3); + + printf("fragmented ip4 doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + /* is_atomic checks */ + printf("DF=1, Incrementing - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("DF=1, Fixed - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("DF=0, Incrementing - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("DF=0, Fixed - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + + printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + } else if (proto == PF_INET6) { + /* GRO doesn't check for ipv6 hop limit when flushing. + * Hence no corresponding test to the ipv4 case. + */ + printf("fragmented ip6 doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 3); + + printf("ipv6 with ext header does coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("ipv6 with ext header with different payloads doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + } + } else if (strcmp(testname, "large") == 0) { + int offset = (proto == PF_INET && !ipip) ? 20 : 0; + int remainder = (MAX_PAYLOAD + offset) % MSS; + + correct_payload[0] = (MAX_PAYLOAD + offset); + correct_payload[1] = remainder; + printf("Shouldn't coalesce if exceed IP max pkt size: "); + check_recv_pkts(rxfd, correct_payload, 2); + + /* last segment sent individually, doesn't start new segment */ + correct_payload[0] = correct_payload[0] - remainder; + correct_payload[1] = remainder + 1; + correct_payload[2] = remainder + 1; + check_recv_pkts(rxfd, correct_payload, 3); + } else { + error(1, 0, "Test case error, should never trigger"); + } + + if (close(rxfd)) + error(1, 0, "socket close"); +} + +static void parse_args(int argc, char **argv) +{ + static const struct option opts[] = { + { "daddr", required_argument, NULL, 'd' }, + { "dmac", required_argument, NULL, 'D' }, + { "iface", required_argument, NULL, 'i' }, + { "ipv4", no_argument, NULL, '4' }, + { "ipv6", no_argument, NULL, '6' }, + { "ipip", no_argument, NULL, 'e' }, + { "rx", no_argument, NULL, 'r' }, + { "saddr", required_argument, NULL, 's' }, + { "smac", required_argument, NULL, 'S' }, + { "test", required_argument, NULL, 't' }, + { "verbose", no_argument, NULL, 'v' }, + { 0, 0, 0, 0 } + }; + int c; + + while ((c = getopt_long(argc, argv, "46d:D:ei:rs:S:t:v", opts, NULL)) != -1) { + switch (c) { + case '4': + proto = PF_INET; + ethhdr_proto = htons(ETH_P_IP); + break; + case '6': + proto = PF_INET6; + ethhdr_proto = htons(ETH_P_IPV6); + break; + case 'e': + ipip = true; + proto = PF_INET; + ethhdr_proto = htons(ETH_P_IP); + break; + case 'd': + addr4_dst = addr6_dst = optarg; + break; + case 'D': + dmac = optarg; + break; + case 'i': + ifname = optarg; + break; + case 'r': + tx_socket = false; + break; + case 's': + addr4_src = addr6_src = optarg; + break; + case 'S': + smac = optarg; + break; + case 't': + testname = optarg; + break; + case 'v': + verbose = true; + break; + default: + error(1, 0, "%s invalid option %c\n", __func__, c); + break; + } + } +} + +int main(int argc, char **argv) +{ + parse_args(argc, argv); + + if (ipip) { + tcp_offset = ETH_HLEN + sizeof(struct iphdr) * 2; + total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (proto == PF_INET) { + tcp_offset = ETH_HLEN + sizeof(struct iphdr); + total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (proto == PF_INET6) { + tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr); + total_hdr_len = MAX_HDR_LEN; + } else { + error(1, 0, "Protocol family is not ipv4 or ipv6"); + } + + read_MAC(src_mac, smac); + read_MAC(dst_mac, dmac); + + if (tx_socket) { + gro_sender(); + } else { + /* Only the receiver exit status determines test success. */ + gro_receiver(); + fprintf(stderr, "Gro::%s test passed.\n", testname); + } + + return 0; +} diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py new file mode 100755 index 000000000000..ba83713bf7b5 --- /dev/null +++ b/tools/testing/selftests/drivers/net/gro.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +GRO (Generic Receive Offload) conformance tests. + +Validates that GRO coalescing works correctly by running the gro +binary in different configurations and checking for correct packet +coalescing behavior. + +Test cases: + - data: Data packets with same size/headers and correct seq numbers coalesce + - ack: Pure ACK packets do not coalesce + - flags: Packets with PSH, SYN, URG, RST flags do not coalesce + - tcp: Packets with incorrect checksum, non-consecutive seqno don't coalesce + - ip: Packets with different ECN, TTL, TOS, or IP options don't coalesce + - large: Packets larger than GRO_MAX_SIZE don't coalesce +""" + +import os +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import NetDrvEpEnv, KsftXfailEx +from lib.py import cmd, defer, bkg, ip +from lib.py import ksft_variants + + +def _resolve_dmac(cfg, ipver): + """ + Find the destination MAC address remote host should use to send packets + towards the local host. It may be a router / gateway address. + """ + + attr = "dmac" + ipver + # Cache the response across test cases + if hasattr(cfg, attr): + return getattr(cfg, attr) + + route = ip(f"-{ipver} route get {cfg.addr_v[ipver]}", + json=True, host=cfg.remote)[0] + gw = route.get("gateway") + # Local L2 segment, address directly + if not gw: + setattr(cfg, attr, cfg.dev['address']) + return getattr(cfg, attr) + + # ping to make sure neighbor is resolved, + # bind to an interface, for v6 the GW is likely link local + cmd(f"ping -c1 -W0 -I{cfg.remote_ifname} {gw}", host=cfg.remote) + + neigh = ip(f"neigh get {gw} dev {cfg.remote_ifname}", + json=True, host=cfg.remote)[0] + setattr(cfg, attr, neigh['lladdr']) + return getattr(cfg, attr) + + +def _write_defer_restore(cfg, path, val, defer_undo=False): + with open(path, "r", encoding="utf-8") as fp: + orig_val = fp.read().strip() + if str(val) == orig_val: + return + with open(path, "w", encoding="utf-8") as fp: + fp.write(val) + if defer_undo: + defer(_write_defer_restore, cfg, path, orig_val) + + +def _set_mtu_restore(dev, mtu, host): + if dev['mtu'] < mtu: + ip(f"link set dev {dev['ifname']} mtu {mtu}", host=host) + defer(ip, f"link set dev {dev['ifname']} mtu {dev['mtu']}", host=host) + + +def _setup(cfg, test_name): + """ Setup hardware loopback mode for GRO testing. """ + + if not hasattr(cfg, "bin_remote"): + cfg.bin_local = cfg.test_dir / "gro" + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + # "large" test needs at least 4k MTU + if test_name == "large": + _set_mtu_restore(cfg.dev, 4096, None) + _set_mtu_restore(cfg.remote_dev, 4096, cfg.remote) + + flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout" + irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs" + + _write_defer_restore(cfg, flush_path, "200000", defer_undo=True) + _write_defer_restore(cfg, irq_path, "10", defer_undo=True) + + try: + # Disable TSO for local tests + cfg.require_nsim() # will raise KsftXfailEx if not running on nsim + + cmd(f"ethtool -K {cfg.ifname} gro on tso off") + cmd(f"ethtool -K {cfg.remote_ifname} gro on tso off", host=cfg.remote) + except KsftXfailEx: + pass + +def _gro_variants(): + """Generator that yields all combinations of protocol and test types.""" + + for protocol in ["ipv4", "ipv6", "ipip"]: + for test_name in ["data", "ack", "flags", "tcp", "ip", "large"]: + yield protocol, test_name + + +@ksft_variants(_gro_variants()) +def test(cfg, protocol, test_name): + """Run a single GRO test with retries.""" + + ipver = "6" if protocol[-1] == "6" else "4" + cfg.require_ipver(ipver) + + _setup(cfg, test_name) + + base_cmd_args = [ + f"--{protocol}", + f"--dmac {_resolve_dmac(cfg, ipver)}", + f"--smac {cfg.remote_dev['address']}", + f"--daddr {cfg.addr_v[ipver]}", + f"--saddr {cfg.remote_addr_v[ipver]}", + f"--test {test_name}", + "--verbose" + ] + base_args = " ".join(base_cmd_args) + + # Each test is run 6 times to deflake, because given the receive timing, + # not all packets that should coalesce will be considered in the same flow + # on every try. + max_retries = 6 + for attempt in range(max_retries): + rx_cmd = f"{cfg.bin_local} {base_args} --rx --iface {cfg.ifname}" + tx_cmd = f"{cfg.bin_remote} {base_args} --iface {cfg.remote_ifname}" + + fail_now = attempt >= max_retries - 1 + + with bkg(rx_cmd, ksft_ready=True, exit_wait=True, + fail=fail_now) as rx_proc: + cmd(tx_cmd, host=cfg.remote) + + if rx_proc.ret == 0: + return + + ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# ')) + ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# ')) + + if test_name == "large" and os.environ.get("KSFT_MACHINE_SLOW"): + ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment") + return + + ksft_pr(f"Attempt {attempt + 1}/{max_retries} failed, retrying...") + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__) as cfg: + ksft_run(cases=[test], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py new file mode 100755 index 000000000000..c4fe049e9baa --- /dev/null +++ b/tools/testing/selftests/drivers/net/hds.py @@ -0,0 +1,329 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import errno +import os +import random +from typing import Union +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx +from lib.py import CmdExitFailure, EthtoolFamily, NlError +from lib.py import NetDrvEnv +from lib.py import defer, ethtool, ip + + +def _get_hds_mode(cfg, netnl) -> str: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + return rings['tcp-data-split'] + + +def _xdp_onoff(cfg): + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + ip("link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, prog)) + ip("link set dev %s xdp off" % cfg.ifname) + + +def _ioctl_ringparam_modify(cfg, netnl) -> None: + """ + Helper for performing a hopefully unimportant IOCTL SET. + IOCTL does not support HDS, so it should not affect the HDS config. + """ + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + + if 'tx' not in rings: + raise KsftSkipEx('setting Tx ring size not supported') + + try: + ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] // 2}") + except CmdExitFailure as e: + ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] * 2}") + defer(ethtool, f"-G {cfg.ifname} tx {rings['tx']}") + + +def get_hds(cfg, netnl) -> None: + _get_hds_mode(cfg, netnl) + + +def get_hds_thresh(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + + +def _hds_reset(cfg, netnl, rings) -> None: + cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + + arg = {'header': {'dev-index': cfg.ifindex}} + if cur.get('tcp-data-split') != rings.get('tcp-data-split'): + # Try to reset to "unknown" first, we don't know if the setting + # was the default or user chose it. Default seems more likely. + arg['tcp-data-split'] = "unknown" + netnl.rings_set(arg) + cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if cur['tcp-data-split'] == rings['tcp-data-split']: + del arg['tcp-data-split'] + else: + # Try the explicit setting + arg['tcp-data-split'] = rings['tcp-data-split'] + if cur.get('hds-thresh') != rings.get('hds-thresh'): + arg['hds-thresh'] = rings['hds-thresh'] + if len(arg) > 1: + netnl.rings_set(arg) + + +def _defer_reset_hds(cfg, netnl) -> Union[dict, None]: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if 'hds-thresh' in rings or 'tcp-data-split' in rings: + defer(_hds_reset, cfg, netnl, rings) + except NlError as e: + pass + + +def set_hds_enable(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("disabling of HDS not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + + ksft_eq('enabled', rings['tcp-data-split']) + +def set_hds_disable(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("disabling of HDS not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + + ksft_eq('disabled', rings['tcp-data-split']) + +def set_hds_thresh_zero(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + + ksft_eq(0, rings['hds-thresh']) + +def set_hds_thresh_random(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + if 'hds-thresh-max' not in rings: + raise KsftSkipEx('hds-thresh-max not defined by device') + + if rings['hds-thresh-max'] < 2: + raise KsftSkipEx('hds-thresh-max is too small') + elif rings['hds-thresh-max'] == 2: + hds_thresh = 1 + else: + while True: + hds_thresh = random.randint(1, rings['hds-thresh-max'] - 1) + if hds_thresh != rings['hds-thresh']: + break + + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_thresh}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(hds_thresh, rings['hds-thresh']) + +def set_hds_thresh_max(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': rings['hds-thresh-max']}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(rings['hds-thresh'], rings['hds-thresh-max']) + +def set_hds_thresh_gt(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + if 'hds-thresh-max' not in rings: + raise KsftSkipEx('hds-thresh-max not defined by device') + hds_gt = rings['hds-thresh-max'] + 1 + with ksft_raises(NlError) as e: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt}) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + + +def set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "auto" / UNKNOWN mode, XDP installation should work. + """ + mode = _get_hds_mode(cfg, netnl) + if mode == 'enabled': + _defer_reset_hds(cfg, netnl) + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + _xdp_onoff(cfg) + + +def enabled_set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "enabled" mode, XDP installation should not work. + """ + _get_hds_mode(cfg, netnl) + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled'}) + + defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + with ksft_raises(CmdExitFailure) as e: + _xdp_onoff(cfg) + + +def set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "auto" / UNKNOWN mode, XDP installation should work. + """ + mode = _get_hds_mode(cfg, netnl) + if mode == 'enabled': + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + _xdp_onoff(cfg) + + +def enabled_set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "enabled" mode, XDP installation should not work. + """ + _get_hds_mode(cfg, netnl) # Trigger skip if not supported + + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled'}) + defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + with ksft_raises(CmdExitFailure) as e: + _xdp_onoff(cfg) + + +def ioctl(cfg, netnl) -> None: + mode1 = _get_hds_mode(cfg, netnl) + _ioctl_ringparam_modify(cfg, netnl) + mode2 = _get_hds_mode(cfg, netnl) + + ksft_eq(mode1, mode2) + + +def ioctl_set_xdp(cfg, netnl) -> None: + """ + Like set_xdp(), but we perturb the settings via the legacy ioctl. + """ + mode = _get_hds_mode(cfg, netnl) + if mode == 'enabled': + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + _ioctl_ringparam_modify(cfg, netnl) + + _xdp_onoff(cfg) + + +def ioctl_enabled_set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "enabled" mode, XDP installation should not work. + """ + _get_hds_mode(cfg, netnl) # Trigger skip if not supported + + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled'}) + defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + with ksft_raises(CmdExitFailure) as e: + _xdp_onoff(cfg) + + +def main() -> None: + with NetDrvEnv(__file__, queue_count=3) as cfg: + ksft_run([get_hds, + get_hds_thresh, + set_hds_disable, + set_hds_enable, + set_hds_thresh_random, + set_hds_thresh_zero, + set_hds_thresh_max, + set_hds_thresh_gt, + set_xdp, + enabled_set_xdp, + ioctl, + ioctl_set_xdp, + ioctl_enabled_set_xdp], + args=(cfg, EthtoolFamily())) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore new file mode 100644 index 000000000000..46540468a775 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/.gitignore @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +iou-zcrx +ncdevmem +toeplitz diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile new file mode 100644 index 000000000000..9c163ba6feee --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +# Check if io_uring supports zero-copy receive +HAS_IOURING_ZCRX := $(shell \ + echo -e '#include <liburing.h>\n' \ + 'void *func = (void *)io_uring_register_ifq;\n' \ + 'int main() {return 0;}' | \ + $(CC) -luring -x c - -o /dev/null 2>&1 && echo y) + +ifeq ($(HAS_IOURING_ZCRX),y) +COND_GEN_FILES += iou-zcrx +else +$(warning excluding iouring tests, liburing not installed or too old) +endif + +TEST_GEN_FILES := \ + $(COND_GEN_FILES) \ +# end of TEST_GEN_FILES + +TEST_PROGS = \ + csum.py \ + devlink_port_split.py \ + devlink_rate_tc_bw.py \ + devmem.py \ + ethtool.sh \ + ethtool_extended_state.sh \ + ethtool_mm.sh \ + ethtool_rmon.sh \ + hw_stats_l3.sh \ + hw_stats_l3_gre.sh \ + iou-zcrx.py \ + irq.py \ + loopback.sh \ + nic_timestamp.py \ + pp_alloc_fail.py \ + rss_api.py \ + rss_ctx.py \ + rss_flow_label.py \ + rss_input_xfrm.py \ + toeplitz.py \ + tso.py \ + xsk_reconfig.py \ + # + +TEST_FILES := \ + ethtool_lib.sh \ + # + +TEST_INCLUDES := \ + $(wildcard lib/py/*.py ../lib/py/*.py) \ + ../../../net/lib.sh \ + ../../../net/forwarding/ipip_lib.sh \ + ../../../net/forwarding/lib.sh \ + ../../../net/forwarding/tc_common.sh \ + # + +# YNL files, must be before "include ..lib.mk" +YNL_GEN_FILES := \ + ncdevmem \ + toeplitz \ +# end of YNL_GEN_FILES +TEST_GEN_FILES += $(YNL_GEN_FILES) +TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) + +include ../../../lib.mk + +# YNL build +YNL_GENS := \ + ethtool \ + netdev \ +# end of YNL_GENS + +include ../../../net/ynl.mk + +include ../../../net/bpf.mk + +ifeq ($(HAS_IOURING_ZCRX),y) +$(OUTPUT)/iou-zcrx: LDLIBS += -luring +endif diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config new file mode 100644 index 000000000000..2307aa001be1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/config @@ -0,0 +1,11 @@ +CONFIG_FAIL_FUNCTION=y +CONFIG_FAULT_INJECTION=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FUNCTION_ERROR_INJECTION=y +CONFIG_IO_URING=y +CONFIG_IPV6=y +CONFIG_IPV6_GRE=y +CONFIG_NET_IPGRE=y +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_UDMABUF=y +CONFIG_VXLAN=y diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py new file mode 100755 index 000000000000..3e3a89a34afe --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Run the tools/testing/selftests/net/csum testsuite.""" + +from os import path + +from lib.py import ksft_run, ksft_exit, KsftSkipEx +from lib.py import EthtoolFamily, NetDrvEpEnv +from lib.py import bkg, cmd, wait_port_listen + +def test_receive(cfg, ipver="6", extra_args=None): + """Test local nic checksum receive. Remote host sends crafted packets.""" + if not cfg.have_rx_csum: + raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}") + + ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}" + + rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}" + tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}" + + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(34000, proto="udp") + cmd(tx_cmd, host=cfg.remote) + + +def test_transmit(cfg, ipver="6", extra_args=None): + """Test local nic checksum transmit. Remote host verifies packets.""" + if (not cfg.have_tx_csum_generic and + not (cfg.have_tx_csum_ipv4 and ipver == "4") and + not (cfg.have_tx_csum_ipv6 and ipver == "6")): + raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}") + + ip_args = f"-{ipver} -S {cfg.addr_v[ipver]} -D {cfg.remote_addr_v[ipver]}" + + # Cannot randomize input when calculating zero checksum + if extra_args != "-U -Z": + extra_args += " -r 1" + + rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}" + tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}" + + with bkg(rx_cmd, host=cfg.remote, exit_wait=True): + wait_port_listen(34000, proto="udp", host=cfg.remote) + cmd(tx_cmd) + + +def test_builder(name, cfg, ipver="6", tx=False, extra_args=""): + """Construct specific tests from the common template. + + Most tests follow the same basic pattern, differing only in + Direction of the test and optional flags passed to csum.""" + def f(cfg): + cfg.require_ipver(ipver) + + if tx: + test_transmit(cfg, ipver, extra_args) + else: + test_receive(cfg, ipver, extra_args) + + f.__name__ = f"ipv{ipver}_" + name + return f + + +def check_nic_features(cfg) -> None: + """Test whether Tx and Rx checksum offload are enabled. + + If the device under test has either off, then skip the relevant tests.""" + cfg.have_tx_csum_generic = False + cfg.have_tx_csum_ipv4 = False + cfg.have_tx_csum_ipv6 = False + cfg.have_rx_csum = False + + ethnl = EthtoolFamily() + features = ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + for f in features["active"]["bits"]["bit"]: + if f["name"] == "tx-checksum-ip-generic": + cfg.have_tx_csum_generic = True + elif f["name"] == "tx-checksum-ipv4": + cfg.have_tx_csum_ipv4 = True + elif f["name"] == "tx-checksum-ipv6": + cfg.have_tx_csum_ipv6 = True + elif f["name"] == "rx-checksum": + cfg.have_rx_csum = True + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + check_nic_features(cfg) + + cfg.bin_local = cfg.net_lib_dir / "csum" + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + cases = [] + for ipver in ["4", "6"]: + cases.append(test_builder("rx_tcp", cfg, ipver, False, "-t")) + cases.append(test_builder("rx_tcp_invalid", cfg, ipver, False, "-t -E")) + + cases.append(test_builder("rx_udp", cfg, ipver, False, "")) + cases.append(test_builder("rx_udp_invalid", cfg, ipver, False, "-E")) + + cases.append(test_builder("tx_udp_csum_offload", cfg, ipver, True, "-U")) + cases.append(test_builder("tx_udp_zero_checksum", cfg, ipver, True, "-U -Z")) + + ksft_run(cases=cases, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/devlink_port_split.py b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py new file mode 100755 index 000000000000..2d84c7a0be6b --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from subprocess import PIPE, Popen +import json +import time +import argparse +import collections +import sys + +# +# Test port split configuration using devlink-port lanes attribute. +# The test is skipped in case the attribute is not available. +# +# First, check that all the ports with 1 lane fail to split. +# Second, check that all the ports with more than 1 lane can be split +# to all valid configurations (e.g., split to 2, split to 4 etc.) +# + + +# Kselftest framework requirement - SKIP code is 4 +KSFT_SKIP=4 +Port = collections.namedtuple('Port', 'bus_info name') + + +def run_command(cmd, should_fail=False): + """ + Run a command in subprocess. + Return: Tuple of (stdout, stderr). + """ + + p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) + stdout, stderr = p.communicate() + stdout, stderr = stdout.decode(), stderr.decode() + + if stderr != "" and not should_fail: + print("Error sending command: %s" % cmd) + print(stdout) + print(stderr) + return stdout, stderr + + +class devlink_ports(object): + """ + Class that holds information on the devlink ports, required to the tests; + if_names: A list of interfaces in the devlink ports. + """ + + def get_if_names(dev): + """ + Get a list of physical devlink ports. + Return: Array of tuples (bus_info/port, if_name). + """ + + arr = [] + + cmd = "devlink -j port show" + stdout, stderr = run_command(cmd) + assert stderr == "" + ports = json.loads(stdout)['port'] + + validate_devlink_output(ports, 'flavour') + + for port in ports: + if dev in port: + if ports[port]['flavour'] == 'physical': + arr.append(Port(bus_info=port, name=ports[port]['netdev'])) + + return arr + + def __init__(self, dev): + self.if_names = devlink_ports.get_if_names(dev) + + +def get_max_lanes(port): + """ + Get the $port's maximum number of lanes. + Return: number of lanes, e.g. 1, 2, 4 and 8. + """ + + cmd = "devlink -j port show %s" % port + stdout, stderr = run_command(cmd) + assert stderr == "" + values = list(json.loads(stdout)['port'].values())[0] + + if 'lanes' in values: + lanes = values['lanes'] + else: + lanes = 0 + return lanes + + +def get_split_ability(port): + """ + Get the $port split ability. + Return: split ability, true or false. + """ + + cmd = "devlink -j port show %s" % port.name + stdout, stderr = run_command(cmd) + assert stderr == "" + values = list(json.loads(stdout)['port'].values())[0] + + return values['splittable'] + + +def split(k, port, should_fail=False): + """ + Split $port into $k ports. + If should_fail == True, the split should fail. Otherwise, should pass. + Return: Array of sub ports after splitting. + If the $port wasn't split, the array will be empty. + """ + + cmd = "devlink port split %s count %s" % (port.bus_info, k) + stdout, stderr = run_command(cmd, should_fail=should_fail) + + if should_fail: + if not test(stderr != "", "%s is unsplittable" % port.name): + print("split an unsplittable port %s" % port.name) + return create_split_group(port, k) + else: + if stderr == "": + return create_split_group(port, k) + print("didn't split a splittable port %s" % port.name) + + return [] + + +def unsplit(port): + """ + Unsplit $port. + """ + + cmd = "devlink port unsplit %s" % port + stdout, stderr = run_command(cmd) + test(stderr == "", "Unsplit port %s" % port) + + +def exists(port, dev): + """ + Check if $port exists in the devlink ports. + Return: True is so, False otherwise. + """ + + return any(dev_port.name == port + for dev_port in devlink_ports.get_if_names(dev)) + + +def exists_and_lanes(ports, lanes, dev): + """ + Check if every port in the list $ports exists in the devlink ports and has + $lanes number of lanes after splitting. + Return: True if both are True, False otherwise. + """ + + for port in ports: + max_lanes = get_max_lanes(port) + if not exists(port, dev): + print("port %s doesn't exist in devlink ports" % port) + return False + if max_lanes != lanes: + print("port %s has %d lanes, but %s were expected" + % (port, lanes, max_lanes)) + return False + return True + + +def test(cond, msg): + """ + Check $cond and print a message accordingly. + Return: True is pass, False otherwise. + """ + + if cond: + print("TEST: %-60s [ OK ]" % msg) + else: + print("TEST: %-60s [FAIL]" % msg) + + return cond + + +def create_split_group(port, k): + """ + Create the split group for $port. + Return: Array with $k elements, which are the split port group. + """ + + return list(port.name + "s" + str(i) for i in range(k)) + + +def split_unsplittable_port(port, k): + """ + Test that splitting of unsplittable port fails. + """ + + # split to max + new_split_group = split(k, port, should_fail=True) + + if new_split_group != []: + unsplit(port.bus_info) + + +def split_splittable_port(port, k, lanes, dev): + """ + Test that splitting of splittable port passes correctly. + """ + + new_split_group = split(k, port) + + # Once the split command ends, it takes some time to the sub ifaces' + # to get their names. Use udevadm to continue only when all current udev + # events are handled. + cmd = "udevadm settle" + stdout, stderr = run_command(cmd) + assert stderr == "" + + if new_split_group != []: + test(exists_and_lanes(new_split_group, lanes/k, dev), + "split port %s into %s" % (port.name, k)) + + unsplit(port.bus_info) + + +def validate_devlink_output(devlink_data, target_property=None): + """ + Determine if test should be skipped by checking: + 1. devlink_data contains values + 2. The target_property exist in devlink_data + """ + skip_reason = None + if any(devlink_data.values()): + if target_property: + skip_reason = "{} not found in devlink output, test skipped".format(target_property) + for key in devlink_data: + if target_property in devlink_data[key]: + skip_reason = None + else: + skip_reason = 'devlink output is empty, test skipped' + + if skip_reason: + print(skip_reason) + sys.exit(KSFT_SKIP) + + +def make_parser(): + parser = argparse.ArgumentParser(description='A test for port splitting.') + parser.add_argument('--dev', + help='The devlink handle of the device under test. ' + + 'The default is the first registered devlink ' + + 'handle.') + + return parser + + +def main(cmdline=None): + parser = make_parser() + args = parser.parse_args(cmdline) + + dev = args.dev + if not dev: + cmd = "devlink -j dev show" + stdout, stderr = run_command(cmd) + assert stderr == "" + + validate_devlink_output(json.loads(stdout)) + devs = json.loads(stdout)['dev'] + dev = list(devs.keys())[0] + + cmd = "devlink dev show %s" % dev + stdout, stderr = run_command(cmd) + if stderr != "": + print("devlink device %s can not be found" % dev) + sys.exit(1) + + ports = devlink_ports(dev) + + found_max_lanes = False + for port in ports.if_names: + max_lanes = get_max_lanes(port.name) + + # If max lanes is 0, do not test port splitting at all + if max_lanes == 0: + continue + + # If 1 lane, shouldn't be able to split + elif max_lanes == 1: + test(not get_split_ability(port), + "%s should not be able to split" % port.name) + split_unsplittable_port(port, max_lanes) + + # Else, splitting should pass and all the split ports should exist. + else: + lane = max_lanes + test(get_split_ability(port), + "%s should be able to split" % port.name) + while lane > 1: + split_splittable_port(port, lane, max_lanes, dev) + + lane //= 2 + found_max_lanes = True + + if not found_max_lanes: + print(f"Test not started, no port of device {dev} reports max_lanes") + sys.exit(KSFT_SKIP) + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py new file mode 100755 index 000000000000..4e4faa9275bb --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py @@ -0,0 +1,439 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Devlink Rate TC Bandwidth Test Suite +=================================== + +This test suite verifies the functionality of devlink-rate traffic class (TC) +bandwidth distribution in a virtualized environment. The tests validate that +bandwidth can be properly allocated between different traffic classes and +that TC mapping works as expected. + +Test Environment: +---------------- +- Creates 1 VF +- Establishes a bridge connecting the VF representor and the uplink representor +- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102) +- Configures different traffic classes (TC3 and TC4) for each VLAN + +Test Cases: +---------- +1. test_no_tc_mapping_bandwidth: + - Verifies that without TC mapping, bandwidth is NOT distributed according to + the configured 20/80 split between TC3 and TC4 + - This test should fail if bandwidth matches the 20/80 split without TC + mapping + - Expected: Bandwidth should NOT be distributed as 20/80 + +2. test_tc_mapping_bandwidth: + - Configures TC mapping using mqprio qdisc + - Verifies that with TC mapping, bandwidth IS distributed according to the + configured 20/80 split between TC3 and TC4 + - Expected: Bandwidth should be distributed as 20/80 + +Bandwidth Distribution: +---------------------- +- TC3 (VLAN 101): Configured for 20% of total bandwidth +- TC4 (VLAN 102): Configured for 80% of total bandwidth +- Total bandwidth: 1Gbps +- Tolerance: +-12% + +Hardware-Specific Behavior (mlx5): +-------------------------- +mlx5 hardware enforces traffic class separation by ensuring that each transmit +queue (SQ) is associated with a single TC. If a packet is sent on a queue that +doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set +mapping), the hardware moves the queue to the correct TC scheduler to preserve +traffic isolation. + +This behavior means that even without explicit TC-to-queue mapping, bandwidth +enforcement may still appear to work—because the hardware dynamically adjusts +the scheduling context. However, this can lead to performance issues in high +rates and HOL blocking if traffic from different TCs is mixed on the same queue. +""" + +import json +import os +import subprocess +import threading +import time + +from lib.py import ksft_pr, ksft_run, ksft_exit +from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx +from lib.py import NetDrvEpEnv, DevlinkFamily +from lib.py import NlError +from lib.py import cmd, defer, ethtool, ip +from lib.py import Iperf3Runner + + +class BandwidthValidator: + """ + Validates total bandwidth and individual shares with tolerance + relative to the overall total. + """ + + def __init__(self, shares): + self.tolerance_percent = 12 + self.expected_total = sum(shares.values()) + self.bounds = {} + + for name, exp in shares.items(): + self.bounds[name] = (self.min_expected(exp), self.max_expected(exp)) + + def min_expected(self, value): + """Calculates the minimum acceptable value based on tolerance.""" + return value - (self.expected_total * self.tolerance_percent / 100) + + def max_expected(self, value): + """Calculates the maximum acceptable value based on tolerance.""" + return value + (self.expected_total * self.tolerance_percent / 100) + + def bound(self, values): + """ + Return True if all given values fall within tolerance. + """ + for name, value in values.items(): + low, high = self.bounds[name] + if not low <= value <= high: + return False + return True + + +def setup_vf(cfg, set_tc_mapping=True): + """ + Sets up a VF on the given network interface. + + Enables SR-IOV and switchdev mode, brings the VF interface up, + and optionally configures TC mapping using mqprio. + """ + try: + cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev") + defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy") + except Exception as exc: + raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc + try: + cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True) + defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True) + except Exception as exc: + raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc + + time.sleep(2) + vf_ifc = (os.listdir( + f"/sys/class/net/{cfg.ifname}/device/virtfn0/net") or [None])[0] + if vf_ifc: + ip(f"link set dev {vf_ifc} up") + else: + raise KsftSkipEx("VF interface not found") + if set_tc_mapping: + cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8") + + return vf_ifc + + +def setup_vlans_on_vf(vf_ifc): + """ + Sets up two VLAN interfaces on the given VF, each mapped to a different TC. + """ + vlan_configs = [ + {"vlan_id": 101, "tc": 3, "ip": "198.51.100.1"}, + {"vlan_id": 102, "tc": 4, "ip": "198.51.100.9"}, + ] + + for config in vlan_configs: + vlan_dev = f"{vf_ifc}.{config['vlan_id']}" + ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {config['vlan_id']}") + ip(f"addr add {config['ip']}/29 dev {vlan_dev}") + ip(f"link set dev {vlan_dev} up") + ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{config['tc']}") + ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {config['tc']} and IP {config['ip']}") + + +def get_vf_info(cfg): + """ + Finds the VF representor interface and devlink port index + for the given PCI device used in the test environment. + """ + cfg.vf_representor = None + cfg.vf_port_index = None + out = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8") + ports = json.loads(out)["port"] + + for port_name, props in ports.items(): + netdev = props.get("netdev") + + if (port_name.startswith(f"pci/{cfg.pci}/") and + props.get("vfnum") == 0): + cfg.vf_representor = netdev + cfg.vf_port_index = int(port_name.split("/")[-1]) + break + + +def setup_bridge(cfg): + """ + Creates and configures a Linux bridge, with both the uplink + and VF representor interfaces attached to it. + """ + bridge_name = f"br_{os.getpid()}" + ip(f"link add name {bridge_name} type bridge") + defer(cmd, f"ip link del name {bridge_name} type bridge") + + ip(f"link set dev {cfg.ifname} master {bridge_name}") + + rep_name = cfg.vf_representor + if rep_name: + ip(f"link set dev {rep_name} master {bridge_name}") + ip(f"link set dev {rep_name} up") + ksft_pr(f"Set representor {rep_name} up and added to bridge") + else: + raise KsftSkipEx("Could not find representor for the VF") + + ip(f"link set dev {bridge_name} up") + + +def setup_devlink_rate(cfg): + """ + Configures devlink rate tx_max and traffic class bandwidth for the VF. + """ + port_index = cfg.vf_port_index + if port_index is None: + raise KsftSkipEx("Could not find VF port index") + try: + cfg.devnl.rate_set({ + "bus-name": "pci", + "dev-name": cfg.pci, + "port-index": port_index, + "rate-tx-max": 125000000, + "rate-tc-bws": [ + {"index": 0, "bw": 0}, + {"index": 1, "bw": 0}, + {"index": 2, "bw": 0}, + {"index": 3, "bw": 20}, + {"index": 4, "bw": 80}, + {"index": 5, "bw": 0}, + {"index": 6, "bw": 0}, + {"index": 7, "bw": 0}, + ] + }) + except NlError as exc: + if exc.error == 95: # EOPNOTSUPP + raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc + raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc + + +def setup_remote_vlans(cfg): + """ + Sets up VLAN interfaces on the remote side. + """ + remote_dev = cfg.remote_ifname + vlan_ids = [101, 102] + remote_ips = ["198.51.100.2", "198.51.100.10"] + + for vlan_id, ip_addr in zip(vlan_ids, remote_ips): + vlan_dev = f"{remote_dev}.{vlan_id}" + cmd(f"ip link add link {remote_dev} name {vlan_dev} " + f"type vlan id {vlan_id}", host=cfg.remote) + cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote) + cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote) + defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote) + + +def setup_test_environment(cfg, set_tc_mapping=True): + """ + Sets up the complete test environment including VF creation, VLANs, + bridge configuration and devlink rate setup. + """ + vf_ifc = setup_vf(cfg, set_tc_mapping) + ksft_pr(f"Created VF interface: {vf_ifc}") + + setup_vlans_on_vf(vf_ifc) + + get_vf_info(cfg) + setup_bridge(cfg) + + setup_devlink_rate(cfg) + setup_remote_vlans(cfg) + + +def measure_bandwidth(cfg, server_ip, client_ip, barrier): + """ + Synchronizes with peers and runs an iperf3-based bandwidth measurement + between the given endpoints. Returns average Gbps. + """ + runner = Iperf3Runner(cfg, server_ip=server_ip, client_ip=client_ip) + try: + barrier.wait(timeout=10) + except Exception as exc: + raise KsftFailEx("iperf3 barrier wait timed") from exc + + try: + bw_gbps = runner.measure_bandwidth(reverse=True) + except Exception as exc: + raise KsftFailEx("iperf3 bandwidth measurement failed") from exc + + return bw_gbps + + +def run_bandwidth_test(cfg): + """ + Runs parallel bandwidth measurements for each VLAN/TC pair and collects results. + """ + def _run_measure_bandwidth_thread(local_ip, remote_ip, results, barrier, tc_ix): + results[tc_ix] = measure_bandwidth(cfg, local_ip, remote_ip, barrier) + + vf_vlan_data = [ + # (local_ip, remote_ip, TC) + ("198.51.100.1", "198.51.100.2", 3), + ("198.51.100.9", "198.51.100.10", 4), + ] + + results = {} + threads = [] + start_barrier = threading.Barrier(len(vf_vlan_data)) + + for local_ip, remote_ip, tc_ix in vf_vlan_data: + thread = threading.Thread( + target=_run_measure_bandwidth_thread, + args=(local_ip, remote_ip, results, start_barrier, tc_ix) + ) + thread.start() + threads.append(thread) + + for thread in threads: + thread.join() + + for tc_ix, tc_bw in results.items(): + if tc_bw is None: + raise KsftFailEx("iperf3 failed; cannot evaluate bandwidth") + + return results + + +def calculate_bandwidth_percentages(results): + """ + Calculates the percentage of total bandwidth received by TC3 and TC4. + """ + if 3 not in results or 4 not in results: + raise KsftFailEx(f"Missing expected TC results in {results}") + + tc3_bw = results[3] + tc4_bw = results[4] + total_bw = tc3_bw + tc4_bw + tc3_percentage = (tc3_bw / total_bw) * 100 + tc4_percentage = (tc4_bw / total_bw) * 100 + + return { + 'tc3_bw': tc3_bw, + 'tc4_bw': tc4_bw, + 'tc3_percentage': tc3_percentage, + 'tc4_percentage': tc4_percentage, + 'total_bw': total_bw + } + + +def print_bandwidth_results(bw_data, test_name): + """ + Prints bandwidth measurements and TC usage summary for a given test. + """ + ksft_pr(f"Bandwidth check results {test_name}:") + ksft_pr(f"TC 3: {bw_data['tc3_bw']:.2f} Gbits/sec") + ksft_pr(f"TC 4: {bw_data['tc4_bw']:.2f} Gbits/sec") + ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec") + ksft_pr(f"TC 3 percentage: {bw_data['tc3_percentage']:.1f}%") + ksft_pr(f"TC 4 percentage: {bw_data['tc4_percentage']:.1f}%") + + +def verify_total_bandwidth(bw_data, validator): + """ + Ensures the total measured bandwidth falls within the acceptable tolerance. + """ + total = bw_data['total_bw'] + + if validator.bound({"total": total}): + return + + low, high = validator.bounds["total"] + + if total < low: + raise KsftSkipEx( + f"Total bandwidth {total:.2f} Gbps < minimum " + f"{low:.2f} Gbps; " + f"parent tx_max ({validator.expected_total:.1f} G) " + f"not reached, cannot validate share" + ) + + raise KsftFailEx( + f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling " + f"{high:.2f} Gbps " + f"(VF tx_max set to {validator.expected_total:.1f} G)" + ) + + +def run_bandwidth_distribution_test(cfg, set_tc_mapping): + """ + Runs parallel bandwidth measurements for both TCs and collects results. + """ + setup_test_environment(cfg, set_tc_mapping) + bandwidths = run_bandwidth_test(cfg) + bw_data = calculate_bandwidth_percentages(bandwidths) + test_name = "with TC mapping" if set_tc_mapping else "without TC mapping" + print_bandwidth_results(bw_data, test_name) + + verify_total_bandwidth(bw_data, cfg.traffic_bw_validator) + + return cfg.tc_bw_validator.bound({"tc3": bw_data['tc3_percentage'], + "tc4": bw_data['tc4_percentage']}) + + +def test_no_tc_mapping_bandwidth(cfg): + """ + Verifies that bandwidth is not split 20/80 without traffic class mapping. + """ + pass_bw_msg = "Bandwidth is NOT distributed as 20/80 without TC mapping" + fail_bw_msg = "Bandwidth matched 20/80 split without TC mapping" + is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout + + if run_bandwidth_distribution_test(cfg, set_tc_mapping=False): + if is_mlx5: + raise KsftXfailEx(fail_bw_msg) + raise KsftFailEx(fail_bw_msg) + if is_mlx5: + raise KsftFailEx("mlx5 behavior changed:" + pass_bw_msg) + ksft_pr(pass_bw_msg) + + +def test_tc_mapping_bandwidth(cfg): + """ + Verifies that bandwidth is correctly split 20/80 between TC3 and TC4 + when traffic class mapping is set. + """ + if run_bandwidth_distribution_test(cfg, set_tc_mapping=True): + ksft_pr("Bandwidth is distributed as 20/80 with TC mapping") + else: + raise KsftFailEx("Bandwidth did not match 20/80 split with TC mapping") + + +def main() -> None: + """ + Main entry point for running the test cases. + """ + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.devnl = DevlinkFamily() + + cfg.pci = os.path.basename( + os.path.realpath(f"/sys/class/net/{cfg.ifname}/device") + ) + if not cfg.pci: + raise KsftSkipEx("Could not get PCI address of the interface") + + cfg.traffic_bw_validator = BandwidthValidator({"total": 1}) + cfg.tc_bw_validator = BandwidthValidator({"tc3": 20, "tc4": 80}) + + cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth] + + ksft_run(cases=cases, args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py new file mode 100755 index 000000000000..45c2d49d55b6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from os import path +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, KsftSkipEx +from lib.py import NetDrvEpEnv +from lib.py import bkg, cmd, rand_port, wait_port_listen +from lib.py import ksft_disruptive + + +def require_devmem(cfg): + if not hasattr(cfg, "_devmem_probed"): + probe_command = f"{cfg.bin_local} -f {cfg.ifname}" + cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0 + cfg._devmem_probed = True + + if not cfg._devmem_supported: + raise KsftSkipEx("Test requires devmem support") + + +@ksft_disruptive +def check_rx(cfg) -> None: + require_devmem(cfg) + + port = rand_port() + socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}" + listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7" + + with bkg(listen_cmd, exit_wait=True) as ncdevmem: + wait_port_listen(port) + cmd(f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | \ + head -c 1K | {socat}", host=cfg.remote, shell=True) + + ksft_eq(ncdevmem.ret, 0) + + +@ksft_disruptive +def check_tx(cfg) -> None: + require_devmem(cfg) + + port = rand_port() + listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" + + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True) + + ksft_eq(socat.stdout.strip(), "hello\nworld") + + +@ksft_disruptive +def check_tx_chunks(cfg) -> None: + require_devmem(cfg) + + port = rand_port() + listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" + + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True) + + ksft_eq(socat.stdout.strip(), "hello\nworld") + + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem") + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + ksft_run([check_rx, check_tx, check_tx_chunks], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/ethtool.sh b/tools/testing/selftests/drivers/net/hw/ethtool.sh new file mode 100755 index 000000000000..fa6953de6b6d --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ethtool.sh @@ -0,0 +1,297 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + same_speeds_autoneg_off + different_speeds_autoneg_off + combination_of_neg_on_and_off + advertise_subset_of_speeds + check_highest_speed_is_chosen + different_speeds_autoneg_on +" +NUM_NETIFS=2 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh +source ethtool_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy +} + +same_speeds_autoneg_off() +{ + # Check that when each of the reported speeds is forced, the links come + # up and are operational. + local -a speeds_arr=($(common_speeds_get $h1 $h2 0 0)) + + for speed in "${speeds_arr[@]}"; do + RET=0 + ethtool_set $h1 speed $speed autoneg off + ethtool_set $h2 speed $speed autoneg off + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_err $? "ping with speed $speed autoneg off" + log_test "force speed $speed on both ends" + done + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +different_speeds_autoneg_off() +{ + # Test that when we force different speeds, links are not up and ping + # fails. + RET=0 + + local -a speeds_arr=($(different_speeds_get $h1 $h2 0 0)) + local speed1=${speeds_arr[0]} + local speed2=${speeds_arr[1]} + + ethtool_set $h1 speed $speed1 autoneg off + ethtool_set $h2 speed $speed2 autoneg off + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_fail $? "ping with different speeds" + + log_test "force of different speeds autoneg off" + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +combination_of_neg_on_and_off() +{ + # Test that when one device is forced to a speed supported by both + # endpoints and the other device is configured to autoneg on, the links + # are up and ping passes. + local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1)) + + for speed in "${speeds_arr[@]}"; do + RET=0 + ethtool_set $h1 speed $speed autoneg off + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_err $? "ping with h1-speed=$speed autoneg off, h2 autoneg on" + log_test "force speed $speed vs. autoneg" + done + + ethtool -s $h1 autoneg on +} + +hex_speed_value_get() +{ + local speed=$1; shift + + local shift_size=${speed_values[$speed]} + speed=$((0x1 << $"shift_size")) + printf "%#x" "$speed" +} + +subset_of_common_speeds_get() +{ + local dev1=$1; shift + local dev2=$1; shift + local adver=$1; shift + + local -a speeds_arr=($(common_speeds_get $dev1 $dev2 0 $adver)) + local speed_to_advertise=0 + local speed_to_remove=${speeds_arr[0]} + speed_to_remove+='base' + + local -a speeds_mode_arr=($(common_speeds_get $dev1 $dev2 1 $adver)) + + for speed in ${speeds_mode_arr[@]}; do + if [[ $speed != $speed_to_remove* ]]; then + speed=$(hex_speed_value_get $speed) + speed_to_advertise=$(($speed_to_advertise | \ + $speed)) + fi + + done + + # Convert to hex. + printf "%#x" "$speed_to_advertise" +} + +speed_to_advertise_get() +{ + # The function returns the hex number that is composed by OR-ing all + # the modes corresponding to the provided speed. + local speed_without_mode=$1; shift + local supported_speeds=("$@"); shift + local speed_to_advertise=0 + + speed_without_mode+='base' + + for speed in ${supported_speeds[@]}; do + if [[ $speed == $speed_without_mode* ]]; then + speed=$(hex_speed_value_get $speed) + speed_to_advertise=$(($speed_to_advertise | \ + $speed)) + fi + + done + + # Convert to hex. + printf "%#x" "$speed_to_advertise" +} + +advertise_subset_of_speeds() +{ + # Test that when one device advertises a subset of speeds and another + # advertises a specific speed (but all modes of this speed), the links + # are up and ping passes. + RET=0 + + local speed_1_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1) + ethtool_set $h1 advertise $speed_1_to_advertise + + if [ $RET != 0 ]; then + log_test "advertise subset of speeds" + return + fi + + local -a speeds_arr_without_mode=($(common_speeds_get $h1 $h2 0 1)) + # Check only speeds that h1 advertised. Remove the first speed. + unset speeds_arr_without_mode[0] + local -a speeds_arr_with_mode=($(common_speeds_get $h1 $h2 1 1)) + + for speed_value in ${speeds_arr_without_mode[@]}; do + RET=0 + local speed_2_to_advertise=$(speed_to_advertise_get $speed_value \ + "${speeds_arr_with_mode[@]}") + ethtool_set $h2 advertise $speed_2_to_advertise + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_err $? "ping with h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)" + + log_test "advertise $speed_1_to_advertise vs. $speed_2_to_advertise" + done + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +check_highest_speed_is_chosen() +{ + # Test that when one device advertises a subset of speeds, the other + # chooses the highest speed. This test checks configuration without + # traffic. + RET=0 + + local max_speed + local chosen_speed + local speed_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1) + + ethtool_set $h1 advertise $speed_to_advertise + + if [ $RET != 0 ]; then + log_test "check highest speed" + return + fi + + local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1)) + + max_speed=${speeds_arr[0]} + for current in ${speeds_arr[@]}; do + if [[ $current -gt $max_speed ]]; then + max_speed=$current + fi + done + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + chosen_speed=$(ethtool $h1 | grep 'Speed:') + chosen_speed=${chosen_speed%"Mb/s"*} + chosen_speed=${chosen_speed#*"Speed: "} + ((chosen_speed == max_speed)) + check_err $? "h1 advertise $speed_to_advertise, h2 sync to speed $chosen_speed" + + log_test "check highest speed" + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +different_speeds_autoneg_on() +{ + # Test that when we configure links to advertise different speeds, + # links are not up and ping fails. + RET=0 + + local -a speeds=($(different_speeds_get $h1 $h2 1 1)) + local speed1=${speeds[0]} + local speed2=${speeds[1]} + + speed1=$(hex_speed_value_get $speed1) + speed2=$(hex_speed_value_get $speed2) + + ethtool_set $h1 advertise $speed1 + ethtool_set $h2 advertise $speed2 + + if (($RET)); then + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_fail $? "ping with different speeds autoneg on" + fi + + log_test "advertise different speeds autoneg on" + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +declare -gA speed_values +eval "speed_values=($(speeds_arr_get))" + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh new file mode 100755 index 000000000000..a7584448416e --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh @@ -0,0 +1,116 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + autoneg + autoneg_force_mode + no_cable +" + +NUM_NETIFS=2 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh +source ethtool_lib.sh + +TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + swp3=$NETIF_NO_CABLE +} + +ethtool_ext_state() +{ + local dev=$1; shift + local expected_ext_state=$1; shift + local expected_ext_substate=${1:-""}; shift + + local ext_state=$(ethtool $dev | grep "Link detected" \ + | cut -d "(" -f2 | cut -d ")" -f1) + local ext_substate=$(echo $ext_state | cut -sd "," -f2 \ + | sed -e 's/^[[:space:]]*//') + ext_state=$(echo $ext_state | cut -d "," -f1) + + if [[ $ext_state != $expected_ext_state ]]; then + echo "Expected \"$expected_ext_state\", got \"$ext_state\"" + return 1 + fi + if [[ $ext_substate != $expected_ext_substate ]]; then + echo "Expected \"$expected_ext_substate\", got \"$ext_substate\"" + return 1 + fi +} + +autoneg() +{ + local msg + + RET=0 + + ip link set dev $swp1 up + + msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ + "Autoneg" "No partner detected") + check_err $? "$msg" + + log_test "Autoneg, No partner detected" + + ip link set dev $swp1 down +} + +autoneg_force_mode() +{ + local msg + + RET=0 + + ip link set dev $swp1 up + ip link set dev $swp2 up + + local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0)) + local speed1=${speeds_arr[0]} + local speed2=${speeds_arr[1]} + + ethtool_set $swp1 speed $speed1 autoneg off + ethtool_set $swp2 speed $speed2 autoneg off + + msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ + "Autoneg" "No partner detected during force mode") + check_err $? "$msg" + + msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \ + "Autoneg" "No partner detected during force mode") + check_err $? "$msg" + + log_test "Autoneg, No partner detected during force mode" + + ethtool -s $swp2 autoneg on + ethtool -s $swp1 autoneg on + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +no_cable() +{ + local msg + + RET=0 + + ip link set dev $swp3 up + + msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable") + check_err $? "$msg" + + log_test "No cable" + + ip link set dev $swp3 down +} + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh new file mode 100644 index 000000000000..b9bfb45085af --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +speeds_arr_get() +{ + cmd='/ETHTOOL_LINK_MODE_[^[:space:]]*_BIT[[:space:]]+=[[:space:]]+/ \ + {sub(/,$/, "") \ + sub(/ETHTOOL_LINK_MODE_/,"") \ + sub(/_BIT/,"") \ + sub(/_Full/,"/Full") \ + sub(/_Half/,"/Half");\ + print "["$1"]="$3}' + + awk "${cmd}" /usr/include/linux/ethtool.h +} + +ethtool_set() +{ + local cmd="$@" + local out=$(ethtool -s $cmd 2>&1 | wc -l) + + check_err $out "error in configuration. $cmd" +} + +dev_linkmodes_params_get() +{ + local dev=$1; shift + local adver=$1; shift + local -a linkmodes_params + local param_count + local arr + + if (($adver)); then + mode="Advertised link modes" + else + mode="Supported link modes" + fi + + local -a dev_linkmodes=($(dev_speeds_get $dev 1 $adver)) + for ((i=0; i<${#dev_linkmodes[@]}; i++)); do + linkmodes_params[$i]=$(echo -e "${dev_linkmodes[$i]}" | \ + # Replaces all non numbers with spaces + sed -e 's/[^0-9]/ /g' | \ + # Squeeze spaces in sequence to 1 space + tr -s ' ') + # Count how many numbers were found in the linkmode + param_count=$(echo "${linkmodes_params[$i]}" | wc -w) + if [[ $param_count -eq 1 ]]; then + linkmodes_params[$i]="${linkmodes_params[$i]} 1" + elif [[ $param_count -ge 3 ]]; then + arr=(${linkmodes_params[$i]}) + # Take only first two params + linkmodes_params[$i]=$(echo "${arr[@]:0:2}") + fi + done + echo ${linkmodes_params[@]} +} + +dev_speeds_get() +{ + local dev=$1; shift + local with_mode=$1; shift + local adver=$1; shift + local speeds_str + + if (($adver)); then + mode="Advertised link modes" + else + mode="Supported link modes" + fi + + speeds_str=$(ethtool "$dev" | \ + # Snip everything before the link modes section. + sed -n '/'"$mode"':/,$p' | \ + # Quit processing the rest at the start of the next section. + # When checking, skip the header of this section (hence the 2,). + sed -n '2,${/^[\t][^ \t]/q};p' | \ + # Drop the section header of the current section. + cut -d':' -f2) + + local -a speeds_arr=($speeds_str) + if [[ $with_mode -eq 0 ]]; then + for ((i=0; i<${#speeds_arr[@]}; i++)); do + speeds_arr[$i]=${speeds_arr[$i]%base*} + done + fi + echo ${speeds_arr[@]} +} + +common_speeds_get() +{ + dev1=$1; shift + dev2=$1; shift + with_mode=$1; shift + adver=$1; shift + + local -a dev1_speeds=($(dev_speeds_get $dev1 $with_mode $adver)) + local -a dev2_speeds=($(dev_speeds_get $dev2 $with_mode $adver)) + + comm -12 \ + <(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \ + <(printf '%s\n' "${dev2_speeds[@]}" | sort -u) +} + +different_speeds_get() +{ + local dev1=$1; shift + local dev2=$1; shift + local with_mode=$1; shift + local adver=$1; shift + + local -a speeds_arr + + speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver)) + if [[ ${#speeds_arr[@]} < 2 ]]; then + check_err 1 "cannot check different speeds. There are not enough speeds" + fi + + echo ${speeds_arr[0]} ${speeds_arr[1]} +} diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh new file mode 100755 index 000000000000..c301e735c8ab --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh @@ -0,0 +1,341 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + manual_with_verification_h1_to_h2 + manual_with_verification_h2_to_h1 + manual_without_verification_h1_to_h2 + manual_without_verification_h2_to_h1 + manual_failed_verification_h1_to_h2 + manual_failed_verification_h2_to_h1 + lldp +" + +NUM_NETIFS=2 +REQUIRE_MZ=no +PREEMPTIBLE_PRIO=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +traffic_test() +{ + local if=$1; shift + local src=$1; shift + local num_pkts=10000 + local before= + local after= + local delta= + + if [ ${has_pmac_stats[$if]} = false ]; then + src="aggregate" + fi + + before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src) + + $MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO + + after=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src) + + delta=$((after - before)) + + # Allow an extra 1% tolerance for random packets sent by the stack + [ $delta -ge $num_pkts ] && [ $delta -le $((num_pkts + 100)) ] +} + +manual_with_verification() +{ + local tx=$1; shift + local rx=$1; shift + + RET=0 + + # It isn't completely clear from IEEE 802.3-2018 Figure 99-5: Transmit + # Processing state diagram whether the "send_r" variable (send response + # to verification frame) should be taken into consideration while the + # MAC Merge TX direction is disabled. That being said, at least the + # NXP ENETC does not, and requires tx-enabled on in order to respond to + # the link partner's verification frames. + ethtool --set-mm $rx tx-enabled on + ethtool --set-mm $tx verify-enabled on tx-enabled on + + # Wait for verification to finish + sleep 1 + + ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \ + grep -q 'SUCCEEDED' + check_err "$?" "Verification did not succeed" + + ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true' + check_err "$?" "pMAC TX is not active" + + traffic_test $tx "pmac" + check_err "$?" "Traffic did not get sent through $tx's pMAC" + + ethtool --set-mm $tx verify-enabled off tx-enabled off + ethtool --set-mm $rx tx-enabled off + + log_test "Manual configuration with verification: $tx to $rx" +} + +manual_with_verification_h1_to_h2() +{ + manual_with_verification $h1 $h2 +} + +manual_with_verification_h2_to_h1() +{ + manual_with_verification $h2 $h1 +} + +manual_without_verification() +{ + local tx=$1; shift + local rx=$1; shift + + RET=0 + + ethtool --set-mm $tx verify-enabled off tx-enabled on + + ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \ + grep -q 'DISABLED' + check_err "$?" "Verification is not disabled" + + ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true' + check_err "$?" "pMAC TX is not active" + + traffic_test $tx "pmac" + check_err "$?" "Traffic did not get sent through $tx's pMAC" + + ethtool --set-mm $tx verify-enabled off tx-enabled off + + log_test "Manual configuration without verification: $tx to $rx" +} + +manual_without_verification_h1_to_h2() +{ + manual_without_verification $h1 $h2 +} + +manual_without_verification_h2_to_h1() +{ + manual_without_verification $h2 $h1 +} + +manual_failed_verification() +{ + local tx=$1; shift + local rx=$1; shift + + RET=0 + + ethtool --set-mm $rx pmac-enabled off + ethtool --set-mm $tx verify-enabled on tx-enabled on + + # Wait for verification to time out + sleep 1 + + ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \ + grep -q 'SUCCEEDED' + check_fail "$?" "Verification succeeded when it shouldn't have" + + ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true' + check_fail "$?" "pMAC TX is active when it shouldn't have" + + traffic_test $tx "emac" + check_err "$?" "Traffic did not get sent through $tx's eMAC" + + ethtool --set-mm $tx verify-enabled off tx-enabled off + ethtool --set-mm $rx pmac-enabled on + + log_test "Manual configuration with failed verification: $tx to $rx" +} + +manual_failed_verification_h1_to_h2() +{ + manual_failed_verification $h1 $h2 +} + +manual_failed_verification_h2_to_h1() +{ + manual_failed_verification $h2 $h1 +} + +smallest_supported_add_frag_size() +{ + local iface=$1 + local rx_min_frag_size= + + rx_min_frag_size=$(ethtool --json --show-mm $iface | \ + jq '.[]."rx-min-frag-size"') + + if [ $rx_min_frag_size -le 60 ]; then + echo 0 + elif [ $rx_min_frag_size -le 124 ]; then + echo 1 + elif [ $rx_min_frag_size -le 188 ]; then + echo 2 + elif [ $rx_min_frag_size -le 252 ]; then + echo 3 + else + echo "$iface: RX min frag size $rx_min_frag_size cannot be advertised over LLDP" + exit 1 + fi +} + +expected_add_frag_size() +{ + local iface=$1 + local requested=$2 + local min=$(smallest_supported_add_frag_size $iface) + + [ $requested -le $min ] && echo $min || echo $requested +} + +lldp_change_add_frag_size() +{ + local add_frag_size=$1 + local pattern= + + lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null + # Wait for TLVs to be received + sleep 2 + pattern=$(printf "Additional fragment size: %d" \ + $(expected_add_frag_size $h1 $add_frag_size)) + lldptool -i $h2 -t -n -V addEthCaps | grep -q "$pattern" +} + +lldp() +{ + RET=0 + + systemctl start lldpad + + # Configure the interfaces to receive and transmit LLDPDUs + lldptool -L -i $h1 adminStatus=rxtx >/dev/null + lldptool -L -i $h2 adminStatus=rxtx >/dev/null + + # Enable the transmission of Additional Ethernet Capabilities TLV + lldptool -T -i $h1 -V addEthCaps enableTx=yes >/dev/null + lldptool -T -i $h2 -V addEthCaps enableTx=yes >/dev/null + + # Wait for TLVs to be received + sleep 2 + + lldptool -i $h1 -t -n -V addEthCaps | \ + grep -q "Preemption capability active" + check_err "$?" "$h1 pMAC TX is not active" + + lldptool -i $h2 -t -n -V addEthCaps | \ + grep -q "Preemption capability active" + check_err "$?" "$h2 pMAC TX is not active" + + lldp_change_add_frag_size 3 + check_err "$?" "addFragSize 3" + + lldp_change_add_frag_size 2 + check_err "$?" "addFragSize 2" + + lldp_change_add_frag_size 1 + check_err "$?" "addFragSize 1" + + lldp_change_add_frag_size 0 + check_err "$?" "addFragSize 0" + + traffic_test $h1 "pmac" + check_err "$?" "Traffic did not get sent through $h1's pMAC" + + traffic_test $h2 "pmac" + check_err "$?" "Traffic did not get sent through $h2's pMAC" + + systemctl stop lldpad + + log_test "LLDP" +} + +h1_create() +{ + ip link set dev $h1 up + + tc qdisc add dev $h1 root mqprio num_tc 4 map 0 1 2 3 \ + queues 1@0 1@1 1@2 1@3 \ + fp P E E E \ + hw 1 + + ethtool --set-mm $h1 pmac-enabled on tx-enabled off verify-enabled off +} + +h2_create() +{ + ip link set dev $h2 up + + ethtool --set-mm $h2 pmac-enabled on tx-enabled off verify-enabled off + + tc qdisc add dev $h2 root mqprio num_tc 4 map 0 1 2 3 \ + queues 1@0 1@1 1@2 1@3 \ + fp P E E E \ + hw 1 +} + +h1_destroy() +{ + ethtool --set-mm $h1 pmac-enabled off tx-enabled off verify-enabled off + + tc qdisc del dev $h1 root + + ip link set dev $h1 down +} + +h2_destroy() +{ + tc qdisc del dev $h2 root + + ethtool --set-mm $h2 pmac-enabled off tx-enabled off verify-enabled off + + ip link set dev $h2 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy +} + +check_ethtool_mm_support +check_tc_fp_support +require_command lldptool +bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually" + +for netif in ${NETIFS[@]}; do + ethtool --show-mm $netif 2>&1 &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: $netif does not support MAC Merge" + exit $ksft_skip + fi + + if check_ethtool_pmac_std_stats_support $netif eth-mac; then + has_pmac_stats[$netif]=true + else + has_pmac_stats[$netif]=false + echo "$netif does not report pMAC statistics, falling back to aggregate" + fi +done + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh new file mode 100755 index 000000000000..8f60c1685ad4 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + rmon_rx_histogram + rmon_tx_histogram +" + +NUM_NETIFS=2 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +ETH_FCS_LEN=4 +ETH_HLEN=$((6+6+2)) + +declare -A netif_mtu + +ensure_mtu() +{ + local iface=$1; shift + local len=$1; shift + local current=$(ip -j link show dev $iface | jq -r '.[0].mtu') + local required=$((len - ETH_HLEN - ETH_FCS_LEN)) + + if [ $current -lt $required ]; then + ip link set dev $iface mtu $required || return 1 + fi +} + +bucket_test() +{ + local iface=$1; shift + local neigh=$1; shift + local set=$1; shift + local bucket=$1; shift + local len=$1; shift + local num_rx=10000 + local num_tx=20000 + local expected= + local before= + local after= + local delta= + + # Mausezahn does not include FCS bytes in its length - but the + # histogram counters do + len=$((len - ETH_FCS_LEN)) + len=$((len > 0 ? len : 0)) + + before=$(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") + + # Send 10k one way and 20k in the other, to detect counters + # mapped to the wrong direction + $MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us + $MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us + + after=$(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") + + delta=$((after - before)) + + expected=$([ $set = rx ] && echo $num_rx || echo $num_tx) + + # Allow some extra tolerance for other packets sent by the stack + [ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ] +} + +rmon_histogram() +{ + local iface=$1; shift + local neigh=$1; shift + local set=$1; shift + local nbuckets=0 + local step= + + RET=0 + + while read -r -a bucket; do + step="$set-pkts${bucket[0]}to${bucket[1]} on $iface" + + for if in $iface $neigh; do + if ! ensure_mtu $if ${bucket[0]}; then + log_test_xfail "$if does not support the required MTU for $step" + return + fi + done + + if ! bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then + check_err 1 "$step failed" + return 1 + fi + log_test "$step" + nbuckets=$((nbuckets + 1)) + done < <(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null) + + if [ $nbuckets -eq 0 ]; then + log_test_xfail "$iface does not support $set histogram counters" + return + fi +} + +rmon_rx_histogram() +{ + rmon_histogram $h1 $h2 rx + rmon_histogram $h2 $h1 rx +} + +rmon_tx_histogram() +{ + rmon_histogram $h1 $h2 tx + rmon_histogram $h2 $h1 tx +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + for iface in $h1 $h2; do + netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu') + ip link set dev $iface up + done +} + +cleanup() +{ + pre_cleanup + + for iface in $h2 $h1; do + ip link set dev $iface \ + mtu ${netif_mtu[$iface]} \ + down + done +} + +check_ethtool_counter_group_support +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh new file mode 100755 index 000000000000..67fafefc80be --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh @@ -0,0 +1,334 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +----------------------+ +# | H1 | | H2 | +# | | | | +# | $h1.200 + | | + $h2.200 | +# | 192.0.2.1/28 | | | | 192.0.2.18/28 | +# | 2001:db8:1::1/64 | | | | 2001:db8:2::1/64 | +# | | | | | | +# | $h1 + | | + $h2 | +# | | | | | | +# +------------------|-+ +-|--------------------+ +# | | +# +------------------|-------------------------|--------------------+ +# | SW | | | +# | | | | +# | $rp1 + + $rp2 | +# | | | | +# | $rp1.200 + + $rp2.200 | +# | 192.0.2.2/28 192.0.2.17/28 | +# | 2001:db8:1::2/64 2001:db8:2::2/64 | +# | | +# +-----------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + test_stats_rx_ipv4 + test_stats_tx_ipv4 + test_stats_rx_ipv6 + test_stats_tx_ipv6 + respin_enablement + test_stats_rx_ipv4 + test_stats_tx_ipv4 + test_stats_rx_ipv6 + test_stats_tx_ipv6 + reapply_config + ping_ipv4 + ping_ipv6 + test_stats_rx_ipv4 + test_stats_tx_ipv4 + test_stats_rx_ipv6 + test_stats_tx_ipv6 + test_stats_report_rx + test_stats_report_tx + test_destroy_enabled + test_double_enable +" +NUM_NETIFS=4 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh +source "$lib_dir"/../../../net/forwarding/tc_common.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64 + ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 + ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + vlan_destroy $h1 200 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64 + ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2 + ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17 + vlan_destroy $h2 200 + simple_if_fini $h2 +} + +router_rp1_200_create() +{ + ip link add name $rp1.200 link $rp1 type vlan id 200 + ip link set dev $rp1.200 addrgenmode eui64 + ip link set dev $rp1.200 up + ip address add dev $rp1.200 192.0.2.2/28 + ip address add dev $rp1.200 2001:db8:1::2/64 + ip stats set dev $rp1.200 l3_stats on +} + +router_rp1_200_destroy() +{ + ip stats set dev $rp1.200 l3_stats off + ip address del dev $rp1.200 2001:db8:1::2/64 + ip address del dev $rp1.200 192.0.2.2/28 + ip link del dev $rp1.200 +} + +router_create() +{ + ip link set dev $rp1 up + router_rp1_200_create + + ip link set dev $rp2 up + vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64 +} + +router_destroy() +{ + vlan_destroy $rp2 200 + ip link set dev $rp2 down + + router_rp1_200_destroy + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1mac=$(mac_get $rp1) + rp2mac=$(mac_get $rp2) + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.200 192.0.2.18 " IPv4" +} + +ping_ipv6() +{ + ping_test $h1.200 2001:db8:2::1 " IPv6" +} + +send_packets_rx_ipv4() +{ + # Send 21 packets instead of 20, because the first one might trap and go + # through the SW datapath, which might not bump the HW counter. + $MZ $h1.200 -c 21 -d 20msec -p 100 \ + -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \ + -q -t udp sp=54321,dp=12345 +} + +send_packets_rx_ipv6() +{ + $MZ $h1.200 -6 -c 21 -d 20msec -p 100 \ + -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \ + -q -t udp sp=54321,dp=12345 +} + +send_packets_tx_ipv4() +{ + $MZ $h2.200 -c 21 -d 20msec -p 100 \ + -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \ + -q -t udp sp=54321,dp=12345 +} + +send_packets_tx_ipv6() +{ + $MZ $h2.200 -6 -c 21 -d 20msec -p 100 \ + -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \ + -q -t udp sp=54321,dp=12345 +} + +___test_stats() +{ + local dir=$1; shift + local prot=$1; shift + + local a + local b + + a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets) + send_packets_${dir}_${prot} + "$@" + b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ + hw_stats_get l3_stats $rp1.200 ${dir} packets) + check_err $? "Traffic not reflected in the counter: $a -> $b" +} + +__test_stats() +{ + local dir=$1; shift + local prot=$1; shift + + RET=0 + ___test_stats "$dir" "$prot" + log_test "Test $dir packets: $prot" +} + +test_stats_rx_ipv4() +{ + __test_stats rx ipv4 +} + +test_stats_tx_ipv4() +{ + __test_stats tx ipv4 +} + +test_stats_rx_ipv6() +{ + __test_stats rx ipv6 +} + +test_stats_tx_ipv6() +{ + __test_stats tx ipv6 +} + +# Make sure everything works well even after stats have been disabled and +# reenabled on the same device without touching the L3 configuration. +respin_enablement() +{ + log_info "Turning stats off and on again" + ip stats set dev $rp1.200 l3_stats off + ip stats set dev $rp1.200 l3_stats on +} + +# For the initial run, l3_stats is enabled on a completely set up netdevice. Now +# do it the other way around: enabling the L3 stats on an L2 netdevice, and only +# then apply the L3 configuration. +reapply_config() +{ + log_info "Reapplying configuration" + + router_rp1_200_destroy + + ip link add name $rp1.200 link $rp1 type vlan id 200 + ip link set dev $rp1.200 addrgenmode none + ip stats set dev $rp1.200 l3_stats on + ip link set dev $rp1.200 addrgenmode eui64 + ip link set dev $rp1.200 up + ip address add dev $rp1.200 192.0.2.2/28 + ip address add dev $rp1.200 2001:db8:1::2/64 +} + +__test_stats_report() +{ + local dir=$1; shift + local prot=$1; shift + + local a + local b + + RET=0 + + a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets) + send_packets_${dir}_${prot} + ip address flush dev $rp1.200 + b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ + hw_stats_get l3_stats $rp1.200 ${dir} packets) + check_err $? "Traffic not reflected in the counter: $a -> $b" + log_test "Test ${dir} packets: stats pushed on loss of L3" + + ip stats set dev $rp1.200 l3_stats off + ip link del dev $rp1.200 + router_rp1_200_create +} + +test_stats_report_rx() +{ + __test_stats_report rx ipv4 +} + +test_stats_report_tx() +{ + __test_stats_report tx ipv4 +} + +test_destroy_enabled() +{ + RET=0 + + ip link del dev $rp1.200 + router_rp1_200_create + + log_test "Destroy l3_stats-enabled netdev" +} + +test_double_enable() +{ + RET=0 + ___test_stats rx ipv4 \ + ip stats set dev $rp1.200 l3_stats on + log_test "Test stat retention across a spurious enablement" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info | + jq '.[].info.l3_stats.used') +[[ $used = true ]] +check_err $? "hw_stats_info.used=$used" +log_test "l3_stats offloaded" +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh new file mode 100755 index 000000000000..a94d92e1abce --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test L3 stats on IP-in-IP GRE tunnel without key. + +# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more +# details. + +ALL_TESTS=" + ping_ipv4 + test_stats_rx + test_stats_tx +" +NUM_NETIFS=6 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh +source "$lib_dir"/../../../net/forwarding/ipip_lib.sh +source "$lib_dir"/../../../net/forwarding/tc_common.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + ol1mac=$(mac_get $ol1) + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create gre $ol1 $ul1 + sw2_flat_create gre $ol2 $ul2 + ip stats set dev g1a l3_stats on + ip stats set dev g2a l3_stats on +} + +cleanup() +{ + pre_cleanup + + ip stats set dev g1a l3_stats off + ip stats set dev g2a l3_stats off + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + + vrf_cleanup + forwarding_restore +} + +ping_ipv4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre flat" +} + +send_packets_ipv4() +{ + # Send 21 packets instead of 20, because the first one might trap and go + # through the SW datapath, which might not bump the HW counter. + $MZ $h1 -c 21 -d 20msec -p 100 \ + -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \ + -q -t udp sp=54321,dp=12345 +} + +test_stats() +{ + local dev=$1; shift + local dir=$1; shift + + local a + local b + + RET=0 + + a=$(hw_stats_get l3_stats $dev $dir packets) + send_packets_ipv4 + b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ + hw_stats_get l3_stats $dev $dir packets) + check_err $? "Traffic not reflected in the counter: $a -> $b" + + log_test "Test $dir packets: $prot" +} + +test_stats_tx() +{ + test_stats g1a tx +} + +test_stats_rx() +{ + test_stats g2a rx +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c new file mode 100644 index 000000000000..62456df947bc --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c @@ -0,0 +1,464 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <arpa/inet.h> +#include <linux/errqueue.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/wait.h> + +#include <liburing.h> + +static long page_size; +#define AREA_SIZE (8192 * page_size) +#define SEND_SIZE (512 * 4096) +#define min(a, b) \ + ({ \ + typeof(a) _a = (a); \ + typeof(b) _b = (b); \ + _a < _b ? _a : _b; \ + }) +#define min_t(t, a, b) \ + ({ \ + t _ta = (a); \ + t _tb = (b); \ + min(_ta, _tb); \ + }) + +#define ALIGN_UP(v, align) (((v) + (align) - 1) & ~((align) - 1)) + +static int cfg_server; +static int cfg_client; +static int cfg_port = 8000; +static int cfg_payload_len; +static const char *cfg_ifname; +static int cfg_queue_id = -1; +static bool cfg_oneshot; +static int cfg_oneshot_recvs; +static int cfg_send_size = SEND_SIZE; +static struct sockaddr_in6 cfg_addr; + +static char *payload; +static void *area_ptr; +static void *ring_ptr; +static size_t ring_size; +static struct io_uring_zcrx_rq rq_ring; +static unsigned long area_token; +static int connfd; +static bool stop; +static size_t received; + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + +static inline size_t get_refill_ring_size(unsigned int rq_entries) +{ + size_t size; + + ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe); + /* add space for the header (head/tail/etc.) */ + ring_size += page_size; + return ALIGN_UP(ring_size, page_size); +} + +static void setup_zcrx(struct io_uring *ring) +{ + unsigned int ifindex; + unsigned int rq_entries = 4096; + int ret; + + ifindex = if_nametoindex(cfg_ifname); + if (!ifindex) + error(1, 0, "bad interface name: %s", cfg_ifname); + + area_ptr = mmap(NULL, + AREA_SIZE, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + 0, + 0); + if (area_ptr == MAP_FAILED) + error(1, 0, "mmap(): zero copy area"); + + ring_size = get_refill_ring_size(rq_entries); + ring_ptr = mmap(NULL, + ring_size, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + 0, + 0); + + struct io_uring_region_desc region_reg = { + .size = ring_size, + .user_addr = (__u64)(unsigned long)ring_ptr, + .flags = IORING_MEM_REGION_TYPE_USER, + }; + + struct io_uring_zcrx_area_reg area_reg = { + .addr = (__u64)(unsigned long)area_ptr, + .len = AREA_SIZE, + .flags = 0, + }; + + struct io_uring_zcrx_ifq_reg reg = { + .if_idx = ifindex, + .if_rxq = cfg_queue_id, + .rq_entries = rq_entries, + .area_ptr = (__u64)(unsigned long)&area_reg, + .region_ptr = (__u64)(unsigned long)®ion_reg, + }; + + ret = io_uring_register_ifq(ring, ®); + if (ret) + error(1, 0, "io_uring_register_ifq(): %d", ret); + + rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head); + rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail); + rq_ring.rqes = (struct io_uring_zcrx_rqe *)((char *)ring_ptr + reg.offsets.rqes); + rq_ring.rq_tail = 0; + rq_ring.ring_entries = reg.rq_entries; + + area_token = area_reg.rq_area_token; +} + +static void add_accept(struct io_uring *ring, int sockfd) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_accept(sqe, sockfd, NULL, NULL, 0); + sqe->user_data = 1; +} + +static void add_recvzc(struct io_uring *ring, int sockfd) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, 0, 0); + sqe->ioprio |= IORING_RECV_MULTISHOT; + sqe->user_data = 2; +} + +static void add_recvzc_oneshot(struct io_uring *ring, int sockfd, size_t len) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, len, 0); + sqe->ioprio |= IORING_RECV_MULTISHOT; + sqe->user_data = 2; +} + +static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe) +{ + if (cqe->res < 0) + error(1, 0, "accept()"); + if (connfd) + error(1, 0, "Unexpected second connection"); + + connfd = cqe->res; + if (cfg_oneshot) + add_recvzc_oneshot(ring, connfd, page_size); + else + add_recvzc(ring, connfd); +} + +static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe) +{ + unsigned rq_mask = rq_ring.ring_entries - 1; + struct io_uring_zcrx_cqe *rcqe; + struct io_uring_zcrx_rqe *rqe; + struct io_uring_sqe *sqe; + uint64_t mask; + char *data; + ssize_t n; + int i; + + if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs == 0) { + stop = true; + return; + } + + if (cqe->res < 0) + error(1, 0, "recvzc(): %d", cqe->res); + + if (cfg_oneshot) { + if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) { + add_recvzc_oneshot(ring, connfd, page_size); + cfg_oneshot_recvs--; + } + } else if (!(cqe->flags & IORING_CQE_F_MORE)) { + add_recvzc(ring, connfd); + } + + rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1); + + n = cqe->res; + mask = (1ULL << IORING_ZCRX_AREA_SHIFT) - 1; + data = (char *)area_ptr + (rcqe->off & mask); + + for (i = 0; i < n; i++) { + if (*(data + i) != payload[(received + i)]) + error(1, 0, "payload mismatch at %d", i); + } + received += n; + + rqe = &rq_ring.rqes[(rq_ring.rq_tail & rq_mask)]; + rqe->off = (rcqe->off & ~IORING_ZCRX_AREA_MASK) | area_token; + rqe->len = cqe->res; + io_uring_smp_store_release(rq_ring.ktail, ++rq_ring.rq_tail); +} + +static void server_loop(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + unsigned int count = 0; + unsigned int head; + int i, ret; + + io_uring_submit_and_wait(ring, 1); + + io_uring_for_each_cqe(ring, head, cqe) { + if (cqe->user_data == 1) + process_accept(ring, cqe); + else if (cqe->user_data == 2) + process_recvzc(ring, cqe); + else + error(1, 0, "unknown cqe"); + count++; + } + io_uring_cq_advance(ring, count); +} + +static void run_server(void) +{ + unsigned int flags = 0; + struct io_uring ring; + int fd, enable, ret; + uint64_t tstop; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, 0, "socket()"); + + enable = 1; + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int)); + if (ret < 0) + error(1, 0, "setsockopt(SO_REUSEADDR)"); + + ret = bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)); + if (ret < 0) + error(1, 0, "bind()"); + + if (listen(fd, 1024) < 0) + error(1, 0, "listen()"); + + flags |= IORING_SETUP_COOP_TASKRUN; + flags |= IORING_SETUP_SINGLE_ISSUER; + flags |= IORING_SETUP_DEFER_TASKRUN; + flags |= IORING_SETUP_SUBMIT_ALL; + flags |= IORING_SETUP_CQE32; + + io_uring_queue_init(512, &ring, flags); + + setup_zcrx(&ring); + + add_accept(&ring, fd); + + tstop = gettimeofday_ms() + 5000; + while (!stop && gettimeofday_ms() < tstop) + server_loop(&ring); + + if (!stop) + error(1, 0, "test failed\n"); +} + +static void run_client(void) +{ + ssize_t to_send = cfg_send_size; + ssize_t sent = 0; + ssize_t chunk, res; + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, 0, "socket()"); + + if (connect(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr))) + error(1, 0, "connect()"); + + while (to_send) { + void *src = &payload[sent]; + + chunk = min_t(ssize_t, cfg_payload_len, to_send); + res = send(fd, src, chunk, 0); + if (res < 0) + error(1, 0, "send(): %zd", sent); + sent += res; + to_send -= res; + } + + close(fd); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s (-4|-6) (-s|-c) -h<server_ip> -p<port> " + "-l<payload_size> -i<ifname> -q<rxq_id>", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + const int max_payload_len = SEND_SIZE - + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) - + 40 /* max tcp options */; + struct sockaddr_in6 *addr6 = (void *) &cfg_addr; + char *addr = NULL; + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + cfg_payload_len = max_payload_len; + + while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:")) != -1) { + switch (c) { + case 's': + if (cfg_client) + error(1, 0, "Pass one of -s or -c"); + cfg_server = 1; + break; + case 'c': + if (cfg_server) + error(1, 0, "Pass one of -s or -c"); + cfg_client = 1; + break; + case 'h': + addr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'l': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 'i': + cfg_ifname = optarg; + break; + case 'q': + cfg_queue_id = strtoul(optarg, NULL, 0); + break; + case 'o': { + cfg_oneshot = true; + cfg_oneshot_recvs = strtoul(optarg, NULL, 0); + break; + } + case 'z': + cfg_send_size = strtoul(optarg, NULL, 0); + break; + } + } + + if (cfg_server && addr) + error(1, 0, "Receiver cannot have -h specified"); + + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + addr6->sin6_addr = in6addr_any; + if (addr) { + ret = parse_address(addr, cfg_port, addr6); + if (ret) + error(1, 0, "receiver address parse error: %s", addr); + } + + if (cfg_payload_len > max_payload_len) + error(1, 0, "-l: payload exceeds max (%d)", max_payload_len); +} + +int main(int argc, char **argv) +{ + const char *cfg_test = argv[argc - 1]; + int i; + + page_size = sysconf(_SC_PAGESIZE); + if (page_size < 0) + return 1; + + if (posix_memalign((void **)&payload, page_size, SEND_SIZE)) + return 1; + + parse_opts(argc, argv); + + for (i = 0; i < SEND_SIZE; i++) + payload[i] = 'a' + (i % 26); + + if (cfg_server) + run_server(); + else if (cfg_client) + run_client(); + + return 0; +} diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py new file mode 100755 index 000000000000..712c806508b5 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import re +from os import path +from lib.py import ksft_run, ksft_exit, KsftSkipEx +from lib.py import NetDrvEpEnv +from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen + + +def _get_current_settings(cfg): + output = ethtool(f"-g {cfg.ifname}", json=True)[0] + return (output['rx'], output['hds-thresh']) + + +def _get_combined_channels(cfg): + output = ethtool(f"-l {cfg.ifname}").stdout + values = re.findall(r'Combined:\s+(\d+)', output) + return int(values[1]) + + +def _create_rss_ctx(cfg, chan): + output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout + values = re.search(r'New RSS context is (\d+)', output).group(1) + ctx_id = int(values) + return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}")) + + +def _set_flow_rule(cfg, port, chan): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout + values = re.search(r'ID (\d+)', output).group(1) + return int(values) + + +def _set_flow_rule_rss(cfg, port, ctx_id): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout + values = re.search(r'ID (\d+)', output).group(1) + return int(values) + + +def test_zcrx(cfg) -> None: + cfg.require_ipver('6') + + combined_chans = _get_combined_channels(cfg) + if combined_chans < 2: + raise KsftSkipEx('at least 2 combined channels required') + (rx_ring, hds_thresh) = _get_current_settings(cfg) + port = rand_port() + + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") + + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") + + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") + + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") + + flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") + + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) + + +def test_zcrx_oneshot(cfg) -> None: + cfg.require_ipver('6') + + combined_chans = _get_combined_channels(cfg) + if combined_chans < 2: + raise KsftSkipEx('at least 2 combined channels required') + (rx_ring, hds_thresh) = _get_current_settings(cfg) + port = rand_port() + + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") + + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") + + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") + + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") + + flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") + + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) + + +def test_zcrx_rss(cfg) -> None: + cfg.require_ipver('6') + + combined_chans = _get_combined_channels(cfg) + if combined_chans < 2: + raise KsftSkipEx('at least 2 combined channels required') + (rx_ring, hds_thresh) = _get_current_settings(cfg) + port = rand_port() + + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") + + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") + + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") + + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") + + (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1) + flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") + + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) + + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx") + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/irq.py b/tools/testing/selftests/drivers/net/hw/irq.py new file mode 100755 index 000000000000..0699d6a8b4e2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/irq.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_ge, ksft_eq +from lib.py import KsftSkipEx +from lib.py import ksft_disruptive +from lib.py import EthtoolFamily, NetdevFamily +from lib.py import NetDrvEnv +from lib.py import cmd, ip, defer + + +def read_affinity(irq) -> str: + with open(f'/proc/irq/{irq}/smp_affinity', 'r') as fp: + return fp.read().lstrip("0,").strip() + + +def write_affinity(irq, what) -> str: + if what != read_affinity(irq): + with open(f'/proc/irq/{irq}/smp_affinity', 'w') as fp: + fp.write(what) + + +def check_irqs_reported(cfg) -> None: + """ Check that device reports IRQs for NAPI instances """ + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) + irqs = sum(['irq' in x for x in napis]) + + ksft_ge(irqs, 1) + ksft_eq(irqs, len(napis)) + + +def _check_reconfig(cfg, reconfig_cb) -> None: + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) + for n in reversed(napis): + if 'irq' in n: + break + else: + raise KsftSkipEx(f"Device has no NAPI with IRQ attribute (#napis: {len(napis)}") + + old = read_affinity(n['irq']) + # pick an affinity that's not the current one + new = "3" if old != "3" else "5" + write_affinity(n['irq'], new) + defer(write_affinity, n['irq'], old) + + reconfig_cb(cfg) + + ksft_eq(read_affinity(n['irq']), new, comment="IRQ affinity changed after reconfig") + + +def check_reconfig_queues(cfg) -> None: + def reconfig(cfg) -> None: + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if channels['combined-count'] == 0: + rx_type = 'rx' + else: + rx_type = 'combined' + cur_queue_cnt = channels[f'{rx_type}-count'] + max_queue_cnt = channels[f'{rx_type}-max'] + + cmd(f"ethtool -L {cfg.ifname} {rx_type} 1") + cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}") + cmd(f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}") + + _check_reconfig(cfg, reconfig) + + +def check_reconfig_xdp(cfg) -> None: + def reconfig(cfg) -> None: + ip(f"link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o")) + ip(f"link set dev %s xdp off" % cfg.ifname) + + _check_reconfig(cfg, reconfig) + + +@ksft_disruptive +def check_down(cfg) -> None: + def reconfig(cfg) -> None: + ip("link set dev %s down" % cfg.ifname) + ip("link set dev %s up" % cfg.ifname) + + _check_reconfig(cfg, reconfig) + + +def main() -> None: + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + + ksft_run([check_irqs_reported, check_reconfig_queues, + check_reconfig_xdp, check_down], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py new file mode 100644 index 000000000000..766bfc4ad842 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: GPL-2.0 + +""" +Driver test environment (hardware-only tests). +NetDrvEnv and NetDrvEpEnv are the main environment classes. +Former is for local host only tests, latter creates / connects +to a remote endpoint. See NIPA wiki for more information about +running and writing driver tests. +""" + +import sys +from pathlib import Path + +KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve() + +try: + sys.path.append(KSFT_DIR.as_posix()) + + # Import one by one to avoid pylint false positives + from net.lib.py import NetNS, NetNSEnter, NetdevSimDev + from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ + NlError, RtnlFamily, DevlinkFamily, PSPFamily + from net.lib.py import CmdExitFailure + from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ + fd_read_timeout, ip, rand_port, wait_port_listen, wait_file + from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx + from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ + ksft_setup, ksft_variants, KsftNamedVariant + from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ + ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none + from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner + from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv + + __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", + "EthtoolFamily", "NetdevFamily", "NetshaperFamily", + "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", + "CmdExitFailure", + "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", + "fd_read_timeout", "ip", "rand_port", + "wait_port_listen", "wait_file", + "KsftSkipEx", "KsftFailEx", "KsftXfailEx", + "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", + "ksft_setup", "ksft_variants", "KsftNamedVariant", + "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", + "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", + "ksft_not_none", "ksft_not_none", + "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote", + "Iperf3Runner"] +except ModuleNotFoundError as e: + print("Failed importing `net` library from kernel sources") + print(str(e)) + sys.exit(4) diff --git a/tools/testing/selftests/drivers/net/hw/loopback.sh b/tools/testing/selftests/drivers/net/hw/loopback.sh new file mode 100755 index 000000000000..5acc3ff820aa --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/loopback.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +ALL_TESTS="loopback_test" +NUM_NETIFS=2 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/tc_common.sh +source "$lib_dir"/../../../net/forwarding/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 + tc qdisc add dev $h1 clsact +} + +h1_destroy() +{ + tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 +} + +h2_destroy() +{ + simple_if_fini $h2 +} + +loopback_test() +{ + RET=0 + + tc filter add dev $h1 ingress protocol arp pref 1 handle 101 flower \ + skip_hw arp_op reply arp_tip 192.0.2.1 action drop + + $MZ $h1 -c 1 -t arp -q + + tc_check_packets "dev $h1 ingress" 101 1 + check_fail $? "Matched on a filter without loopback setup" + + ethtool -K $h1 loopback on + check_err $? "Failed to enable loopback" + + setup_wait_dev $h1 + + $MZ $h1 -c 1 -t arp -q + + tc_check_packets "dev $h1 ingress" 101 1 + check_err $? "Did not match on filter with loopback" + + ethtool -K $h1 loopback off + check_err $? "Failed to disable loopback" + + $MZ $h1 -c 1 -t arp -q + + tc_check_packets "dev $h1 ingress" 101 2 + check_fail $? "Matched on a filter after loopback was removed" + + tc filter del dev $h1 ingress protocol arp pref 1 handle 101 flower + + log_test "loopback" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + vrf_prepare + + h1_create + h2_create + + if ethtool -k $h1 | grep loopback | grep -q fixed; then + log_test "SKIP: dev $h1 does not support loopback feature" + exit $ksft_skip + fi +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c new file mode 100644 index 000000000000..3288ed04ce08 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -0,0 +1,1524 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * tcpdevmem netcat. Works similarly to netcat but does device memory TCP + * instead of regular TCP. Uses udmabuf to mock a dmabuf provider. + * + * Usage: + * + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 + * + * On client: + * echo -n "hello\nworld" | \ + * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1 + * + * Note this is compatible with regular netcat. i.e. the sender or receiver can + * be replaced with regular netcat to test the RX or TX path in isolation. + * + * Test data validation (devmem TCP on RX only): + * + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7 + * + * On client: + * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ + * head -c 1G | \ + * nc <server IP> 5201 -p 5201 + * + * Test data validation (devmem TCP on RX and TX, validation happens on RX): + * + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1 + * + * On client: + * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \ + * head -c 1M | \ + * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1 + */ +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + +#include <linux/uio.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#define __iovec_defined +#include <fcntl.h> +#include <malloc.h> +#include <error.h> +#include <poll.h> + +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <sys/time.h> + +#include <linux/memfd.h> +#include <linux/dma-buf.h> +#include <linux/errqueue.h> +#include <linux/udmabuf.h> +#include <linux/types.h> +#include <linux/netlink.h> +#include <linux/genetlink.h> +#include <linux/netdev.h> +#include <linux/ethtool_netlink.h> +#include <time.h> +#include <net/if.h> + +#include "netdev-user.h" +#include "ethtool-user.h" +#include <ynl.h> + +#define PAGE_SHIFT 12 +#define TEST_PREFIX "ncdevmem" +#define NUM_PAGES 16000 + +#ifndef MSG_SOCK_DEVMEM +#define MSG_SOCK_DEVMEM 0x2000000 +#endif + +#define MAX_IOV 1024 + +static size_t max_chunk; +static char *server_ip; +static char *client_ip; +static char *port; +static size_t do_validation; +static int start_queue = -1; +static int num_queues = -1; +static char *ifname; +static unsigned int ifindex; +static unsigned int dmabuf_id; +static uint32_t tx_dmabuf_id; +static int waittime_ms = 500; + +/* System state loaded by current_config_load() */ +#define MAX_FLOWS 8 +static int ntuple_ids[MAX_FLOWS] = { -1, -1, -1, -1, -1, -1, -1, -1, }; + +struct memory_buffer { + int fd; + size_t size; + + int devfd; + int memfd; + char *buf_mem; +}; + +struct memory_provider { + struct memory_buffer *(*alloc)(size_t size); + void (*free)(struct memory_buffer *ctx); + void (*memcpy_to_device)(struct memory_buffer *dst, size_t off, + void *src, int n); + void (*memcpy_from_device)(void *dst, struct memory_buffer *src, + size_t off, int n); +}; + +static void pr_err(const char *fmt, ...) +{ + va_list args; + + fprintf(stderr, "%s: ", TEST_PREFIX); + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + if (errno != 0) + fprintf(stderr, ": %s", strerror(errno)); + fprintf(stderr, "\n"); +} + +static struct memory_buffer *udmabuf_alloc(size_t size) +{ + struct udmabuf_create create; + struct memory_buffer *ctx; + int ret; + + ctx = malloc(sizeof(*ctx)); + if (!ctx) + return NULL; + + ctx->size = size; + + ctx->devfd = open("/dev/udmabuf", O_RDWR); + if (ctx->devfd < 0) { + pr_err("[skip,no-udmabuf: Unable to access DMA buffer device file]"); + goto err_free_ctx; + } + + ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); + if (ctx->memfd < 0) { + pr_err("[skip,no-memfd]"); + goto err_close_dev; + } + + ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) { + pr_err("[skip,fcntl-add-seals]"); + goto err_close_memfd; + } + + ret = ftruncate(ctx->memfd, size); + if (ret == -1) { + pr_err("[FAIL,memfd-truncate]"); + goto err_close_memfd; + } + + memset(&create, 0, sizeof(create)); + + create.memfd = ctx->memfd; + create.offset = 0; + create.size = size; + ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create); + if (ctx->fd < 0) { + pr_err("[FAIL, create udmabuf]"); + goto err_close_fd; + } + + ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + ctx->fd, 0); + if (ctx->buf_mem == MAP_FAILED) { + pr_err("[FAIL, map udmabuf]"); + goto err_close_fd; + } + + return ctx; + +err_close_fd: + close(ctx->fd); +err_close_memfd: + close(ctx->memfd); +err_close_dev: + close(ctx->devfd); +err_free_ctx: + free(ctx); + return NULL; +} + +static void udmabuf_free(struct memory_buffer *ctx) +{ + munmap(ctx->buf_mem, ctx->size); + close(ctx->fd); + close(ctx->memfd); + close(ctx->devfd); + free(ctx); +} + +static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off, + void *src, int n) +{ + struct dma_buf_sync sync = {}; + + sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE; + ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync); + + memcpy(dst->buf_mem + off, src, n); + + sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE; + ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync); +} + +static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src, + size_t off, int n) +{ + struct dma_buf_sync sync = {}; + + sync.flags = DMA_BUF_SYNC_START; + ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); + + memcpy(dst, src->buf_mem + off, n); + + sync.flags = DMA_BUF_SYNC_END; + ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); +} + +static struct memory_provider udmabuf_memory_provider = { + .alloc = udmabuf_alloc, + .free = udmabuf_free, + .memcpy_to_device = udmabuf_memcpy_to_device, + .memcpy_from_device = udmabuf_memcpy_from_device, +}; + +static struct memory_provider *provider = &udmabuf_memory_provider; + +static void print_nonzero_bytes(void *ptr, size_t size) +{ + unsigned char *p = ptr; + unsigned int i; + + for (i = 0; i < size; i++) + putchar(p[i]); +} + +int validate_buffer(void *line, size_t size) +{ + static unsigned char seed = 1; + unsigned char *ptr = line; + unsigned char expected; + static int errors; + size_t i; + + for (i = 0; i < size; i++) { + expected = seed ? seed : '\n'; + if (ptr[i] != expected) { + fprintf(stderr, + "Failed validation: expected=%u, actual=%u, index=%lu\n", + expected, ptr[i], i); + errors++; + if (errors > 20) { + pr_err("validation failed"); + return -1; + } + } + seed++; + if (seed == do_validation) + seed = 0; + } + + fprintf(stdout, "Validated buffer\n"); + return 0; +} + +static int +__run_command(char *out, size_t outlen, const char *cmd, va_list args) +{ + char command[256]; + FILE *fp; + + vsnprintf(command, sizeof(command), cmd, args); + + fprintf(stderr, "Running: %s\n", command); + fp = popen(command, "r"); + if (!fp) + return -1; + if (out) { + size_t len; + + if (!fgets(out, outlen, fp)) + return -1; + + /* Remove trailing newline if present */ + len = strlen(out); + if (len && out[len - 1] == '\n') + out[len - 1] = '\0'; + } + return pclose(fp); +} + +static int run_command(const char *cmd, ...) +{ + va_list args; + int ret; + + va_start(args, cmd); + ret = __run_command(NULL, 0, cmd, args); + va_end(args); + + return ret; +} + +static int ethtool_add_flow(const char *format, ...) +{ + char local_output[256], cmd[256]; + const char *id_start; + int flow_idx, ret; + char *endptr; + long flow_id; + va_list args; + + for (flow_idx = 0; flow_idx < MAX_FLOWS; flow_idx++) + if (ntuple_ids[flow_idx] == -1) + break; + if (flow_idx == MAX_FLOWS) { + fprintf(stderr, "Error: too many flows\n"); + return -1; + } + + snprintf(cmd, sizeof(cmd), "ethtool -N %s %s", ifname, format); + + va_start(args, format); + ret = __run_command(local_output, sizeof(local_output), cmd, args); + va_end(args); + + if (ret != 0) + return ret; + + /* Extract the ID from the output */ + id_start = strstr(local_output, "Added rule with ID "); + if (!id_start) + return -1; + id_start += strlen("Added rule with ID "); + + flow_id = strtol(id_start, &endptr, 10); + if (endptr == id_start || flow_id < 0 || flow_id > INT_MAX) + return -1; + + fprintf(stderr, "Added flow rule with ID %ld\n", flow_id); + ntuple_ids[flow_idx] = flow_id; + return flow_id; +} + +static int rxq_num(int ifindex) +{ + struct ethtool_channels_get_req *req; + struct ethtool_channels_get_rsp *rsp; + struct ynl_error yerr; + struct ynl_sock *ys; + int num = -1; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = ethtool_channels_get_req_alloc(); + ethtool_channels_get_req_set_header_dev_index(req, ifindex); + rsp = ethtool_channels_get(ys, req); + if (rsp) + num = rsp->rx_count + rsp->combined_count; + ethtool_channels_get_req_free(req); + ethtool_channels_get_rsp_free(rsp); + + ynl_sock_destroy(ys); + + return num; +} + +static void reset_flow_steering(void) +{ + int i; + + for (i = 0; i < MAX_FLOWS; i++) { + if (ntuple_ids[i] == -1) + continue; + run_command("ethtool -N %s delete %d", + ifname, ntuple_ids[i]); + ntuple_ids[i] = -1; + } +} + +static const char *tcp_data_split_str(int val) +{ + switch (val) { + case 0: + return "off"; + case 1: + return "auto"; + case 2: + return "on"; + default: + return "?"; + } +} + +static struct ethtool_rings_get_rsp *get_ring_config(void) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ynl_error yerr; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return NULL; + } + + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + + ynl_sock_destroy(ys); + + return get_rsp; +} + +static void restore_ring_config(const struct ethtool_rings_get_rsp *config) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ethtool_rings_set_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + if (!config) + return; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return; + } + + req = ethtool_rings_set_req_alloc(); + ethtool_rings_set_req_set_header_dev_index(req, ifindex); + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); + if (config->_present.hds_thresh) + ethtool_rings_set_req_set_hds_thresh(req, config->hds_thresh); + + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL restoring HDS cfg: %s\n", ys->err.msg); + + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + + /* use explicit value if UKNOWN didn't give us the previous */ + if (get_rsp->tcp_data_split != config->tcp_data_split) { + ethtool_rings_set_req_set_tcp_data_split(req, + config->tcp_data_split); + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL restoring expl HDS cfg: %s\n", + ys->err.msg); + } + + ethtool_rings_get_rsp_free(get_rsp); + ethtool_rings_set_req_free(req); + + ynl_sock_destroy(ys); +} + +static int +configure_headersplit(const struct ethtool_rings_get_rsp *old, bool on) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ethtool_rings_set_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = ethtool_rings_set_req_alloc(); + ethtool_rings_set_req_set_header_dev_index(req, ifindex); + if (on) { + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_ENABLED); + if (old->_present.hds_thresh) + ethtool_rings_set_req_set_hds_thresh(req, 0); + } else { + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); + } + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL failed: %s\n", ys->err.msg); + ethtool_rings_set_req_free(req); + + if (ret == 0) { + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + if (get_rsp) + fprintf(stderr, "TCP header split: %s\n", + tcp_data_split_str(get_rsp->tcp_data_split)); + ethtool_rings_get_rsp_free(get_rsp); + } + + ynl_sock_destroy(ys); + + return ret; +} + +static int configure_rss(void) +{ + return run_command("ethtool -X %s equal %d >&2", ifname, start_queue); +} + +static void reset_rss(void) +{ + run_command("ethtool -X %s default >&2", ifname, start_queue); +} + +static int check_changing_channels(unsigned int rx, unsigned int tx) +{ + struct ethtool_channels_get_req *gchan; + struct ethtool_channels_set_req *schan; + struct ethtool_channels_get_rsp *chan; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx); + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + gchan = ethtool_channels_get_req_alloc(); + if (!gchan) { + ret = -1; + goto exit_close_sock; + } + + ethtool_channels_get_req_set_header_dev_index(gchan, ifindex); + chan = ethtool_channels_get(ys, gchan); + ethtool_channels_get_req_free(gchan); + if (!chan) { + fprintf(stderr, "YNL get channels: %s\n", ys->err.msg); + ret = -1; + goto exit_close_sock; + } + + schan = ethtool_channels_set_req_alloc(); + if (!schan) { + ret = -1; + goto exit_free_chan; + } + + ethtool_channels_set_req_set_header_dev_index(schan, ifindex); + + if (chan->_present.combined_count) { + if (chan->_present.rx_count || chan->_present.tx_count) { + ethtool_channels_set_req_set_rx_count(schan, 0); + ethtool_channels_set_req_set_tx_count(schan, 0); + } + + if (rx == tx) { + ethtool_channels_set_req_set_combined_count(schan, rx); + } else if (rx > tx) { + ethtool_channels_set_req_set_combined_count(schan, tx); + ethtool_channels_set_req_set_rx_count(schan, rx - tx); + } else { + ethtool_channels_set_req_set_combined_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx - rx); + } + + } else if (chan->_present.rx_count) { + ethtool_channels_set_req_set_rx_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx); + } else { + fprintf(stderr, "Error: device has neither combined nor rx channels\n"); + ret = -1; + goto exit_free_schan; + } + + ret = ethtool_channels_set(ys, schan); + if (ret) { + fprintf(stderr, "YNL set channels: %s\n", ys->err.msg); + } else { + /* We were expecting a failure, go back to previous settings */ + ethtool_channels_set_req_set_combined_count(schan, + chan->combined_count); + ethtool_channels_set_req_set_rx_count(schan, chan->rx_count); + ethtool_channels_set_req_set_tx_count(schan, chan->tx_count); + + ret = ethtool_channels_set(ys, schan); + if (ret) + fprintf(stderr, "YNL un-setting channels: %s\n", + ys->err.msg); + } + +exit_free_schan: + ethtool_channels_set_req_free(schan); +exit_free_chan: + ethtool_channels_get_rsp_free(chan); +exit_close_sock: + ynl_sock_destroy(ys); + + return ret; +} + +static int configure_flow_steering(struct sockaddr_in6 *server_sin) +{ + const char *type = "tcp6"; + const char *server_addr; + char buf[40]; + int flow_id; + + inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf)); + server_addr = buf; + + if (IN6_IS_ADDR_V4MAPPED(&server_sin->sin6_addr)) { + type = "tcp4"; + server_addr = strrchr(server_addr, ':') + 1; + } + + /* Try configure 5-tuple */ + flow_id = ethtool_add_flow("flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d", + type, + client_ip ? "src-ip" : "", + client_ip ?: "", + server_addr, + client_ip ? "src-port" : "", + client_ip ? port : "", + port, start_queue); + if (flow_id < 0) { + /* If that fails, try configure 3-tuple */ + flow_id = ethtool_add_flow("flow-type %s dst-ip %s dst-port %s queue %d", + type, server_addr, port, start_queue); + if (flow_id < 0) + /* If that fails, return error */ + return -1; + } + + return 0; +} + +static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, + struct netdev_queue_id *queues, + unsigned int n_queue_index, struct ynl_sock **ys) +{ + struct netdev_bind_rx_req *req = NULL; + struct netdev_bind_rx_rsp *rsp = NULL; + struct ynl_error yerr; + + *ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!*ys) { + netdev_queue_id_free(queues); + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = netdev_bind_rx_req_alloc(); + netdev_bind_rx_req_set_ifindex(req, ifindex); + netdev_bind_rx_req_set_fd(req, dmabuf_fd); + __netdev_bind_rx_req_set_queues(req, queues, n_queue_index); + + rsp = netdev_bind_rx(*ys, req); + if (!rsp) { + perror("netdev_bind_rx"); + goto err_close; + } + + if (!rsp->_present.id) { + perror("id not present"); + goto err_close; + } + + fprintf(stderr, "got dmabuf id=%d\n", rsp->id); + dmabuf_id = rsp->id; + + netdev_bind_rx_req_free(req); + netdev_bind_rx_rsp_free(rsp); + + return 0; + +err_close: + fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); + netdev_bind_rx_req_free(req); + ynl_sock_destroy(*ys); + return -1; +} + +static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd, + struct ynl_sock **ys) +{ + struct netdev_bind_tx_req *req = NULL; + struct netdev_bind_tx_rsp *rsp = NULL; + struct ynl_error yerr; + + *ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!*ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = netdev_bind_tx_req_alloc(); + netdev_bind_tx_req_set_ifindex(req, ifindex); + netdev_bind_tx_req_set_fd(req, dmabuf_fd); + + rsp = netdev_bind_tx(*ys, req); + if (!rsp) { + perror("netdev_bind_tx"); + goto err_close; + } + + if (!rsp->_present.id) { + perror("id not present"); + goto err_close; + } + + fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id); + tx_dmabuf_id = rsp->id; + + netdev_bind_tx_req_free(req); + netdev_bind_tx_rsp_free(rsp); + + return 0; + +err_close: + fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); + netdev_bind_tx_req_free(req); + ynl_sock_destroy(*ys); + return -1; +} + +static int enable_reuseaddr(int fd) +{ + int opt = 1; + int ret; + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); + if (ret) { + pr_err("SO_REUSEPORT failed"); + return -1; + } + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + if (ret) { + pr_err("SO_REUSEADDR failed"); + return -1; + } + + return 0; +} + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + +static struct netdev_queue_id *create_queues(void) +{ + struct netdev_queue_id *queues; + size_t i = 0; + + queues = netdev_queue_id_alloc(num_queues); + for (i = 0; i < num_queues; i++) { + netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX); + netdev_queue_id_set_id(&queues[i], start_queue + i); + } + + return queues; +} + +static int do_server(struct memory_buffer *mem) +{ + struct ethtool_rings_get_rsp *ring_config; + char ctrl_data[sizeof(int) * 20000]; + size_t non_page_aligned_frags = 0; + struct sockaddr_in6 client_addr; + struct sockaddr_in6 server_sin; + size_t page_aligned_frags = 0; + size_t total_received = 0; + socklen_t client_addr_len; + bool is_devmem = false; + char *tmp_mem = NULL; + struct ynl_sock *ys; + char iobuf[819200]; + int ret, err = -1; + char buffer[256]; + int socket_fd; + int client_fd; + + ret = parse_address(server_ip, atoi(port), &server_sin); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } + + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + return -1; + } + + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to enable TCP header split"); + goto err_free_ring_config; + } + + /* Configure RSS to divert all traffic from our devmem queues */ + if (configure_rss()) { + pr_err("Failed to configure rss"); + goto err_reset_headersplit; + } + + /* Flow steer our devmem flows to start_queue */ + if (configure_flow_steering(&server_sin)) { + pr_err("Failed to configure flow steering"); + goto err_reset_rss; + } + + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_flow_steering; + } + + tmp_mem = malloc(mem->size); + if (!tmp_mem) + goto err_unbind; + + socket_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (socket_fd < 0) { + pr_err("Failed to create socket"); + goto err_free_tmp; + } + + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; + + fprintf(stderr, "binding to address %s:%d\n", server_ip, + ntohs(server_sin.sin6_port)); + + ret = bind(socket_fd, &server_sin, sizeof(server_sin)); + if (ret) { + pr_err("Failed to bind"); + goto err_close_socket; + } + + ret = listen(socket_fd, 1); + if (ret) { + pr_err("Failed to listen"); + goto err_close_socket; + } + + client_addr_len = sizeof(client_addr); + + inet_ntop(AF_INET6, &server_sin.sin6_addr, buffer, + sizeof(buffer)); + fprintf(stderr, "Waiting or connection on %s:%d\n", buffer, + ntohs(server_sin.sin6_port)); + client_fd = accept(socket_fd, &client_addr, &client_addr_len); + if (client_fd < 0) { + pr_err("Failed to accept"); + goto err_close_socket; + } + + inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer, + sizeof(buffer)); + fprintf(stderr, "Got connection from %s:%d\n", buffer, + ntohs(client_addr.sin6_port)); + + while (1) { + struct iovec iov = { .iov_base = iobuf, + .iov_len = sizeof(iobuf) }; + struct dmabuf_cmsg *dmabuf_cmsg = NULL; + struct cmsghdr *cm = NULL; + struct msghdr msg = { 0 }; + struct dmabuf_token token; + ssize_t ret; + + is_devmem = false; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = ctrl_data; + msg.msg_controllen = sizeof(ctrl_data); + ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM); + fprintf(stderr, "recvmsg ret=%ld\n", ret); + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) + continue; + if (ret < 0) { + perror("recvmsg"); + if (errno == EFAULT) { + pr_err("received EFAULT, won't recover"); + goto err_close_client; + } + continue; + } + if (ret == 0) { + errno = 0; + pr_err("client exited"); + goto cleanup; + } + + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { + if (cm->cmsg_level != SOL_SOCKET || + (cm->cmsg_type != SCM_DEVMEM_DMABUF && + cm->cmsg_type != SCM_DEVMEM_LINEAR)) { + fprintf(stderr, "skipping non-devmem cmsg\n"); + continue; + } + + dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm); + is_devmem = true; + + if (cm->cmsg_type == SCM_DEVMEM_LINEAR) { + /* TODO: process data copied from skb's linear + * buffer. + */ + fprintf(stderr, + "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n", + dmabuf_cmsg->frag_size); + + continue; + } + + token.token_start = dmabuf_cmsg->frag_token; + token.token_count = 1; + + total_received += dmabuf_cmsg->frag_size; + fprintf(stderr, + "received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n", + dmabuf_cmsg->frag_offset >> PAGE_SHIFT, + dmabuf_cmsg->frag_offset % getpagesize(), + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token, + total_received, dmabuf_cmsg->dmabuf_id); + + if (dmabuf_cmsg->dmabuf_id != dmabuf_id) { + pr_err("received on wrong dmabuf_id: flow steering error"); + goto err_close_client; + } + + if (dmabuf_cmsg->frag_size % getpagesize()) + non_page_aligned_frags++; + else + page_aligned_frags++; + + provider->memcpy_from_device(tmp_mem, mem, + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size); + + if (do_validation) { + if (validate_buffer(tmp_mem, + dmabuf_cmsg->frag_size)) + goto err_close_client; + } else { + print_nonzero_bytes(tmp_mem, + dmabuf_cmsg->frag_size); + } + + ret = setsockopt(client_fd, SOL_SOCKET, + SO_DEVMEM_DONTNEED, &token, + sizeof(token)); + if (ret != 1) { + pr_err("SO_DEVMEM_DONTNEED not enough tokens"); + goto err_close_client; + } + } + if (!is_devmem) { + pr_err("flow steering error"); + goto err_close_client; + } + + fprintf(stderr, "total_received=%lu\n", total_received); + } + + fprintf(stderr, "%s: ok\n", TEST_PREFIX); + + fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + page_aligned_frags, non_page_aligned_frags); + +cleanup: + err = 0; + +err_close_client: + close(client_fd); +err_close_socket: + close(socket_fd); +err_free_tmp: + free(tmp_mem); +err_unbind: + ynl_sock_destroy(ys); +err_reset_flow_steering: + reset_flow_steering(); +err_reset_rss: + reset_rss(); +err_reset_headersplit: + restore_ring_config(ring_config); +err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); + return err; +} + +int run_devmem_tests(void) +{ + struct ethtool_rings_get_rsp *ring_config; + struct netdev_queue_id *queues; + struct memory_buffer *mem; + struct ynl_sock *ys; + int err = -1; + + mem = provider->alloc(getpagesize() * NUM_PAGES); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return -1; + } + + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + goto err_free_mem; + } + + /* Configure RSS to divert all traffic from our devmem queues */ + if (configure_rss()) { + pr_err("rss error"); + goto err_free_ring_config; + } + + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_rss; + } + + queues = netdev_queue_id_alloc(num_queues); + if (!queues) { + pr_err("Failed to allocate empty queues array"); + goto err_reset_headersplit; + } + + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Binding empty queues array should have failed"); + goto err_unbind; + } + + if (configure_headersplit(ring_config, 0)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; + } + + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; + } + + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Configure dmabuf with header split off should have failed"); + goto err_unbind; + } + + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; + } + + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; + } + + if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_headersplit; + } + + /* Deactivating a bound queue should not be legal */ + if (!check_changing_channels(num_queues, num_queues)) { + pr_err("Deactivating a bound queue should be illegal"); + goto err_unbind; + } + + err = 0; + goto err_unbind; + +err_unbind: + ynl_sock_destroy(ys); +err_reset_headersplit: + restore_ring_config(ring_config); +err_reset_rss: + reset_rss(); +err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); +err_free_mem: + provider->free(mem); + return err; +} + +static uint64_t gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL); +} + +static int do_poll(int fd) +{ + struct pollfd pfd; + int ret; + + pfd.revents = 0; + pfd.fd = fd; + + ret = poll(&pfd, 1, waittime_ms); + if (ret == -1) { + pr_err("poll"); + return -1; + } + + return ret && (pfd.revents & POLLERR); +} + +static int wait_compl(int fd) +{ + int64_t tstop = gettimeofday_ms() + waittime_ms; + char control[CMSG_SPACE(100)] = {}; + struct sock_extended_err *serr; + struct msghdr msg = {}; + struct cmsghdr *cm; + __u32 hi, lo; + int ret; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + while (gettimeofday_ms() < tstop) { + ret = do_poll(fd); + if (ret < 0) + return ret; + if (!ret) + continue; + + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret < 0) { + if (errno == EAGAIN) + continue; + pr_err("recvmsg(MSG_ERRQUEUE)"); + return -1; + } + if (msg.msg_flags & MSG_CTRUNC) { + pr_err("MSG_CTRUNC"); + return -1; + } + + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { + if (cm->cmsg_level != SOL_IP && + cm->cmsg_level != SOL_IPV6) + continue; + if (cm->cmsg_level == SOL_IP && + cm->cmsg_type != IP_RECVERR) + continue; + if (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type != IPV6_RECVERR) + continue; + + serr = (void *)CMSG_DATA(cm); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { + pr_err("wrong origin %u", serr->ee_origin); + return -1; + } + if (serr->ee_errno != 0) { + pr_err("wrong errno %d", serr->ee_errno); + return -1; + } + + hi = serr->ee_data; + lo = serr->ee_info; + + fprintf(stderr, "tx complete [%d,%d]\n", lo, hi); + return 0; + } + } + + pr_err("did not receive tx completion"); + return -1; +} + +static int do_client(struct memory_buffer *mem) +{ + char ctrl_data[CMSG_SPACE(sizeof(__u32))]; + struct sockaddr_in6 server_sin; + struct sockaddr_in6 client_sin; + struct ynl_sock *ys = NULL; + struct iovec iov[MAX_IOV]; + struct msghdr msg = {}; + ssize_t line_size = 0; + struct cmsghdr *cmsg; + char *line = NULL; + int ret, err = -1; + size_t len = 0; + int socket_fd; + __u32 ddmabuf; + int opt = 1; + + ret = parse_address(server_ip, atoi(port), &server_sin); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } + + if (client_ip) { + ret = parse_address(client_ip, atoi(port), &client_sin); + if (ret < 0) { + pr_err("parse client address"); + return ret; + } + } + + socket_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (socket_fd < 0) { + pr_err("create socket"); + return -1; + } + + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; + + ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, + strlen(ifname) + 1); + if (ret) { + pr_err("bindtodevice"); + goto err_close_socket; + } + + if (bind_tx_queue(ifindex, mem->fd, &ys)) { + pr_err("Failed to bind"); + goto err_close_socket; + } + + if (client_ip) { + ret = bind(socket_fd, &client_sin, sizeof(client_sin)); + if (ret) { + pr_err("bind"); + goto err_unbind; + } + } + + ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt)); + if (ret) { + pr_err("set sock opt"); + goto err_unbind; + } + + fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip, + ntohs(server_sin.sin6_port), ifname); + + ret = connect(socket_fd, &server_sin, sizeof(server_sin)); + if (ret) { + pr_err("connect"); + goto err_unbind; + } + + while (1) { + free(line); + line = NULL; + line_size = getline(&line, &len, stdin); + + if (line_size < 0) + break; + + if (max_chunk) { + msg.msg_iovlen = + (line_size + max_chunk - 1) / max_chunk; + if (msg.msg_iovlen > MAX_IOV) { + pr_err("can't partition %zd bytes into maximum of %d chunks", + line_size, MAX_IOV); + goto err_free_line; + } + + for (int i = 0; i < msg.msg_iovlen; i++) { + iov[i].iov_base = (void *)(i * max_chunk); + iov[i].iov_len = max_chunk; + } + + iov[msg.msg_iovlen - 1].iov_len = + line_size - (msg.msg_iovlen - 1) * max_chunk; + } else { + iov[0].iov_base = 0; + iov[0].iov_len = line_size; + msg.msg_iovlen = 1; + } + + msg.msg_iov = iov; + provider->memcpy_to_device(mem, 0, line, line_size); + + msg.msg_control = ctrl_data; + msg.msg_controllen = sizeof(ctrl_data); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_DEVMEM_DMABUF; + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); + + ddmabuf = tx_dmabuf_id; + + *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf; + + ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY); + if (ret < 0) { + pr_err("Failed sendmsg"); + goto err_free_line; + } + + fprintf(stderr, "sendmsg_ret=%d\n", ret); + + if (ret != line_size) { + pr_err("Did not send all bytes %d vs %zd", ret, line_size); + goto err_free_line; + } + + if (wait_compl(socket_fd)) + goto err_free_line; + } + + fprintf(stderr, "%s: tx ok\n", TEST_PREFIX); + + err = 0; + +err_free_line: + free(line); +err_unbind: + ynl_sock_destroy(ys); +err_close_socket: + close(socket_fd); + return err; +} + +int main(int argc, char *argv[]) +{ + struct memory_buffer *mem; + int is_server = 0, opt; + int ret, err = 1; + + while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) { + switch (opt) { + case 'l': + is_server = 1; + break; + case 's': + server_ip = optarg; + break; + case 'c': + client_ip = optarg; + break; + case 'p': + port = optarg; + break; + case 'v': + do_validation = atoll(optarg); + break; + case 'q': + num_queues = atoi(optarg); + break; + case 't': + start_queue = atoi(optarg); + break; + case 'f': + ifname = optarg; + break; + case 'z': + max_chunk = atoi(optarg); + break; + case '?': + fprintf(stderr, "unknown option: %c\n", optopt); + break; + } + } + + if (!ifname) { + pr_err("Missing -f argument"); + return 1; + } + + ifindex = if_nametoindex(ifname); + + fprintf(stderr, "using ifindex=%u\n", ifindex); + + if (!server_ip && !client_ip) { + if (start_queue < 0 && num_queues < 0) { + num_queues = rxq_num(ifindex); + if (num_queues < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } + /* make sure can bind to multiple queues */ + start_queue = num_queues / 2; + num_queues /= 2; + } + + if (start_queue < 0 || num_queues < 0) { + pr_err("Both -t and -q are required"); + return 1; + } + + return run_devmem_tests(); + } + + if (start_queue < 0 && num_queues < 0) { + num_queues = rxq_num(ifindex); + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } + + num_queues = 1; + start_queue = rxq_num(ifindex) - num_queues; + + if (start_queue < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } + + fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues); + } + + for (; optind < argc; optind++) + fprintf(stderr, "extra arguments: %s\n", argv[optind]); + + if (start_queue < 0) { + pr_err("Missing -t argument"); + return 1; + } + + if (num_queues < 0) { + pr_err("Missing -q argument"); + return 1; + } + + if (!server_ip) { + pr_err("Missing -s argument"); + return 1; + } + + if (!port) { + pr_err("Missing -p argument"); + return 1; + } + + mem = provider->alloc(getpagesize() * NUM_PAGES); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return 1; + } + + ret = is_server ? do_server(mem) : do_client(mem); + if (ret) + goto err_free_mem; + + err = 0; + +err_free_mem: + provider->free(mem); + return err; +} diff --git a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py new file mode 100755 index 000000000000..c1e943d53f19 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests related to configuration of HW timestamping +""" + +import errno +from lib.py import ksft_run, ksft_exit, ksft_ge, ksft_eq, KsftSkipEx +from lib.py import NetDrvEnv, EthtoolFamily, NlError + + +def __get_hwtimestamp_support(cfg): + """ Retrieve supported configuration information """ + + try: + tsinfo = cfg.ethnl.tsinfo_get({'header': {'dev-name': cfg.ifname}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported") from e + raise + + ctx = {} + tx = tsinfo.get('tx-types', {}) + rx = tsinfo.get('rx-filters', {}) + + bits = tx.get('bits', {}) + ctx['tx'] = bits.get('bit', []) + bits = rx.get('bits', {}) + ctx['rx'] = bits.get('bit', []) + return ctx + + +def __get_hwtimestamp_config(cfg): + """ Retrieve current TS configuration information """ + + try: + tscfg = cfg.ethnl.tsconfig_get({'header': {'dev-name': cfg.ifname}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via netlink") from e + raise + return tscfg + + +def __set_hwtimestamp_config(cfg, ts): + """ Setup new TS configuration information """ + + ts['header'] = {'dev-name': cfg.ifname} + try: + res = cfg.ethnl.tsconfig_set(ts) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via netlink") from e + raise + return res + + +def test_hwtstamp_tx(cfg): + """ + Test TX timestamp configuration. + The driver should apply provided config and report back proper state. + """ + + orig_tscfg = __get_hwtimestamp_config(cfg) + ts = __get_hwtimestamp_support(cfg) + tx = ts['tx'] + for t in tx: + tscfg = orig_tscfg + tscfg['tx-types']['bits']['bit'] = [t] + res = __set_hwtimestamp_config(cfg, tscfg) + if res is None: + res = __get_hwtimestamp_config(cfg) + ksft_eq(res['tx-types']['bits']['bit'], [t]) + __set_hwtimestamp_config(cfg, orig_tscfg) + + +def test_hwtstamp_rx(cfg): + """ + Test RX timestamp configuration. + The filter configuration is taken from the list of supported filters. + The driver should apply the config without error and report back proper state. + Some extension of the timestamping scope is allowed for PTP filters. + """ + + orig_tscfg = __get_hwtimestamp_config(cfg) + ts = __get_hwtimestamp_support(cfg) + rx = ts['rx'] + for r in rx: + tscfg = orig_tscfg + tscfg['rx-filters']['bits']['bit'] = [r] + res = __set_hwtimestamp_config(cfg, tscfg) + if res is None: + res = __get_hwtimestamp_config(cfg) + if r['index'] == 0 or r['index'] == 1: + ksft_eq(res['rx-filters']['bits']['bit'][0]['index'], r['index']) + else: + # the driver can fallback to some value which has higher coverage for timestamping + ksft_ge(res['rx-filters']['bits']['bit'][0]['index'], r['index']) + __set_hwtimestamp_config(cfg, orig_tscfg) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + ksft_run([test_hwtstamp_tx, test_hwtstamp_rx], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py new file mode 100755 index 000000000000..2a51b60df8a1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Test driver resilience vs page pool allocation failures. +""" + +import errno +import time +import math +import os +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import NetdevFamily, NlError +from lib.py import NetDrvEpEnv +from lib.py import cmd, tool, GenerateTraffic + + +def _write_fail_config(config): + for key, value in config.items(): + path = "/sys/kernel/debug/fail_function/" + with open(path + key, "w", encoding='ascii') as fp: + fp.write(str(value) + "\n") + + +def _enable_pp_allocation_fail(): + if not os.path.exists("/sys/kernel/debug/fail_function"): + raise KsftSkipEx("Kernel built without function error injection (or DebugFS)") + + if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): + _write_fail_config({"inject": "page_pool_alloc_netmems"}) + + _write_fail_config({ + "verbose": 0, + "interval": 511, + "probability": 100, + "times": -1, + }) + + +def _disable_pp_allocation_fail(): + if not os.path.exists("/sys/kernel/debug/fail_function"): + return + + if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): + _write_fail_config({"inject": ""}) + + _write_fail_config({ + "probability": 0, + "times": 0, + }) + + +def test_pp_alloc(cfg, netdevnl): + """ + Configure page pool allocation fail injection while traffic is running. + """ + + def get_stats(): + return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + def check_traffic_flowing(): + stat1 = get_stats() + time.sleep(1) + stat2 = get_stats() + if stat2['rx-packets'] - stat1['rx-packets'] < 4000: + raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets']) + + + try: + stats = get_stats() + except NlError as e: + if e.nl_msg.error == -errno.EOPNOTSUPP: + stats = {} + else: + raise + if 'rx-alloc-fail' not in stats: + raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats") + + set_g = False + traffic = None + try: + traffic = GenerateTraffic(cfg) + + check_traffic_flowing() + + _enable_pp_allocation_fail() + + s1 = get_stats() + time.sleep(3) + s2 = get_stats() + + seen_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail'] + if seen_fails < 1: + raise KsftSkipEx("Allocation failures not increasing") + pkts = s2['rx-packets'] - s1['rx-packets'] + # Expecting one failure per 512 buffers, 3.1x safety margin + want_fails = math.floor(pkts / 512 / 3.1) + if seen_fails < want_fails: + raise KsftSkipEx("Allocation increasing too slowly", seen_fails, + "packets:", pkts) + ksft_pr(f"Seen: pkts:{pkts} fails:{seen_fails} (pass thrs:{want_fails})") + + # Basic failures are fine, try to wobble some settings to catch extra failures + check_traffic_flowing() + g = tool("ethtool", "-g " + cfg.ifname, json=True)[0] + if 'rx' in g and g["rx"] * 2 <= g["rx-max"]: + new_g = g['rx'] * 2 + elif 'rx' in g: + new_g = g['rx'] // 2 + else: + new_g = None + + if new_g: + set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0 + if set_g: + ksft_pr("ethtool -G change retval: success") + else: + ksft_pr("ethtool -G change retval: did not succeed", new_g) + else: + ksft_pr("ethtool -G change retval: did not try") + + time.sleep(0.1) + check_traffic_flowing() + finally: + _disable_pp_allocation_fail() + if traffic: + traffic.stop() + time.sleep(0.1) + if set_g: + cmd(f"ethtool -G {cfg.ifname} rx {g['rx']}") + + +def main() -> None: + """ Ksft boiler plate main """ + netdevnl = NetdevFamily() + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + + ksft_run([test_pp_alloc], args=(cfg, netdevnl, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py new file mode 100755 index 000000000000..19847f3d4a00 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_api.py @@ -0,0 +1,476 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +API level tests for RSS (mostly Netlink vs IOCTL). +""" + +import errno +import glob +import random +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import defer, ethtool, CmdExitFailure +from lib.py import EthtoolFamily, NlError +from lib.py import NetDrvEnv + + +def _require_2qs(cfg): + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + return qcnt + + +def _ethtool_create(cfg, act, opts): + output = ethtool(f"{act} {cfg.ifname} {opts}").stdout + # Output will be something like: "New RSS context is 1" or + # "Added rule with ID 7", we want the integer from the end + return int(output.split()[-1]) + + +def _ethtool_get_cfg(cfg, fl_type, to_nl=False): + descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + if to_nl: + converter = { + "IP SA": "ip-src", + "IP DA": "ip-dst", + "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3", + } + + ret = set() + else: + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + } + + ret = "" + + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + if to_nl: + ret.add(converter[line]) + else: + ret += converter[line] + return ret + + +def test_rxfh_nl_set_fail(cfg): + """ + Test error path of Netlink SET. + """ + _require_2qs(cfg) + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + with ksft_raises(NlError): + ethnl.rss_set({"header": {"dev-name": "lo"}, + "indir": None}) + + with ksft_raises(NlError): + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [100000]}) + ntf = next(ethnl.poll_ntf(duration=0.2), None) + ksft_is(ntf, None) + + +def test_rxfh_nl_set_indir(cfg): + """ + Test setting indirection table via Netlink. + """ + qcnt = _require_2qs(cfg) + + # Test some SETs with a value + reset = defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, "indir": None}) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), {1}) + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [0, 1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + # Make sure we can't set the queue count below max queue used + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 0 rx 1") + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 1 rx 0") + + # Test reset back to default + reset.exec() + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), set(range(qcnt))) + + +def test_rxfh_nl_set_indir_ctx(cfg): + """ + Test setting indirection table for a custom context via Netlink. + """ + _require_2qs(cfg) + + # Get setting for ctx 0, we'll make sure they don't get clobbered + dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + + # Create context + ctx_id = _ethtool_create(cfg, "-X", "context new") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id, "indir": [1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id}) + ksft_eq(set(rss.get("indir", [-1])), {1}) + + ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(ctx0, dflt) + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id, "indir": [0, 1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id}) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(ctx0, dflt) + + # Make sure we can't set the queue count below max queue used + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 0 rx 1") + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 1 rx 0") + + +def test_rxfh_indir_ntf(cfg): + """ + Check that Netlink notifications are generated when RSS indirection + table was modified. + """ + _require_2qs(cfg) + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethtool(f"--disable-netlink -X {cfg.ifname} weight 0 1") + reset = defer(ethtool, f"-X {cfg.ifname} default") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(set(ntf["msg"]["indir"]), {1}) + + reset.exec() + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after reset") + ksft_eq(ntf["name"], "rss-ntf") + ksft_is(ntf["msg"].get("context"), None) + ksft_ne(set(ntf["msg"]["indir"]), {1}) + + +def test_rxfh_indir_ctx_ntf(cfg): + """ + Check that Netlink notifications are generated when RSS indirection + table was modified on an additional RSS context. + """ + _require_2qs(cfg) + + ctx_id = _ethtool_create(cfg, "-X", "context new") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} weight 0 1") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(ntf["msg"].get("context"), ctx_id) + ksft_eq(set(ntf["msg"]["indir"]), {1}) + + +def test_rxfh_nl_set_key(cfg): + """ + Test setting hashing key via Netlink. + """ + + dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, + "hkey": dflt["hkey"], "indir": None}) + + # Empty key should error out + with ksft_raises(NlError) as cm: + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hkey": None}) + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.hkey') + + # Set key to random + mod = random.randbytes(len(dflt["hkey"])) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hkey": mod}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(rss.get("hkey", [-1]), mod) + + # Set key to random and indir tbl to something at once + mod = random.randbytes(len(dflt["hkey"])) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [0, 1], "hkey": mod}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(rss.get("hkey", [-1]), mod) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + +def test_rxfh_fields(cfg): + """ + Test reading Rx Flow Hash over Netlink. + """ + + flow_types = ["tcp4", "tcp6", "udp4", "udp6"] + ethnl = EthtoolFamily() + + cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + for fl_type in flow_types: + one = _ethtool_get_cfg(cfg, fl_type, to_nl=True) + ksft_eq(one, cfg_nl["flow-hash"][fl_type], + comment="Config for " + fl_type) + + +def test_rxfh_fields_set(cfg): + """ Test configuring Rx Flow Hash over Netlink. """ + + flow_types = ["tcp4", "tcp6", "udp4", "udp6"] + + # Collect current settings + cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # symmetric hashing is config-order-sensitive make sure we leave + # symmetric mode, or make the flow-hash sym-compatible first + changes = [{"flow-hash": cfg_old["flow-hash"],}, + {"input-xfrm": cfg_old.get("input-xfrm", {}),}] + if cfg_old.get("input-xfrm"): + changes = list(reversed(changes)) + for old in changes: + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old) + + # symmetric hashing prevents some of the configs below + if cfg_old.get("input-xfrm"): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": {}}) + + for fl_type in flow_types: + cur = _ethtool_get_cfg(cfg, fl_type) + if cur == "sdfn": + change_nl = {"ip-src", "ip-dst"} + change_ic = "sd" + else: + change_nl = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"} + change_ic = "sdfn" + + cfg.ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {fl_type: change_nl} + }) + reset = defer(ethtool, f"--disable-netlink -N {cfg.ifname} " + f"rx-flow-hash {fl_type} {cur}") + + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(change_nl, cfg_nl["flow-hash"][fl_type], + comment=f"Config for {fl_type} over Netlink") + cfg_ic = _ethtool_get_cfg(cfg, fl_type) + ksft_eq(change_ic, cfg_ic, + comment=f"Config for {fl_type} over IOCTL") + + reset.exec() + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(cfg_old["flow-hash"][fl_type], cfg_nl["flow-hash"][fl_type], + comment=f"Un-config for {fl_type} over Netlink") + cfg_ic = _ethtool_get_cfg(cfg, fl_type) + ksft_eq(cur, cfg_ic, comment=f"Un-config for {fl_type} over IOCTL") + + # Try to set multiple at once, the defer was already installed at the start + change = {"ip-src"} + if change == cfg_old["flow-hash"]["tcp4"]: + change = {"ip-dst"} + cfg.ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {x: change for x in flow_types} + }) + + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + for fl_type in flow_types: + ksft_eq(change, cfg_nl["flow-hash"][fl_type], + comment=f"multi-config for {fl_type} over Netlink") + + +def test_rxfh_fields_set_xfrm(cfg): + """ Test changing Rx Flow Hash vs xfrm_input at once. """ + + def set_rss(cfg, xfrm, fh): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": xfrm, "flow-hash": fh}) + + # Install the reset handler + cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # symmetric hashing is config-order-sensitive make sure we leave + # symmetric mode, or make the flow-hash sym-compatible first + changes = [{"flow-hash": cfg_old["flow-hash"],}, + {"input-xfrm": cfg_old.get("input-xfrm", {}),}] + if cfg_old.get("input-xfrm"): + changes = list(reversed(changes)) + for old in changes: + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old) + + # Make sure we start with input-xfrm off, and tcp4 config non-sym + set_rss(cfg, {}, {}) + set_rss(cfg, {}, {"tcp4": {"ip-src"}}) + + # Setting sym and fixing tcp4 config not expected to pass right now + with ksft_raises(NlError): + set_rss(cfg, {"sym-xor"}, {"tcp4": {"ip-src", "ip-dst"}}) + # One at a time should work, hopefully + set_rss(cfg, 0, {"tcp4": {"ip-src", "ip-dst"}}) + no_support = False + try: + set_rss(cfg, {"sym-xor"}, {}) + except NlError: + try: + set_rss(cfg, {"sym-or-xor"}, {}) + except NlError: + no_support = True + if no_support: + raise KsftSkipEx("no input-xfrm supported") + # Disabling two at once should not work either without kernel changes + with ksft_raises(NlError): + set_rss(cfg, {}, {"tcp4": {"ip-src"}}) + + +def test_rxfh_fields_ntf(cfg): + """ Test Rx Flow Hash notifications. """ + + cur = _ethtool_get_cfg(cfg, "tcp4") + if cur == "sdfn": + change = {"ip-src", "ip-dst"} + else: + change = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"} + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {"tcp4": change} + }) + reset = defer(ethtool, + f"--disable-netlink -N {cfg.ifname} rx-flow-hash tcp4 {cur}") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after IOCTL change") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(ntf["msg"]["flow-hash"]["tcp4"], change) + ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None) + + reset.exec() + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after Netlink change") + ksft_eq(ntf["name"], "rss-ntf") + ksft_ne(ntf["msg"]["flow-hash"]["tcp4"], change) + ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None) + + +def test_rss_ctx_add(cfg): + """ Test creating an additional RSS context via Netlink """ + + _require_2qs(cfg) + + # Test basic creation + ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}}) + d = defer(ethtool, f"-X {cfg.ifname} context {ctx.get('context')} delete") + ksft_ne(ctx.get("context", 0), 0) + ksft_ne(set(ctx.get("indir", [0])), {0}, + comment="Driver should init the indirection table") + + # Try requesting the ID we just got allocated + with ksft_raises(NlError) as cm: + ctx = cfg.ethnl.rss_create_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx.get("context"), + }) + ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete") + d.exec() + ksft_eq(cm.exception.nl_msg.error, -errno.EBUSY) + + # Test creating with a specified RSS table, and context ID + ctx_id = ctx.get("context") + ctx = cfg.ethnl.rss_create_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx_id, + "indir": [1], + }) + ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete") + ksft_eq(ctx.get("context"), ctx_id) + ksft_eq(set(ctx.get("indir", [0])), {1}) + + +def test_rss_ctx_ntf(cfg): + """ Test notifications for creating additional RSS contexts """ + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + # Create / delete via Netlink + ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}}) + cfg.ethnl.rss_delete_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx["context"], + }) + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[NL] No notification after context creation") + ksft_eq(ntf["name"], "rss-create-ntf") + ksft_eq(ctx, ntf["msg"]) + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[NL] No notification after context deletion") + ksft_eq(ntf["name"], "rss-delete-ntf") + + # Create / deleve via IOCTL + ctx_id = _ethtool_create(cfg, "--disable-netlink -X", "context new") + ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} delete") + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[IOCTL] No notification after context creation") + ksft_eq(ntf["name"], "rss-create-ntf") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[IOCTL] No notification after context deletion") + ksft_eq(ntf["name"], "rss-delete-ntf") + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py new file mode 100755 index 000000000000..ed7e405682f0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -0,0 +1,832 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import datetime +import random +import re +from lib.py import ksft_run, ksft_pr, ksft_exit +from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises +from lib.py import NetDrvEpEnv +from lib.py import EthtoolFamily, NetdevFamily +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import rand_port +from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure + + +def _rss_key_str(key): + return ":".join(["{:02x}".format(x) for x in key]) + + +def _rss_key_rand(length): + return [random.randint(0, 255) for _ in range(length)] + + +def _rss_key_check(cfg, data=None, context=0): + if data is None: + data = get_rss(cfg, context=context) + if 'rss-hash-key' not in data: + return + non_zero = [x for x in data['rss-hash-key'] if x != 0] + ksft_eq(bool(non_zero), True, comment=f"RSS key is all zero {data['rss-hash-key']}") + + +def get_rss(cfg, context=0): + return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0] + + +def get_drop_err_sum(cfg): + stats = ip("-s -s link show dev " + cfg.ifname, json=True)[0] + cnt = 0 + for key in ['errors', 'dropped', 'over_errors', 'fifo_errors', + 'length_errors', 'crc_errors', 'missed_errors', + 'frame_errors']: + cnt += stats["stats64"]["rx"][key] + return cnt, stats["stats64"]["tx"]["carrier_changes"] + + +def ethtool_create(cfg, act, opts): + output = ethtool(f"{act} {cfg.ifname} {opts}").stdout + # Output will be something like: "New RSS context is 1" or + # "Added rule with ID 7", we want the integer from the end + return int(output.split()[-1]) + + +def require_ntuple(cfg): + features = ethtool(f"-k {cfg.ifname}", json=True)[0] + if not features["ntuple-filters"]["active"]: + # ntuple is more of a capability than a config knob, don't bother + # trying to enable it (until some driver actually needs it). + raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"])) + + +def require_context_cnt(cfg, need_cnt): + # There's no good API to get the context count, so the tests + # which try to add a lot opportunisitically set the count they + # discovered. Careful with test ordering! + if need_cnt and cfg.context_cnt and cfg.context_cnt < need_cnt: + raise KsftSkipEx(f"Test requires at least {need_cnt} contexts, but device only has {cfg.context_cnt}") + + +# Get Rx packet counts for all queues, as a simple list of integers +# if @prev is specified the prev counts will be subtracted +def _get_rx_cnts(cfg, prev=None): + cfg.wait_hw_stats_settle() + data = cfg.netdevnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, dump=True) + data = [x for x in data if x['queue-type'] == "rx"] + max_q = max([x["queue-id"] for x in data]) + queue_stats = [0] * (max_q + 1) + for q in data: + queue_stats[q["queue-id"]] = q["rx-packets"] + if prev and q["queue-id"] < len(prev): + queue_stats[q["queue-id"]] -= prev[q["queue-id"]] + return queue_stats + + +def _send_traffic_check(cfg, port, name, params): + # params is a dict with 3 possible keys: + # - "target": required, which queues we expect to get iperf traffic + # - "empty": optional, which queues should see no traffic at all + # - "noise": optional, which queues we expect to see low traffic; + # used for queues of the main context, since some background + # OS activity may use those queues while we're testing + # the value for each is a list, or some other iterable containing queue ids. + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + directed = sum(cnts[i] for i in params['target']) + + ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts)) + if params.get('noise'): + ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2, + f"traffic on other queues ({name})':" + str(cnts)) + if params.get('empty'): + ksft_eq(sum(cnts[i] for i in params['empty']), 0, + f"traffic on inactive queues ({name}): " + str(cnts)) + + +def _ntuple_rule_check(cfg, rule_id, ctx_id): + """Check that ntuple rule references RSS context ID""" + text = ethtool(f"-n {cfg.ifname} rule {rule_id}").stdout + pattern = f"RSS Context (ID: )?{ctx_id}" + ksft_true(re.search(pattern, text), "RSS context not referenced in ntuple rule") + + +def test_rss_key_indir(cfg): + """Test basics like updating the main RSS key and indirection table.""" + + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 3: + raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") + + data = get_rss(cfg) + want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] + for k in want_keys: + if k not in data: + raise KsftFailEx("ethtool results missing key: " + k) + if not data[k]: + raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}") + + _rss_key_check(cfg, data=data) + key_len = len(data['rss-hash-key']) + + # Set the key + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key)) + + data = get_rss(cfg) + ksft_eq(key, data['rss-hash-key']) + + # Set the indirection table and the key together + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} equal 3 hkey " + _rss_key_str(key)) + reset_indir = defer(ethtool, f"-X {cfg.ifname} default") + + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(2, max(data['rss-indirection-table'])) + + # Reset indirection table and set the key + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} default hkey " + _rss_key_str(key)) + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(qcnt - 1, max(data['rss-indirection-table'])) + + # Set the indirection table + ethtool(f"-X {cfg.ifname} equal 2") + data = get_rss(cfg) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(1, max(data['rss-indirection-table'])) + + # Check we only get traffic on the first 2 queues + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + # 2 queues, 20k packets, must be at least 5k per queue + ksft_ge(cnts[0], 5000, "traffic on main context (1/2): " + str(cnts)) + ksft_ge(cnts[1], 5000, "traffic on main context (2/2): " + str(cnts)) + # The other queues should be unused + ksft_eq(sum(cnts[2:]), 0, "traffic on unused queues: " + str(cnts)) + + # Restore, and check traffic gets spread again + reset_indir.exec() + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + if qcnt > 4: + # First two queues get less traffic than all the rest + ksft_lt(sum(cnts[:2]), sum(cnts[2:]), + "traffic distributed: " + str(cnts)) + else: + # When queue count is low make sure third queue got significant pkts + ksft_ge(cnts[2], 3500, "traffic distributed: " + str(cnts)) + + +def test_rss_queue_reconfigure(cfg, main_ctx=True): + """Make sure queue changes can't override requested RSS config. + + By default main RSS table should change to include all queues. + When user sets a specific RSS config the driver should preserve it, + even when queue count changes. Driver should refuse to deactivate + queues used in the user-set RSS config. + """ + + if not main_ctx: + require_ntuple(cfg) + + # Start with 4 queues, an arbitrary known number. + try: + qcnt = len(_get_rx_cnts(cfg)) + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except: + raise KsftSkipEx("Not enough queues for the test or qstat not supported") + + if main_ctx: + ctx_id = 0 + ctx_ref = "" + else: + ctx_id = ethtool_create(cfg, "-X", "context new") + ctx_ref = f"context {ctx_id}" + defer(ethtool, f"-X {cfg.ifname} {ctx_ref} delete") + + # Indirection table should be distributing to all queues. + data = get_rss(cfg, context=ctx_id) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(3, max(data['rss-indirection-table'])) + + # Increase queues, indirection table should be distributing to all queues. + # It's unclear whether tables of additional contexts should be reset, too. + if main_ctx: + ethtool(f"-L {cfg.ifname} combined 5") + data = get_rss(cfg) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(4, max(data['rss-indirection-table'])) + ethtool(f"-L {cfg.ifname} combined 4") + + # Configure the table explicitly + port = rand_port() + ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 0 1") + if main_ctx: + other_key = 'empty' + defer(ethtool, f"-X {cfg.ifname} default") + else: + other_key = 'noise' + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}" + ntuple = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + + _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3), + other_key: (1, 2) }) + + # We should be able to increase queues, but table should be left untouched + ethtool(f"-L {cfg.ifname} combined 5") + data = get_rss(cfg, context=ctx_id) + ksft_eq({0, 3}, set(data['rss-indirection-table'])) + + _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3), + other_key: (1, 2, 4) }) + + # Setting queue count to 3 should fail, queue 3 is used + try: + ethtool(f"-L {cfg.ifname} combined 3") + except CmdExitFailure: + pass + else: + raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})") + + if not main_ctx: + ethtool(f"-L {cfg.ifname} combined 4") + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 1" + try: + # this targets queue 4, which doesn't exist + ntuple2 = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") + except CmdExitFailure: + pass + else: + raise Exception(f"Driver didn't prevent us from targeting a nonexistent queue (context {ctx_id})") + # change the table to target queues 0 and 2 + ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0") + # ntuple rule therefore targets queues 1 and 3 + try: + ntuple2 = ethtool_create(cfg, "-N", flow) + except CmdExitFailure: + ksft_pr("Driver does not support rss + queue offset") + return + + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") + # should replace existing filter + ksft_eq(ntuple, ntuple2) + _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3), + 'noise' : (0, 2) }) + # Setting queue count to 3 should fail, queue 3 is used + try: + ethtool(f"-L {cfg.ifname} combined 3") + except CmdExitFailure: + pass + else: + raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})") + + +def test_rss_resize(cfg): + """Test resizing of the RSS table. + + Some devices dynamically increase and decrease the size of the RSS + indirection table based on the number of enabled queues. + When that happens driver must maintain the balance of entries + (preferably duplicating the smaller table). + """ + + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels['combined-max'] + qcnt = channels['combined-count'] + + if ch_max < 2: + raise KsftSkipEx(f"Not enough queues for the test: {ch_max}") + + ethtool(f"-L {cfg.ifname} combined 2") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + ethtool(f"-X {cfg.ifname} weight 1 7") + defer(ethtool, f"-X {cfg.ifname} default") + + ethtool(f"-L {cfg.ifname} combined {ch_max}") + data = get_rss(cfg) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(1, max(data['rss-indirection-table'])) + + ksft_eq(7, + data['rss-indirection-table'].count(1) / + data['rss-indirection-table'].count(0), + f"Table imbalance after resize: {data['rss-indirection-table']}") + + +def test_hitless_key_update(cfg): + """Test that flows may be rehashed without impacting traffic. + + Some workloads may want to rehash the flows in response to an imbalance. + Most effective way to do that is changing the RSS key. Check that changing + the key does not cause link flaps or traffic disruption. + + Disrupting traffic for key update is not a bug, but makes the key + update unusable for rehashing under load. + """ + data = get_rss(cfg) + key_len = len(data['rss-hash-key']) + + ethnl = EthtoolFamily() + key = random.randbytes(key_len) + + tgen = GenerateTraffic(cfg) + try: + errors0, carrier0 = get_drop_err_sum(cfg) + t0 = datetime.datetime.now() + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, "hkey": key}) + t1 = datetime.datetime.now() + errors1, carrier1 = get_drop_err_sum(cfg) + finally: + tgen.wait_pkts_and_stop(5000) + + ksft_lt((t1 - t0).total_seconds(), 0.15) + ksft_eq(errors1 - errors1, 0) + ksft_eq(carrier1 - carrier0, 0) + + +def test_rss_context_dump(cfg): + """ + Test dumping RSS contexts. This tests mostly exercises the kernel APIs. + """ + + # Get a random key of the right size + data = get_rss(cfg) + if 'rss-hash-key' in data: + key_data = _rss_key_rand(len(data['rss-hash-key'])) + key = _rss_key_str(key_data) + else: + key_data = [] + key = "ba:ad" + + ids = [] + try: + ids.append(ethtool_create(cfg, "-X", f"context new")) + defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete") + + ids.append(ethtool_create(cfg, "-X", f"context new weight 1 1")) + defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete") + + ids.append(ethtool_create(cfg, "-X", f"context new hkey {key}")) + defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete") + except CmdExitFailure: + if not ids: + raise KsftSkipEx("Unable to add any contexts") + ksft_pr(f"Added only {len(ids)} out of 3 contexts") + + expect_tuples = set([(cfg.ifname, -1)] + [(cfg.ifname, ctx_id) for ctx_id in ids]) + + # Dump all + ctxs = cfg.ethnl.rss_get({}, dump=True) + tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs] + ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump") + ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname]) + ksft_eq(expect_tuples, ctx_tuples) + + # Sanity-check the results + for data in ctxs: + ksft_ne(set(data.get('indir', [1])), {0}, "indir table is all zero") + ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero") + + # More specific checks + if len(ids) > 1 and data.get('context') == ids[1]: + ksft_eq(set(data['indir']), {0, 1}, + "ctx1 - indir table mismatch") + if len(ids) > 2 and data.get('context') == ids[2]: + ksft_eq(data['hkey'], bytes(key_data), "ctx2 - key mismatch") + + # Ifindex filter + ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}, dump=True) + tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs] + ctx_tuples = set(tuples) + ksft_eq(len(tuples), len(ctx_tuples), "duplicates in context dump") + ksft_eq(expect_tuples, ctx_tuples) + + # Skip ctx 0 + expect_tuples.remove((cfg.ifname, -1)) + + ctxs = cfg.ethnl.rss_get({'start-context': 1}, dump=True) + tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs] + ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump") + ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname]) + ksft_eq(expect_tuples, ctx_tuples) + + # And finally both with ifindex and skip main + ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}, 'start-context': 1}, dump=True) + ctx_tuples = set([(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]) + ksft_eq(expect_tuples, ctx_tuples) + + +def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): + """ + Test separating traffic into RSS contexts. + The queues will be allocated 2 for each context: + ctx0 ctx1 ctx2 ctx3 + [0 1] [2 3] [4 5] [6 7] ... + """ + + require_ntuple(cfg) + + requested_ctx_cnt = ctx_cnt + + # Try to allocate more queues when necessary + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 2 + 2 * ctx_cnt: + try: + ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}") + ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + ports = [] + + # Use queues 0 and 1 for normal traffic + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + for i in range(ctx_cnt): + want_cfg = f"start {2 + i * 2} equal 2" + create_cfg = want_cfg if create_with_cfg else "" + + try: + ctx_id = ethtool_create(cfg, "-X", f"context new {create_cfg}") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + except CmdExitFailure: + # try to carry on and skip at the end + if i == 0: + raise + ksft_pr(f"Failed to create context {i + 1}, trying to test what we got") + ctx_cnt = i + if cfg.context_cnt is None: + cfg.context_cnt = ctx_cnt + break + + _rss_key_check(cfg, context=ctx_id) + + if not create_with_cfg: + ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}") + _rss_key_check(cfg, context=ctx_id) + + # Sanity check the context we just created + data = get_rss(cfg, ctx_id) + ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data)) + ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data)) + + ports.append(rand_port()) + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}" + ntuple = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + + _ntuple_rule_check(cfg, ntuple, ctx_id) + + for i in range(ctx_cnt): + _send_traffic_check(cfg, ports[i], f"context {i}", + { 'target': (2+i*2, 3+i*2), + 'noise': (0, 1), + 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) }) + + if requested_ctx_cnt != ctx_cnt: + raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") + + +def test_rss_context4(cfg): + test_rss_context(cfg, 4) + + +def test_rss_context32(cfg): + test_rss_context(cfg, 32) + + +def test_rss_context4_create_with_cfg(cfg): + test_rss_context(cfg, 4, create_with_cfg=True) + + +def test_rss_context_queue_reconfigure(cfg): + test_rss_queue_reconfigure(cfg, main_ctx=False) + + +def test_rss_context_out_of_order(cfg, ctx_cnt=4): + """ + Test separating traffic into RSS contexts. + Contexts are removed in semi-random order, and steering re-tested + to make sure removal doesn't break steering to surviving contexts. + Test requires 3 contexts to work. + """ + + require_ntuple(cfg) + require_context_cnt(cfg, 4) + + # Try to allocate more queues when necessary + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 2 + 2 * ctx_cnt: + try: + ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}") + ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + ntuple = [] + ctx = [] + ports = [] + + def remove_ctx(idx): + ntuple[idx].exec() + ntuple[idx] = None + ctx[idx].exec() + ctx[idx] = None + + def check_traffic(): + for i in range(ctx_cnt): + if ctx[i]: + expected = { + 'target': (2+i*2, 3+i*2), + 'noise': (0, 1), + 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) + } + else: + expected = { + 'target': (0, 1), + 'empty': range(2, 2+2*ctx_cnt) + } + + _send_traffic_check(cfg, ports[i], f"context {i}", expected) + + # Use queues 0 and 1 for normal traffic + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + for i in range(ctx_cnt): + ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2") + ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")) + + ports.append(rand_port()) + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")) + + check_traffic() + + # Remove middle context + remove_ctx(ctx_cnt // 2) + check_traffic() + + # Remove first context + remove_ctx(0) + check_traffic() + + # Remove last context + remove_ctx(-1) + check_traffic() + + +def test_rss_context_overlap(cfg, other_ctx=0): + """ + Test contexts overlapping with each other. + Use 4 queues for the main context, but only queues 2 and 3 for context 1. + """ + + require_ntuple(cfg) + if other_ctx: + require_context_cnt(cfg, 2) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + if other_ctx == 0: + ethtool(f"-X {cfg.ifname} equal 4") + defer(ethtool, f"-X {cfg.ifname} default") + else: + other_ctx = ethtool_create(cfg, "-X", "context new") + ethtool(f"-X {cfg.ifname} context {other_ctx} equal 4") + defer(ethtool, f"-X {cfg.ifname} context {other_ctx} delete") + + ctx_id = ethtool_create(cfg, "-X", "context new") + ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + port = rand_port() + if other_ctx: + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {other_ctx}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + # Test the main context + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts)) + ksft_ge(sum(cnts[ :2]), 7000, "traffic on main context (1/2): " + str(cnts)) + ksft_ge(sum(cnts[2:4]), 7000, "traffic on main context (2/2): " + str(cnts)) + if other_ctx == 0: + ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) + + # Now create a rule for context 1 and make sure traffic goes to a subset + if other_ctx: + ntuple.exec() + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + directed = sum(cnts[2:4]) + ksft_lt(sum(cnts[ :2]), directed / 2, "traffic on main context: " + str(cnts)) + ksft_ge(directed, 20000, "traffic on extra context: " + str(cnts)) + if other_ctx == 0: + ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) + + +def test_rss_context_overlap2(cfg): + test_rss_context_overlap(cfg, True) + + +def test_flow_add_context_missing(cfg): + """ + Test that we are not allowed to add a rule pointing to an RSS context + which was never created. + """ + + require_ntuple(cfg) + + # Find a context which doesn't exist + for ctx_id in range(1, 100): + try: + get_rss(cfg, context=ctx_id) + except CmdExitFailure: + break + + with ksft_raises(CmdExitFailure) as cm: + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port 1234 context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ethtool(f"-N {cfg.ifname} delete {ntuple_id}") + if cm.exception: + ksft_in('Invalid argument', cm.exception.cmd.stderr) + + +def test_delete_rss_context_busy(cfg): + """ + Test that deletion returns -EBUSY when an rss context is being used + by an ntuple filter. + """ + + require_ntuple(cfg) + + # create additional rss context + ctx_id = ethtool_create(cfg, "-X", "context new") + ctx_deleter = defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # utilize context from ntuple filter + port = rand_port() + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + # attempt to delete in-use context + try: + ctx_deleter.exec_only() + ctx_deleter.cancel() + raise KsftFailEx(f"deleted context {ctx_id} used by rule {ntuple_id}") + except CmdExitFailure: + pass + + +def test_rss_ntuple_addition(cfg): + """ + Test that the queue offset (ring_cookie) of an ntuple rule is added + to the queue number read from the indirection table. + """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + # Use queue 0 for normal traffic + ethtool(f"-X {cfg.ifname} equal 1") + defer(ethtool, f"-X {cfg.ifname} default") + + # create additional rss context + ctx_id = ethtool_create(cfg, "-X", "context new equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # utilize context from ntuple filter + port = rand_port() + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 2" + try: + ntuple_id = ethtool_create(cfg, "-N", flow) + except CmdExitFailure: + raise KsftSkipEx("Ntuple filter with RSS and nonzero action not supported") + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + _send_traffic_check(cfg, port, f"context {ctx_id}", { 'target': (2, 3), + 'empty' : (1,), + 'noise' : (0,) }) + + +def test_rss_default_context_rule(cfg): + """ + Allocate a port, direct this port to context 0, then create a new RSS + context and steer all TCP traffic to it (context 1). Verify that: + * Traffic to the specific port continues to use queues of the main + context (0/1). + * Traffic to any other TCP port is redirected to the new context + (queues 2/3). + """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except Exception as exc: + raise KsftSkipEx("Not enough queues for the test") from exc + + # Use queues 0 and 1 for the main context + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + # Create a new RSS context that uses queues 2 and 3 + ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # Generic low-priority rule: redirect all TCP traffic to the new context. + # Give it an explicit higher location number (lower priority). + flow_generic = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} context {ctx_id} loc 1" + ethtool(f"-N {cfg.ifname} {flow_generic}") + defer(ethtool, f"-N {cfg.ifname} delete 1") + + # Specific high-priority rule for a random port that should stay on context 0. + # Assign loc 0 so it is evaluated before the generic rule. + port_main = rand_port() + flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0" + ethtool(f"-N {cfg.ifname} {flow_main}") + defer(ethtool, f"-N {cfg.ifname} delete 0") + + _ntuple_rule_check(cfg, 1, ctx_id) + + # Verify that traffic matching the specific rule still goes to queues 0/1 + _send_traffic_check(cfg, port_main, "context 0", + { 'target': (0, 1), + 'empty' : (2, 3) }) + + # And that traffic for any other port is steered to the new context + port_other = rand_port() + _send_traffic_check(cfg, port_other, f"context {ctx_id}", + { 'target': (2, 3), + 'noise' : (0, 1) }) + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.context_cnt = None + cfg.ethnl = EthtoolFamily() + cfg.netdevnl = NetdevFamily() + + ksft_run([test_rss_key_indir, test_rss_queue_reconfigure, + test_rss_resize, test_hitless_key_update, + test_rss_context, test_rss_context4, test_rss_context32, + test_rss_context_dump, test_rss_context_queue_reconfigure, + test_rss_context_overlap, test_rss_context_overlap2, + test_rss_context_out_of_order, test_rss_context4_create_with_cfg, + test_flow_add_context_missing, + test_delete_rss_context_busy, test_rss_ntuple_addition, + test_rss_default_context_rule], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py new file mode 100755 index 000000000000..6fa95fe27c47 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests for RSS hashing on IPv6 Flow Label. +""" + +import glob +import os +import socket +from lib.py import CmdExitFailure +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \ + ksft_not_in, ksft_raises, KsftSkipEx +from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port +from lib.py import NetDrvEpEnv + + +def _check_system(cfg): + if not hasattr(socket, "SO_INCOMING_CPU"): + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + + for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt", + f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]: + try: + with open(f, 'r') as fp: + setting = fp.read().strip() + # CPU mask will be zeros and commas + if setting.replace("0", "").replace(",", ""): + raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}") + except FileNotFoundError: + pass + + # 1 is the default, if someone changed it we probably shouldn"t mess with it + af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout + if af.strip() != "1": + raise KsftSkipEx("Remote does not have auto_flowlabels enabled") + + +def _ethtool_get_cfg(cfg, fl_type): + descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + "IPv6 Flow Label": "l", + } + + ret = "" + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + ret += converter[line] + return ret + + +def _traffic(cfg, one_sock, one_cpu): + local_port = rand_port(socket.SOCK_DGRAM) + remote_port = rand_port(socket.SOCK_DGRAM) + + sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + sock.bind(("", local_port)) + sock.connect((cfg.remote_addr_v["6"], 0)) + if one_sock: + send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \ + "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-" + else: + send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \ + f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done" + + cpus = set() + with bkg(send, shell=True, host=cfg.remote, exit_wait=True): + for _ in range(20): + fd_read_timeout(sock.fileno(), 1) + cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) + cpus.add(cpu) + + if one_cpu: + ksft_eq(len(cpus), 1, + f"{one_sock=} - expected one CPU, got traffic on: {cpus=}") + else: + ksft_ge(len(cpus), 2, + f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}") + + +def test_rss_flow_label(cfg): + """ + Test hashing on IPv6 flow label. Send traffic over a single socket + and over multiple sockets. Depend on the remote having auto-label + enabled so that it randomizes the label per socket. + """ + + cfg.require_ipver("6") + cfg.require_cmd("socat", remote=True) + _check_system(cfg) + + # Enable flow label hashing for UDP6 + initial = _ethtool_get_cfg(cfg, "udp6") + no_lbl = initial.replace("l", "") + if "l" not in initial: + try: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}") + except CmdExitFailure as exc: + raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc + + defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _traffic(cfg, one_sock=True, one_cpu=True) + _traffic(cfg, one_sock=False, one_cpu=False) + + # Disable it, we should see no hashing (reset was already defer()ed) + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}") + + _traffic(cfg, one_sock=False, one_cpu=True) + + +def _check_v4_flow_types(cfg): + for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]: + try: + cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + ksft_not_in("Flow Label", cur, + comment=f"{fl_type=} has Flow Label:" + cur) + except CmdExitFailure: + # Probably does not support this flow type + pass + + +def test_rss_flow_label_6only(cfg): + """ + Test interactions with IPv4 flow types. It should not be possible to set + IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also + not appear in the IPv4 "current config". + """ + + with ksft_raises(CmdExitFailure) as cm: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl") + ksft_in("Invalid argument", cm.exception.cmd.stderr) + + _check_v4_flow_types(cfg) + + # Try to enable Flow Labels and check again, in case it leaks thru + initial = _ethtool_get_cfg(cfg, "udp6") + changed = initial.replace("l", "") if "l" in initial else initial + "l" + + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}") + restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _check_v4_flow_types(cfg) + restore.exec() + _check_v4_flow_types(cfg) + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + ksft_run([test_rss_flow_label, + test_rss_flow_label_6only], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py new file mode 100755 index 000000000000..72880e388478 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import multiprocessing +import socket +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout +from lib.py import NetDrvEpEnv +from lib.py import EthtoolFamily, NetdevFamily +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import rand_port + + +def traffic(cfg, local_port, remote_port, ipver): + af_inet = socket.AF_INET if ipver == "4" else socket.AF_INET6 + sock = socket.socket(af_inet, socket.SOCK_DGRAM) + sock.bind(("", local_port)) + sock.connect((cfg.remote_addr_v[ipver], remote_port)) + tgt = f"{ipver}:[{cfg.addr_v[ipver]}]:{local_port},sourceport={remote_port}" + cmd("echo a | socat - UDP" + tgt, host=cfg.remote) + fd_read_timeout(sock.fileno(), 5) + return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) + + +def test_rss_input_xfrm(cfg, ipver): + """ + Test symmetric input_xfrm. + If symmetric RSS hash is configured, send traffic twice, swapping the + src/dst UDP ports, and verify that the same queue is receiving the traffic + in both cases (IPs are constant). + """ + + if multiprocessing.cpu_count() < 2: + raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash") + + cfg.require_cmd("socat", local=False, remote=True) + + if not hasattr(socket, "SO_INCOMING_CPU"): + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") + + rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}) + input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {}))) + + # Check for symmetric xor/or-xor + if not input_xfrm: + raise KsftSkipEx("Symmetric RSS hash not requested") + + cpus = set() + successful = 0 + for _ in range(100): + try: + port1 = rand_port(socket.SOCK_DGRAM) + port2 = rand_port(socket.SOCK_DGRAM) + cpu1 = traffic(cfg, port1, port2, ipver) + cpu2 = traffic(cfg, port2, port1, ipver) + cpus.update([cpu1, cpu2]) + ksft_eq( + cpu1, cpu2, comment=f"Received traffic on different cpus with ports ({port1 = }, {port2 = }) while symmetric hash is configured") + + successful += 1 + if successful == 10: + break + except: + continue + else: + raise KsftFailEx("Failed to run traffic") + + ksft_ge(len(cpus), 2, + comment=f"Received traffic on less than two cpus {cpus = }") + + +def test_rss_input_xfrm_ipv4(cfg): + cfg.require_ipver("4") + test_rss_input_xfrm(cfg, "4") + + +def test_rss_input_xfrm_ipv6(cfg): + cfg.require_ipver("6") + test_rss_input_xfrm(cfg, "6") + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netdevnl = NetdevFamily() + + ksft_run([test_rss_input_xfrm_ipv4, test_rss_input_xfrm_ipv6], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/settings b/tools/testing/selftests/drivers/net/hw/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c new file mode 100644 index 000000000000..d23b3b0c20a3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c @@ -0,0 +1,655 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Toeplitz test + * + * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3 + * 2. Compute the rx_hash in software based on the packet contents + * 3. Compare the two + * + * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given. + * + * If '-C $rx_irq_cpu_list' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the rxqueue that RSS would select based on this rx_hash + * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq + * 7. Compare the cpus from 4 and 6 + * + * Else if '-r $rps_bitmap' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap + * 6. Compare the cpus from 4 and 5 + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <net/if.h> +#include <netdb.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/sysinfo.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#include <ynl.h> +#include "ethtool-user.h" + +#include "kselftest.h" +#include "../../../net/lib/ksft.h" + +#define TOEPLITZ_KEY_MIN_LEN 40 +#define TOEPLITZ_KEY_MAX_LEN 60 + +#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */ +#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN) +#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN) + +#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2)) + +#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */ +#define RSS_MAX_INDIR (1 << 16) + +#define RPS_MAX_CPUS 16UL /* must be a power of 2 */ + +/* configuration options (cmdline arguments) */ +static uint16_t cfg_dport = 8000; +static int cfg_family = AF_INET6; +static char *cfg_ifname = "eth0"; +static int cfg_num_queues; +static int cfg_num_rps_cpus; +static bool cfg_sink; +static int cfg_type = SOCK_STREAM; +static int cfg_timeout_msec = 1000; +static bool cfg_verbose; + +/* global vars */ +static int num_cpus; +static int ring_block_nr; +static int ring_block_sz; + +/* stats */ +static int frames_received; +static int frames_nohash; +static int frames_error; + +#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0) + +/* tpacket ring */ +struct ring_state { + int fd; + char *mmap; + int idx; + int cpu; +}; + +static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */ +static int rps_silo_to_cpu[RPS_MAX_CPUS]; +static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN]; +static unsigned int rss_indir_tbl[RSS_MAX_INDIR]; +static unsigned int rss_indir_tbl_size; +static struct ring_state rings[RSS_MAX_CPUS]; + +static inline uint32_t toeplitz(const unsigned char *four_tuple, + const unsigned char *key) +{ + int i, bit, ret = 0; + uint32_t key32; + + key32 = ntohl(*((uint32_t *)key)); + key += 4; + + for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) { + for (bit = 7; bit >= 0; bit--) { + if (four_tuple[i] & (1 << bit)) + ret ^= key32; + + key32 <<= 1; + key32 |= !!(key[0] & (1 << bit)); + } + key++; + } + + return ret; +} + +/* Compare computed cpu with arrival cpu from packet_fanout_cpu */ +static void verify_rss(uint32_t rx_hash, int cpu) +{ + int queue; + + if (rss_indir_tbl_size) + queue = rss_indir_tbl[rx_hash % rss_indir_tbl_size]; + else + queue = rx_hash % cfg_num_queues; + + log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]); + if (rx_irq_cpus[queue] != cpu) { + log_verbose(". error: rss cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void verify_rps(uint64_t rx_hash, int cpu) +{ + int silo = (rx_hash * cfg_num_rps_cpus) >> 32; + + log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]); + if (rps_silo_to_cpu[silo] != cpu) { + log_verbose(". error: rps cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void log_rxhash(int cpu, uint32_t rx_hash, + const char *addrs, int addr_len) +{ + char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN]; + uint16_t *ports; + + if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) || + !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr))) + error(1, 0, "address parse error"); + + ports = (void *)addrs + (addr_len * 2); + log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]", + cpu, rx_hash, saddr, daddr, + ntohs(ports[0]), ntohs(ports[1])); +} + +/* Compare computed rxhash with rxhash received from tpacket_v3 */ +static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu) +{ + unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0}; + uint32_t rx_hash_sw; + const char *addrs; + int addr_len; + + if (cfg_family == AF_INET) { + addr_len = sizeof(struct in_addr); + addrs = pkt + offsetof(struct iphdr, saddr); + } else { + addr_len = sizeof(struct in6_addr); + addrs = pkt + offsetof(struct ip6_hdr, ip6_src); + } + + memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2)); + rx_hash_sw = toeplitz(four_tuple, toeplitz_key); + + if (cfg_verbose) + log_rxhash(cpu, rx_hash, addrs, addr_len); + + if (rx_hash != rx_hash_sw) { + log_verbose(" != expected 0x%x\n", rx_hash_sw); + frames_error++; + return; + } + + log_verbose(" OK"); + if (cfg_num_queues) + verify_rss(rx_hash, cpu); + else if (cfg_num_rps_cpus) + verify_rps(rx_hash, cpu); + log_verbose("\n"); +} + +static char *recv_frame(const struct ring_state *ring, char *frame) +{ + struct tpacket3_hdr *hdr = (void *)frame; + + if (hdr->hv1.tp_rxhash) + verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash, + ring->cpu); + else + frames_nohash++; + + return frame + hdr->tp_next_offset; +} + +/* A single TPACKET_V3 block can hold multiple frames */ +static bool recv_block(struct ring_state *ring) +{ + struct tpacket_block_desc *block; + char *frame; + int i; + + block = (void *)(ring->mmap + ring->idx * ring_block_sz); + if (!(block->hdr.bh1.block_status & TP_STATUS_USER)) + return false; + + frame = (char *)block; + frame += block->hdr.bh1.offset_to_first_pkt; + + for (i = 0; i < block->hdr.bh1.num_pkts; i++) { + frame = recv_frame(ring, frame); + frames_received++; + } + + block->hdr.bh1.block_status = TP_STATUS_KERNEL; + ring->idx = (ring->idx + 1) % ring_block_nr; + + return true; +} + +/* simple test: sleep once unconditionally and then process all rings */ +static void process_rings(void) +{ + int i; + + usleep(1000 * cfg_timeout_msec); + + for (i = 0; i < num_cpus; i++) + do {} while (recv_block(&rings[i])); + + fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", + frames_received - frames_nohash - frames_error, + frames_nohash, frames_error); +} + +static char *setup_ring(int fd) +{ + struct tpacket_req3 req3 = {0}; + void *ring; + + req3.tp_retire_blk_tov = cfg_timeout_msec / 8; + req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; + + req3.tp_frame_size = 2048; + req3.tp_frame_nr = 1 << 10; + req3.tp_block_nr = 16; + + req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr; + req3.tp_block_size /= req3.tp_block_nr; + + if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3))) + error(1, errno, "setsockopt PACKET_RX_RING"); + + ring_block_sz = req3.tp_block_size; + ring_block_nr = req3.tp_block_nr; + + ring = mmap(0, req3.tp_block_size * req3.tp_block_nr, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0); + if (ring == MAP_FAILED) + error(1, 0, "mmap failed"); + + return ring; +} + +static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = ARRAY_SIZE(filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +/* filter on transport protocol and destination port */ +static void set_filter(int fd) +{ + const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ + uint8_t proto; + + proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP; + if (cfg_family == AF_INET) + __set_filter(fd, offsetof(struct iphdr, protocol), proto, + sizeof(struct iphdr) + off_dport); + else + __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto, + sizeof(struct ip6_hdr) + off_dport); +} + +/* drop everything: used temporarily during setup */ +static void set_filter_null(int fd) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET + BPF_K, 0), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = ARRAY_SIZE(filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +static int create_ring(char **ring) +{ + struct fanout_args args = { + .id = 1, + .type_flags = PACKET_FANOUT_CPU, + .max_num_members = RSS_MAX_CPUS + }; + struct sockaddr_ll ll = { 0 }; + int fd, val; + + fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket creation failed"); + + val = TPACKET_V3; + if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val))) + error(1, errno, "setsockopt PACKET_VERSION"); + *ring = setup_ring(fd); + + /* block packets until all rings are added to the fanout group: + * else packets can arrive during setup and get misclassified + */ + set_filter_null(fd); + + ll.sll_family = AF_PACKET; + ll.sll_ifindex = if_nametoindex(cfg_ifname); + ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) : + htons(ETH_P_IPV6); + if (bind(fd, (void *)&ll, sizeof(ll))) + error(1, errno, "bind"); + + /* must come after bind: verifies all programs in group match */ + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) { + /* on failure, retry using old API if that is sufficient: + * it has a hard limit of 256 sockets, so only try if + * (a) only testing rxhash, not RSS or (b) <= 256 cpus. + * in this API, the third argument is left implicit. + */ + if (cfg_num_queues || num_cpus > 256 || + setsockopt(fd, SOL_PACKET, PACKET_FANOUT, + &args, sizeof(uint32_t))) + error(1, errno, "setsockopt PACKET_FANOUT cpu"); + } + + return fd; +} + +/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */ +static int setup_sink(void) +{ + int fd, val; + + fd = socket(cfg_family, cfg_type, 0); + if (fd == -1) + error(1, errno, "socket %d.%d", cfg_family, cfg_type); + + val = 1 << 20; + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val))) + error(1, errno, "setsockopt rcvbuf"); + + return fd; +} + +static void setup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + rings[i].cpu = i; + rings[i].fd = create_ring(&rings[i].mmap); + } + + /* accept packets once all rings in the fanout group are up */ + for (i = 0; i < num_cpus; i++) + set_filter(rings[i].fd); +} + +static void cleanup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz)) + error(1, errno, "munmap"); + if (close(rings[i].fd)) + error(1, errno, "close"); + } +} + +static void parse_cpulist(const char *arg) +{ + do { + rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10); + + arg = strchr(arg, ','); + if (!arg) + break; + arg++; // skip ',' + } while (1); +} + +static void show_cpulist(void) +{ + int i; + + for (i = 0; i < cfg_num_queues; i++) + fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]); +} + +static void show_silos(void) +{ + int i; + + for (i = 0; i < cfg_num_rps_cpus; i++) + fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]); +} + +static void parse_toeplitz_key(const char *str, int slen, unsigned char *key) +{ + int i, ret, off; + + if (slen < TOEPLITZ_STR_MIN_LEN || + slen > TOEPLITZ_STR_MAX_LEN + 1) + error(1, 0, "invalid toeplitz key"); + + for (i = 0, off = 0; off < slen; i++, off += 3) { + ret = sscanf(str + off, "%hhx", &key[i]); + if (ret != 1) + error(1, 0, "key parse error at %d off %d len %d", + i, off, slen); + } +} + +static void parse_rps_bitmap(const char *arg) +{ + unsigned long bitmap; + int i; + + bitmap = strtoul(arg, NULL, 0); + + if (bitmap & ~(RPS_MAX_CPUS - 1)) + error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu", + bitmap, RPS_MAX_CPUS - 1); + + for (i = 0; i < RPS_MAX_CPUS; i++) + if (bitmap & 1UL << i) + rps_silo_to_cpu[cfg_num_rps_cpus++] = i; +} + +static void read_rss_dev_info_ynl(void) +{ + struct ethtool_rss_get_req *req; + struct ethtool_rss_get_rsp *rsp; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_ethtool_family, NULL); + if (!ys) + error(1, errno, "ynl_sock_create failed"); + + req = ethtool_rss_get_req_alloc(); + if (!req) + error(1, errno, "ethtool_rss_get_req_alloc failed"); + + ethtool_rss_get_req_set_header_dev_name(req, cfg_ifname); + + rsp = ethtool_rss_get(ys, req); + if (!rsp) + error(1, ys->err.code, "YNL: %s", ys->err.msg); + + if (!rsp->_len.hkey) + error(1, 0, "RSS key not available for %s", cfg_ifname); + + if (rsp->_len.hkey < TOEPLITZ_KEY_MIN_LEN || + rsp->_len.hkey > TOEPLITZ_KEY_MAX_LEN) + error(1, 0, "RSS key length %u out of bounds [%u, %u]", + rsp->_len.hkey, TOEPLITZ_KEY_MIN_LEN, + TOEPLITZ_KEY_MAX_LEN); + + memcpy(toeplitz_key, rsp->hkey, rsp->_len.hkey); + + if (rsp->_count.indir > RSS_MAX_INDIR) + error(1, 0, "RSS indirection table too large (%u > %u)", + rsp->_count.indir, RSS_MAX_INDIR); + + /* If indir table not available we'll fallback to simple modulo math */ + if (rsp->_count.indir) { + memcpy(rss_indir_tbl, rsp->indir, + rsp->_count.indir * sizeof(rss_indir_tbl[0])); + rss_indir_tbl_size = rsp->_count.indir; + + log_verbose("RSS indirection table size: %u\n", + rss_indir_tbl_size); + } + + ethtool_rss_get_rsp_free(rsp); + ethtool_rss_get_req_free(req); + ynl_sock_destroy(ys); +} + +static void parse_opts(int argc, char **argv) +{ + static struct option long_options[] = { + {"dport", required_argument, 0, 'd'}, + {"cpus", required_argument, 0, 'C'}, + {"key", required_argument, 0, 'k'}, + {"iface", required_argument, 0, 'i'}, + {"ipv4", no_argument, 0, '4'}, + {"ipv6", no_argument, 0, '6'}, + {"sink", no_argument, 0, 's'}, + {"tcp", no_argument, 0, 't'}, + {"timeout", required_argument, 0, 'T'}, + {"udp", no_argument, 0, 'u'}, + {"verbose", no_argument, 0, 'v'}, + {"rps", required_argument, 0, 'r'}, + {0, 0, 0, 0} + }; + bool have_toeplitz = false; + int index, c; + + while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) { + switch (c) { + case '4': + cfg_family = AF_INET; + break; + case '6': + cfg_family = AF_INET6; + break; + case 'C': + parse_cpulist(optarg); + break; + case 'd': + cfg_dport = strtol(optarg, NULL, 0); + break; + case 'i': + cfg_ifname = optarg; + break; + case 'k': + parse_toeplitz_key(optarg, strlen(optarg), + toeplitz_key); + have_toeplitz = true; + break; + case 'r': + parse_rps_bitmap(optarg); + break; + case 's': + cfg_sink = true; + break; + case 't': + cfg_type = SOCK_STREAM; + break; + case 'T': + cfg_timeout_msec = strtol(optarg, NULL, 0); + break; + case 'u': + cfg_type = SOCK_DGRAM; + break; + case 'v': + cfg_verbose = true; + break; + + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!have_toeplitz) + read_rss_dev_info_ynl(); + + num_cpus = get_nprocs(); + if (num_cpus > RSS_MAX_CPUS) + error(1, 0, "increase RSS_MAX_CPUS"); + + if (cfg_num_queues && cfg_num_rps_cpus) + error(1, 0, + "Can't supply both RSS cpus ('-C') and RPS map ('-r')"); + if (cfg_verbose) { + show_cpulist(); + show_silos(); + } +} + +int main(int argc, char **argv) +{ + const int min_tests = 10; + int fd_sink = -1; + + parse_opts(argc, argv); + + if (cfg_sink) + fd_sink = setup_sink(); + + setup_rings(); + + /* Signal to test framework that we're ready to receive */ + ksft_ready(); + + process_rings(); + cleanup_rings(); + + if (cfg_sink && close(fd_sink)) + error(1, errno, "close sink"); + + if (frames_received - frames_nohash < min_tests) + error(1, 0, "too few frames for verification"); + + return frames_error; +} diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.py b/tools/testing/selftests/drivers/net/hw/toeplitz.py new file mode 100755 index 000000000000..d2db5ee9e358 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/toeplitz.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Toeplitz Rx hashing test: + - rxhash (the hash value calculation itself); + - RSS mapping from rxhash to rx queue; + - RPS mapping from rxhash to cpu. +""" + +import glob +import os +import socket +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import NetDrvEpEnv, EthtoolFamily, NetdevFamily +from lib.py import cmd, bkg, rand_port, defer +from lib.py import ksft_in +from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, KsftFailEx + +# "define" for the ID of the Toeplitz hash function +ETH_RSS_HASH_TOP = 1 + + +def _check_rps_and_rfs_not_configured(cfg): + """Verify that RPS is not already configured.""" + + for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"): + with open(rps_file, "r", encoding="utf-8") as fp: + val = fp.read().strip() + if set(val) - {"0", ","}: + raise KsftSkipEx(f"RPS already configured on {rps_file}: {val}") + + rfs_file = "/proc/sys/net/core/rps_sock_flow_entries" + with open(rfs_file, "r", encoding="utf-8") as fp: + val = fp.read().strip() + if val != "0": + raise KsftSkipEx(f"RFS already configured {rfs_file}: {val}") + + +def _get_cpu_for_irq(irq): + with open(f"/proc/irq/{irq}/smp_affinity_list", "r", + encoding="utf-8") as fp: + data = fp.read().strip() + if "," in data or "-" in data: + raise KsftFailEx(f"IRQ{irq} is not mapped to a single core: {data}") + return int(data) + + +def _get_irq_cpus(cfg): + """ + Read the list of IRQs for the device Rx queues. + """ + queues = cfg.netnl.queue_get({"ifindex": cfg.ifindex}, dump=True) + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) + + # Remap into ID-based dicts + napis = {n["id"]: n for n in napis} + queues = {f"{q['type']}{q['id']}": q for q in queues} + + cpus = [] + for rx in range(9999): + name = f"rx{rx}" + if name not in queues: + break + cpus.append(_get_cpu_for_irq(napis[queues[name]["napi-id"]]["irq"])) + + return cpus + + +def _get_unused_cpus(cfg, count=2): + """ + Get CPUs that are not used by Rx queues. + Returns a list of at least 'count' CPU numbers. + """ + + # Get CPUs used by Rx queues + rx_cpus = set(_get_irq_cpus(cfg)) + + # Get total number of CPUs + num_cpus = os.cpu_count() + + # Find unused CPUs + unused_cpus = [cpu for cpu in range(num_cpus) if cpu not in rx_cpus] + + if len(unused_cpus) < count: + raise KsftSkipEx(f"Need at {count} CPUs not used by Rx queues, found {len(unused_cpus)}") + + return unused_cpus[:count] + + +def _configure_rps(cfg, rps_cpus): + """Configure RPS for all Rx queues.""" + + mask = 0 + for cpu in rps_cpus: + mask |= (1 << cpu) + mask = hex(mask)[2:] + + # Set RPS bitmap for all rx queues + for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"): + with open(rps_file, "w", encoding="utf-8") as fp: + fp.write(mask) + + return mask + + +def _send_traffic(cfg, proto_flag, ipver, port): + """Send 20 packets of requested type.""" + + # Determine protocol and IP version for socat + if proto_flag == "-u": + proto = "UDP" + else: + proto = "TCP" + + baddr = f"[{cfg.addr_v['6']}]" if ipver == "6" else cfg.addr_v["4"] + + # Run socat in a loop to send traffic periodically + # Use sh -c with a loop similar to toeplitz_client.sh + socat_cmd = f""" + for i in `seq 20`; do + echo "msg $i" | socat -{ipver} -t 0.1 - {proto}:{baddr}:{port}; + sleep 0.001; + done + """ + + cmd(socat_cmd, shell=True, host=cfg.remote) + + +def _test_variants(): + for grp in ["", "rss", "rps"]: + for l4 in ["tcp", "udp"]: + for l3 in ["4", "6"]: + name = f"{l4}_ipv{l3}" + if grp: + name = f"{grp}_{name}" + yield KsftNamedVariant(name, "-" + l4[0], l3, grp) + + +@ksft_variants(_test_variants()) +def test(cfg, proto_flag, ipver, grp): + """Run a single toeplitz test.""" + + cfg.require_ipver(ipver) + + # Check that rxhash is enabled + ksft_in("receive-hashing: on", cmd(f"ethtool -k {cfg.ifname}").stdout) + + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # Make sure NIC is configured to use Toeplitz hash, and no key xfrm. + if rss.get('hfunc') != ETH_RSS_HASH_TOP or rss.get('input-xfrm'): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hfunc": ETH_RSS_HASH_TOP, + "input-xfrm": {}}) + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex}, + "hfunc": rss.get('hfunc'), + "input-xfrm": rss.get('input-xfrm', {}) + }) + + port = rand_port(socket.SOCK_DGRAM) + + toeplitz_path = cfg.test_dir / "toeplitz" + rx_cmd = [ + str(toeplitz_path), + "-" + ipver, + proto_flag, + "-d", str(port), + "-i", cfg.ifname, + "-T", "4000", + "-s", + "-v" + ] + + if grp: + _check_rps_and_rfs_not_configured(cfg) + if grp == "rss": + irq_cpus = ",".join([str(x) for x in _get_irq_cpus(cfg)]) + rx_cmd += ["-C", irq_cpus] + ksft_pr(f"RSS using CPUs: {irq_cpus}") + elif grp == "rps": + # Get CPUs not used by Rx queues and configure them for RPS + rps_cpus = _get_unused_cpus(cfg, count=2) + rps_mask = _configure_rps(cfg, rps_cpus) + defer(_configure_rps, cfg, []) + rx_cmd += ["-r", rps_mask] + ksft_pr(f"RPS using CPUs: {rps_cpus}, mask: {rps_mask}") + + # Run rx in background, it will exit once it has seen enough packets + with bkg(" ".join(rx_cmd), ksft_ready=True, exit_wait=True) as rx_proc: + while rx_proc.proc.poll() is None: + _send_traffic(cfg, proto_flag, ipver, port) + + # Check rx result + ksft_pr("Receiver output:") + ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# ')) + if rx_proc.stderr: + ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# ')) + + +def main() -> None: + """Ksft boilerplate main.""" + + with NetDrvEpEnv(__file__) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + ksft_run(cases=[test], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py new file mode 100755 index 000000000000..0998e68ebaf0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Run the tools/testing/selftests/net/csum testsuite.""" + +import fcntl +import socket +import struct +import termios +import time + +from lib.py import ksft_pr, ksft_run, ksft_exit, KsftSkipEx, KsftXfailEx +from lib.py import ksft_eq, ksft_ge, ksft_lt +from lib.py import EthtoolFamily, NetdevFamily, NetDrvEpEnv +from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen + + +def sock_wait_drain(sock, max_wait=1000): + """Wait for all pending write data on the socket to get ACKed.""" + for _ in range(max_wait): + one = b'\0' * 4 + outq = fcntl.ioctl(sock.fileno(), termios.TIOCOUTQ, one) + outq = struct.unpack("I", outq)[0] + if outq == 0: + break + time.sleep(0.01) + ksft_eq(outq, 0) + + +def tcp_sock_get_retrans(sock): + """Get the number of retransmissions for the TCP socket.""" + info = sock.getsockopt(socket.SOL_TCP, socket.TCP_INFO, 512) + return struct.unpack("I", info[100:104])[0] + + +def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): + cfg.require_cmd("socat", local=False, remote=True) + + port = rand_port() + listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof" + + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc: + wait_port_listen(port, host=cfg.remote) + + if ipver == "4": + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((remote_v4, port)) + else: + sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + sock.connect((remote_v6, port)) + + # Small send to make sure the connection is working. + sock.send("ping".encode()) + sock_wait_drain(sock) + + # Send 4MB of data, record the LSO packet count. + qstat_old = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + buf = b"0" * 1024 * 1024 * 4 + sock.send(buf) + sock_wait_drain(sock) + qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + # Check that at least 90% of the data was sent as LSO packets. + # System noise may cause false negatives. Also header overheads + # will add up to 5% of extra packes... The check is best effort. + total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"] + total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"] + + # Make sure we have order of magnitude more LSO packets than + # retransmits, in case TCP retransmitted all the LSO packets. + ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4) + sock.close() + + if should_lso: + if cfg.have_stat_super_count: + ksft_ge(qstat_new['tx-hw-gso-packets'] - + qstat_old['tx-hw-gso-packets'], + total_lso_super, + comment="Number of LSO super-packets with LSO enabled") + if cfg.have_stat_wire_count: + ksft_ge(qstat_new['tx-hw-gso-wire-packets'] - + qstat_old['tx-hw-gso-wire-packets'], + total_lso_wire, + comment="Number of LSO wire-packets with LSO enabled") + else: + if cfg.have_stat_super_count: + ksft_lt(qstat_new['tx-hw-gso-packets'] - + qstat_old['tx-hw-gso-packets'], + 15, comment="Number of LSO super-packets with LSO disabled") + if cfg.have_stat_wire_count: + ksft_lt(qstat_new['tx-hw-gso-wire-packets'] - + qstat_old['tx-hw-gso-wire-packets'], + 500, comment="Number of LSO wire-packets with LSO disabled") + + +def build_tunnel(cfg, outer_ipver, tun_info): + local_v4 = NetDrvEpEnv.nsim_v4_pfx + "1" + local_v6 = NetDrvEpEnv.nsim_v6_pfx + "1" + remote_v4 = NetDrvEpEnv.nsim_v4_pfx + "2" + remote_v6 = NetDrvEpEnv.nsim_v6_pfx + "2" + + local_addr = cfg.addr_v[outer_ipver] + remote_addr = cfg.remote_addr_v[outer_ipver] + + tun_type = tun_info[0] + tun_arg = tun_info[1] + ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}") + defer(ip, f"link del {tun_type}-ksft") + ip(f"link set dev {tun_type}-ksft up") + ip(f"addr add {local_v4}/24 dev {tun_type}-ksft") + ip(f"addr add {local_v6}/64 dev {tun_type}-ksft") + + ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}", + host=cfg.remote) + defer(ip, f"link del {tun_type}-ksft", host=cfg.remote) + ip(f"link set dev {tun_type}-ksft up", host=cfg.remote) + ip(f"addr add {remote_v4}/24 dev {tun_type}-ksft", host=cfg.remote) + ip(f"addr add {remote_v6}/64 dev {tun_type}-ksft", host=cfg.remote) + + return remote_v4, remote_v6 + + +def restore_wanted_features(cfg): + features_cmd = "" + for feature in cfg.hw_features: + setting = "on" if feature in cfg.wanted_features else "off" + features_cmd += f" {feature} {setting}" + try: + ethtool(f"-K {cfg.ifname} {features_cmd}") + except Exception as e: + ksft_pr(f"WARNING: failure restoring wanted features: {e}") + + +def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): + """Construct specific tests from the common template.""" + def f(cfg): + cfg.require_ipver(outer_ipver) + defer(restore_wanted_features, cfg) + + if not cfg.have_stat_super_count and \ + not cfg.have_stat_wire_count: + raise KsftSkipEx(f"Device does not support LSO queue stats") + + if feature not in cfg.hw_features: + raise KsftSkipEx(f"Device does not support {feature}") + + ipver = outer_ipver + if tun: + remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun) + ipver = inner_ipver + else: + remote_v4 = cfg.remote_addr_v["4"] + remote_v6 = cfg.remote_addr_v["6"] + + # First test without the feature enabled. + ethtool(f"-K {cfg.ifname} {feature} off") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False) + + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") + if feature in cfg.partial_features: + ethtool(f"-K {cfg.ifname} tx-gso-partial on") + if ipver == "4": + ksft_pr("Testing with mangleid enabled") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") + + # Full feature enabled. + ethtool(f"-K {cfg.ifname} {feature} on") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) + + f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver + return f + + +def query_nic_features(cfg) -> None: + """Query and cache the NIC features.""" + cfg.have_stat_super_count = False + cfg.have_stat_wire_count = False + + features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + + cfg.wanted_features = set() + for f in features["wanted"]["bits"]["bit"]: + cfg.wanted_features.add(f["name"]) + + cfg.hw_features = set() + hw_all_features_cmd = "" + for f in features["hw"]["bits"]["bit"]: + if f.get("value", False): + feature = f["name"] + cfg.hw_features.add(feature) + hw_all_features_cmd += f" {feature} on" + try: + ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}") + except Exception as e: + ksft_pr(f"WARNING: failure enabling all hw features: {e}") + ksft_pr("partial gso feature detection may be impacted") + + # Check which features are supported via GSO partial + cfg.partial_features = set() + if 'tx-gso-partial' in cfg.hw_features: + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + + no_partial = set() + features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + for f in features["active"]["bits"]["bit"]: + no_partial.add(f["name"]) + cfg.partial_features = cfg.hw_features - no_partial + ethtool(f"-K {cfg.ifname} tx-gso-partial on") + + restore_wanted_features(cfg) + + stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True) + if stats: + if 'tx-hw-gso-packets' in stats[0]: + ksft_pr("Detected qstat for LSO super-packets") + cfg.have_stat_super_count = True + if 'tx-hw-gso-wire-packets' in stats[0]: + ksft_pr("Detected qstat for LSO wire-packets") + cfg.have_stat_wire_count = True + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + + query_nic_features(cfg) + + test_info = ( + # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions) + ("", "4", "tx-tcp-segmentation", None), + ("", "6", "tx-tcp6-segmentation", None), + ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))), + ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))), + ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))), + ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))), + ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))), + ) + + cases = [] + for outer_ipver in ["4", "6"]: + for info in test_info: + # Skip if test which only works for a specific IP version + if info[1] and outer_ipver != info[1]: + continue + + if info[3]: + cases += [ + test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver) + for inner_ipver in info[3][2] + ] + else: + cases.append(test_builder(info[0], cfg, outer_ipver, info[2], None, outer_ipver)) + + ksft_run(cases=cases, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py new file mode 100755 index 000000000000..d19d1d518208 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +# This is intended to be run on a virtio-net guest interface. +# The test binds the XDP socket to the interface without setting +# the fill ring to trigger delayed refill_work. This helps to +# make it easier to reproduce the deadlock when XDP program, +# XDP socket bind/unbind, rx ring resize race with refill_work on +# the buggy kernel. +# +# The Qemu command to setup virtio-net +# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no +# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on + +from lib.py import ksft_exit, ksft_run +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import NetDrvEnv +from lib.py import bkg, ip, cmd, ethtool +import time + +def _get_rx_ring_entries(cfg): + output = ethtool(f"-g {cfg.ifname}", json=True) + return output[0]["rx"] + +def setup_xsk(cfg, xdp_queue_id = 0) -> bkg: + # Probe for support + xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False) + if xdp.ret == 255: + raise KsftSkipEx('AF_XDP unsupported') + elif xdp.ret > 0: + raise KsftFailEx('unable to create AF_XDP socket') + + try: + return bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} ' \ + '{xdp_queue_id} -z', ksft_wait=3) + except: + raise KsftSkipEx('Failed to bind XDP socket in zerocopy.\n' \ + 'Please consider adding iommu_platform=on ' \ + 'when setting up virtio-net-pci') + +def check_xdp_bind(cfg): + with setup_xsk(cfg): + ip(f"link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o")) + ip(f"link set dev %s xdp off" % cfg.ifname) + +def check_rx_resize(cfg): + with setup_xsk(cfg): + rx_ring = _get_rx_ring_entries(cfg) + ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring // 2)) + ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring)) + +def main(): + with NetDrvEnv(__file__, nsim_test=False) as cfg: + ksft_run([check_xdp_bind, check_rx_resize], + args=(cfg, )) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py new file mode 100644 index 000000000000..8b75faa9af6d --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: GPL-2.0 + +""" +Driver test environment. +NetDrvEnv and NetDrvEpEnv are the main environment classes. +Former is for local host only tests, latter creates / connects +to a remote endpoint. See NIPA wiki for more information about +running and writing driver tests. +""" + +import sys +from pathlib import Path + +KSFT_DIR = (Path(__file__).parent / "../../../..").resolve() + +try: + sys.path.append(KSFT_DIR.as_posix()) + + # Import one by one to avoid pylint false positives + from net.lib.py import NetNS, NetNSEnter, NetdevSimDev + from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ + NlError, RtnlFamily, DevlinkFamily, PSPFamily + from net.lib.py import CmdExitFailure + from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ + fd_read_timeout, ip, rand_port, wait_port_listen, wait_file + from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx + from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ + ksft_setup, ksft_variants, KsftNamedVariant + from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ + ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none + + __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", + "EthtoolFamily", "NetdevFamily", "NetshaperFamily", + "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", + "CmdExitFailure", + "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", + "fd_read_timeout", "ip", "rand_port", + "wait_port_listen", "wait_file", + "KsftSkipEx", "KsftFailEx", "KsftXfailEx", + "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", + "ksft_setup", "ksft_variants", "KsftNamedVariant", + "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", + "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", + "ksft_not_none", "ksft_not_none"] + + from .env import NetDrvEnv, NetDrvEpEnv + from .load import GenerateTraffic, Iperf3Runner + from .remote import Remote + + __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote", + "Iperf3Runner"] +except ModuleNotFoundError as e: + print("Failed importing `net` library from kernel sources") + print(str(e)) + sys.exit(4) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py new file mode 100644 index 000000000000..8b644fd84ff2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -0,0 +1,287 @@ +# SPDX-License-Identifier: GPL-2.0 + +import os +import time +from pathlib import Path +from lib.py import KsftSkipEx, KsftXfailEx +from lib.py import ksft_setup, wait_file +from lib.py import cmd, ethtool, ip, CmdExitFailure +from lib.py import NetNS, NetdevSimDev +from .remote import Remote + + +class NetDrvEnvBase: + """ + Base class for a NIC / host environments + + Attributes: + test_dir: Path to the source directory of the test + net_lib_dir: Path to the net/lib directory + """ + def __init__(self, src_path): + self.src_path = Path(src_path) + self.test_dir = self.src_path.parent.resolve() + self.net_lib_dir = (Path(__file__).parent / "../../../../net/lib").resolve() + + self.env = self._load_env_file() + + # Following attrs must be set be inheriting classes + self.dev = None + + def _load_env_file(self): + env = os.environ.copy() + + src_dir = Path(self.src_path).parent.resolve() + if not (src_dir / "net.config").exists(): + return ksft_setup(env) + + with open((src_dir / "net.config").as_posix(), 'r') as fp: + for line in fp.readlines(): + full_file = line + # Strip comments + pos = line.find("#") + if pos >= 0: + line = line[:pos] + line = line.strip() + if not line: + continue + pair = line.split('=', maxsplit=1) + if len(pair) != 2: + raise Exception("Can't parse configuration line:", full_file) + env[pair[0]] = pair[1] + return ksft_setup(env) + + def __del__(self): + pass + + def __enter__(self): + ip(f"link set dev {self.dev['ifname']} up") + wait_file(f"/sys/class/net/{self.dev['ifname']}/carrier", + lambda x: x.strip() == "1") + + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + """ + __exit__ gets called at the end of a "with" block. + """ + self.__del__() + + +class NetDrvEnv(NetDrvEnvBase): + """ + Class for a single NIC / host env, with no remote end + """ + def __init__(self, src_path, nsim_test=None, **kwargs): + super().__init__(src_path) + + self._ns = None + + if 'NETIF' in self.env: + if nsim_test is True: + raise KsftXfailEx("Test only works on netdevsim") + + self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0] + else: + if nsim_test is False: + raise KsftXfailEx("Test does not work on netdevsim") + + self._ns = NetdevSimDev(**kwargs) + self.dev = self._ns.nsims[0].dev + self.ifname = self.dev['ifname'] + self.ifindex = self.dev['ifindex'] + + def __del__(self): + if self._ns: + self._ns.remove() + self._ns = None + + +class NetDrvEpEnv(NetDrvEnvBase): + """ + Class for an environment with a local device and "remote endpoint" + which can be used to send traffic in. + + For local testing it creates two network namespaces and a pair + of netdevsim devices. + """ + + # Network prefixes used for local tests + nsim_v4_pfx = "192.0.2." + nsim_v6_pfx = "2001:db8::" + + def __init__(self, src_path, nsim_test=None): + super().__init__(src_path) + + self._stats_settle_time = None + + # Things we try to destroy + self.remote = None + # These are for local testing state + self._netns = None + self._ns = None + self._ns_peer = None + + self.addr_v = { "4": None, "6": None } + self.remote_addr_v = { "4": None, "6": None } + + if "NETIF" in self.env: + if nsim_test is True: + raise KsftXfailEx("Test only works on netdevsim") + self._check_env() + + self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0] + + self.addr_v["4"] = self.env.get("LOCAL_V4") + self.addr_v["6"] = self.env.get("LOCAL_V6") + self.remote_addr_v["4"] = self.env.get("REMOTE_V4") + self.remote_addr_v["6"] = self.env.get("REMOTE_V6") + kind = self.env["REMOTE_TYPE"] + args = self.env["REMOTE_ARGS"] + else: + if nsim_test is False: + raise KsftXfailEx("Test does not work on netdevsim") + + self.create_local() + + self.dev = self._ns.nsims[0].dev + + self.addr_v["4"] = self.nsim_v4_pfx + "1" + self.addr_v["6"] = self.nsim_v6_pfx + "1" + self.remote_addr_v["4"] = self.nsim_v4_pfx + "2" + self.remote_addr_v["6"] = self.nsim_v6_pfx + "2" + kind = "netns" + args = self._netns.name + + self.remote = Remote(kind, args, src_path) + + self.addr_ipver = "6" if self.addr_v["6"] else "4" + self.addr = self.addr_v[self.addr_ipver] + self.remote_addr = self.remote_addr_v[self.addr_ipver] + + # Bracketed addresses, some commands need IPv6 to be inside [] + self.baddr = f"[{self.addr_v['6']}]" if self.addr_v["6"] else self.addr_v["4"] + self.remote_baddr = f"[{self.remote_addr_v['6']}]" if self.remote_addr_v["6"] else self.remote_addr_v["4"] + + self.ifname = self.dev['ifname'] + self.ifindex = self.dev['ifindex'] + + # resolve remote interface name + self.remote_ifname = self.resolve_remote_ifc() + self.remote_dev = ip("-d link show dev " + self.remote_ifname, + host=self.remote, json=True)[0] + + self._required_cmd = {} + + def create_local(self): + self._netns = NetNS() + self._ns = NetdevSimDev() + self._ns_peer = NetdevSimDev(ns=self._netns) + + with open("/proc/self/ns/net") as nsfd0, \ + open("/var/run/netns/" + self._netns.name) as nsfd1: + ifi0 = self._ns.nsims[0].ifindex + ifi1 = self._ns_peer.nsims[0].ifindex + NetdevSimDev.ctrl_write('link_device', + f'{nsfd0.fileno()}:{ifi0} {nsfd1.fileno()}:{ifi1}') + + ip(f" addr add dev {self._ns.nsims[0].ifname} {self.nsim_v4_pfx}1/24") + ip(f"-6 addr add dev {self._ns.nsims[0].ifname} {self.nsim_v6_pfx}1/64 nodad") + ip(f" link set dev {self._ns.nsims[0].ifname} up") + + ip(f" addr add dev {self._ns_peer.nsims[0].ifname} {self.nsim_v4_pfx}2/24", ns=self._netns) + ip(f"-6 addr add dev {self._ns_peer.nsims[0].ifname} {self.nsim_v6_pfx}2/64 nodad", ns=self._netns) + ip(f" link set dev {self._ns_peer.nsims[0].ifname} up", ns=self._netns) + + def _check_env(self): + vars_needed = [ + ["LOCAL_V4", "LOCAL_V6"], + ["REMOTE_V4", "REMOTE_V6"], + ["REMOTE_TYPE"], + ["REMOTE_ARGS"] + ] + missing = [] + + for choice in vars_needed: + for entry in choice: + if entry in self.env: + break + else: + missing.append(choice) + # Make sure v4 / v6 configs are symmetric + if ("LOCAL_V6" in self.env) != ("REMOTE_V6" in self.env): + missing.append(["LOCAL_V6", "REMOTE_V6"]) + if ("LOCAL_V4" in self.env) != ("REMOTE_V4" in self.env): + missing.append(["LOCAL_V4", "REMOTE_V4"]) + if missing: + raise Exception("Invalid environment, missing configuration:", missing, + "Please see tools/testing/selftests/drivers/net/README.rst") + + def resolve_remote_ifc(self): + v4 = v6 = None + if self.remote_addr_v["4"]: + v4 = ip("addr show to " + self.remote_addr_v["4"], json=True, host=self.remote) + if self.remote_addr_v["6"]: + v6 = ip("addr show to " + self.remote_addr_v["6"], json=True, host=self.remote) + if v4 and v6 and v4[0]["ifname"] != v6[0]["ifname"]: + raise Exception("Can't resolve remote interface name, v4 and v6 don't match") + if (v4 and len(v4) > 1) or (v6 and len(v6) > 1): + raise Exception("Can't resolve remote interface name, multiple interfaces match") + return v6[0]["ifname"] if v6 else v4[0]["ifname"] + + def __del__(self): + if self._ns: + self._ns.remove() + self._ns = None + if self._ns_peer: + self._ns_peer.remove() + self._ns_peer = None + if self._netns: + del self._netns + self._netns = None + if self.remote: + del self.remote + self.remote = None + + def require_ipver(self, ipver): + if not self.addr_v[ipver] or not self.remote_addr_v[ipver]: + raise KsftSkipEx(f"Test requires IPv{ipver} connectivity") + + def require_nsim(self): + if self._ns is None: + raise KsftXfailEx("Test only works on netdevsim") + + def _require_cmd(self, comm, key, host=None): + cached = self._required_cmd.get(comm, {}) + if cached.get(key) is None: + cached[key] = cmd("command -v -- " + comm, fail=False, + shell=True, host=host).ret == 0 + self._required_cmd[comm] = cached + return cached[key] + + def require_cmd(self, comm, local=True, remote=False): + if local: + if not self._require_cmd(comm, "local"): + raise KsftSkipEx("Test requires command: " + comm) + if remote: + if not self._require_cmd(comm, "remote", host=self.remote): + raise KsftSkipEx("Test requires (remote) command: " + comm) + + def wait_hw_stats_settle(self): + """ + Wait for HW stats to become consistent, some devices DMA HW stats + periodically so events won't be reflected until next sync. + Good drivers will tell us via ethtool what their sync period is. + """ + if self._stats_settle_time is None: + data = {} + try: + data = ethtool("-c " + self.ifname, json=True)[0] + except CmdExitFailure as e: + if "Operation not supported" not in e.cmd.stderr: + raise + + self._stats_settle_time = 0.025 + \ + data.get('stats-block-usecs', 0) / 1000 / 1000 + + time.sleep(self._stats_settle_time) diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py new file mode 100644 index 000000000000..f181fa2d38fc --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -0,0 +1,139 @@ +# SPDX-License-Identifier: GPL-2.0 + +import re +import time +import json + +from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen + + +class Iperf3Runner: + """ + Sets up and runs iperf3 traffic. + """ + def __init__(self, env, port=None, server_ip=None, client_ip=None): + env.require_cmd("iperf3", local=True, remote=True) + self.env = env + self.port = rand_port() if port is None else port + self.server_ip = server_ip + self.client_ip = client_ip + + def _build_server(self): + cmdline = f"iperf3 -s -1 -p {self.port}" + if self.server_ip: + cmdline += f" -B {self.server_ip}" + return cmdline + + def _build_client(self, streams, duration, reverse): + host = self.env.addr if self.server_ip is None else self.server_ip + cmdline = f"iperf3 -c {host} -p {self.port} -P {streams} -t {duration} -J" + if self.client_ip: + cmdline += f" -B {self.client_ip}" + if reverse: + cmdline += " --reverse" + return cmdline + + def start_server(self): + """ + Starts an iperf3 server with optional bind IP. + """ + cmdline = self._build_server() + proc = cmd(cmdline, background=True) + wait_port_listen(self.port) + time.sleep(0.1) + return proc + + def start_client(self, background=False, streams=1, duration=10, reverse=False): + """ + Starts the iperf3 client with the configured options. + """ + cmdline = self._build_client(streams, duration, reverse) + return cmd(cmdline, background=background, host=self.env.remote) + + def measure_bandwidth(self, reverse=False): + """ + Runs an iperf3 measurement and returns the average bandwidth (Gbps). + Discards the first and last few reporting intervals and uses only the + middle part of the run where throughput is typically stable. + """ + self.start_server() + result = self.start_client(duration=10, reverse=reverse) + + if result.ret != 0: + raise RuntimeError("iperf3 failed to run successfully") + try: + out = json.loads(result.stdout) + except json.JSONDecodeError as exc: + raise ValueError("Failed to parse iperf3 JSON output") from exc + + intervals = out.get("intervals", []) + samples = [i["sum"]["bits_per_second"] / 1e9 for i in intervals] + if len(samples) < 10: + raise ValueError(f"iperf3 returned too few intervals: {len(samples)}") + # Discard potentially unstable first and last 3 seconds. + stable = samples[3:-3] + + avg = sum(stable) / len(stable) + + return avg + + +class GenerateTraffic: + def __init__(self, env, port=None): + self.env = env + self.runner = Iperf3Runner(env, port) + + self._iperf_server = self.runner.start_server() + self._iperf_client = self.runner.start_client(background=True, streams=16, duration=86400) + + # Wait for traffic to ramp up + if not self._wait_pkts(pps=1000): + self.stop(verbose=True) + raise Exception("iperf3 traffic did not ramp up") + + def _wait_pkts(self, pkt_cnt=None, pps=None): + """ + Wait until we've seen pkt_cnt or until traffic ramps up to pps. + Only one of pkt_cnt or pss can be specified. + """ + pkt_start = ip("-s link show dev " + self.env.ifname, json=True)[0]["stats64"]["rx"]["packets"] + for _ in range(50): + time.sleep(0.1) + pkt_now = ip("-s link show dev " + self.env.ifname, json=True)[0]["stats64"]["rx"]["packets"] + if pps: + if pkt_now - pkt_start > pps / 10: + return True + pkt_start = pkt_now + elif pkt_cnt: + if pkt_now - pkt_start > pkt_cnt: + return True + return False + + def wait_pkts_and_stop(self, pkt_cnt): + failed = not self._wait_pkts(pkt_cnt=pkt_cnt) + self.stop(verbose=failed) + + def stop(self, verbose=None): + self._iperf_client.process(terminate=True) + if verbose: + ksft_pr(">> Client:") + ksft_pr(self._iperf_client.stdout) + ksft_pr(self._iperf_client.stderr) + self._iperf_server.process(terminate=True) + if verbose: + ksft_pr(">> Server:") + ksft_pr(self._iperf_server.stdout) + ksft_pr(self._iperf_server.stderr) + self._wait_client_stopped() + + def _wait_client_stopped(self, sleep=0.005, timeout=5): + end = time.monotonic() + timeout + + live_port_pattern = re.compile(fr":{self.runner.port:04X} 0[^6] ") + + while time.monotonic() < end: + data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout + if not live_port_pattern.search(data): + return + time.sleep(sleep) + raise Exception(f"Waiting for client to stop timed out after {timeout}s") diff --git a/tools/testing/selftests/drivers/net/lib/py/remote.py b/tools/testing/selftests/drivers/net/lib/py/remote.py new file mode 100644 index 000000000000..b1780b987722 --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/remote.py @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 + +import os +import importlib + +_modules = {} + +def Remote(kind, args, src_path): + global _modules + + if kind not in _modules: + _modules[kind] = importlib.import_module("..remote_" + kind, __name__) + + dir_path = os.path.abspath(src_path + "/../") + return getattr(_modules[kind], "Remote")(args, dir_path) diff --git a/tools/testing/selftests/drivers/net/lib/py/remote_netns.py b/tools/testing/selftests/drivers/net/lib/py/remote_netns.py new file mode 100644 index 000000000000..7d5eeb0271bc --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/remote_netns.py @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 + +import os +import subprocess + +from lib.py import cmd + + +class Remote: + def __init__(self, name, dir_path): + self.name = name + self.dir_path = dir_path + + def cmd(self, comm): + return subprocess.Popen(["ip", "netns", "exec", self.name, "bash", "-c", comm], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + def deploy(self, what): + if os.path.isabs(what): + return what + return os.path.abspath(self.dir_path + "/" + what) diff --git a/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py b/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py new file mode 100644 index 000000000000..924addde19a3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0 + +import os +import string +import subprocess +import random + +from lib.py import cmd + + +class Remote: + def __init__(self, name, dir_path): + self.name = name + self.dir_path = dir_path + self._tmpdir = None + + def __del__(self): + if self._tmpdir: + cmd("rm -rf " + self._tmpdir, host=self) + self._tmpdir = None + + def cmd(self, comm): + return subprocess.Popen(["ssh", "-q", self.name, comm], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + def _mktmp(self): + return ''.join(random.choice(string.ascii_lowercase) for _ in range(8)) + + def deploy(self, what): + if not self._tmpdir: + self._tmpdir = "/tmp/" + self._mktmp() + cmd("mkdir " + self._tmpdir, host=self) + file_name = self._tmpdir + "/" + self._mktmp() + os.path.basename(what) + + if not os.path.isabs(what): + what = os.path.abspath(self.dir_path + "/" + what) + + cmd(f"scp {what} {self.name}:{file_name}") + return file_name diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh new file mode 100644 index 000000000000..ae8abff4be40 --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -0,0 +1,419 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This file contains functions and helpers to support the netconsole +# selftests +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +SRCIF="" # to be populated later +SRCIP="" # to be populated later +SRCIP4="192.0.2.1" +SRCIP6="fc00::1" +DSTIF="" # to be populated later +DSTIP="" # to be populated later +DSTIP4="192.0.2.2" +DSTIP6="fc00::2" + +PORT="6666" +MSG="netconsole selftest" +USERDATA_KEY="key" +USERDATA_VALUE="value" +TARGET=$(mktemp -u netcons_XXXXX) +DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk) +NETCONS_CONFIGFS="/sys/kernel/config/netconsole" +NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" +# NAMESPACE will be populated by setup_ns with a random value +NAMESPACE="" + +# IDs for netdevsim. We either use NSIM_DEV_{1,2}_ID for standard test +# or NSIM_BOND_{T,R}X_{1,2} for the bonding tests. Not both at the +# same time. +NSIM_DEV_1_ID=$((256 + RANDOM % 256)) +NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_BOND_TX_1=$((768 + RANDOM % 256)) +NSIM_BOND_TX_2=$((1024 + RANDOM % 256)) +NSIM_BOND_RX_1=$((1280 + RANDOM % 256)) +NSIM_BOND_RX_2=$((1536 + RANDOM % 256)) +NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" +NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" + +# Used to create and delete namespaces +source "${LIBDIR}"/../../../../net/lib.sh + +# Create netdevsim interfaces +create_ifaces() { + echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" + echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" + udevadm settle 2> /dev/null || true + + local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID" + local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID" + + # These are global variables + SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \ + -path "$NSIM1"/net -exec basename {} \;) + DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \ + -path "$NSIM2"/net -exec basename {} \;) +} + +link_ifaces() { + local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" + local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex) + local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex) + + exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}" + exec {INITNS_FD}</proc/self/ns/net + + # Bind the dst interface to namespace + ip link set "${DSTIF}" netns "${NAMESPACE}" + + # Linking one device to the other one (on the other namespace} + if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK + then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup + exit "${ksft_skip}" + fi +} + +function configure_ip() { + # Configure the IPs for both interfaces + ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}" + ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up + + ip addr add "${SRCIP}"/24 dev "${SRCIF}" + ip link set "${SRCIF}" up +} + +function select_ipv4_or_ipv6() +{ + local VERSION=${1} + + if [[ "$VERSION" == "ipv6" ]] + then + DSTIP="${DSTIP6}" + SRCIP="${SRCIP6}" + else + DSTIP="${DSTIP4}" + SRCIP="${SRCIP4}" + fi +} + +function set_network() { + local IP_VERSION=${1:-"ipv4"} + + # setup_ns function is coming from lib.sh + setup_ns NAMESPACE + + # Create both interfaces, and assign the destination to a different + # namespace + create_ifaces + + # Link both interfaces back to back + link_ifaces + + select_ipv4_or_ipv6 "${IP_VERSION}" + configure_ip +} + +function _create_dynamic_target() { + local FORMAT="${1:?FORMAT parameter required}" + local NCPATH="${2:?NCPATH parameter required}" + + DSTMAC=$(ip netns exec "${NAMESPACE}" \ + ip link show "${DSTIF}" | awk '/ether/ {print $2}') + + # Create a dynamic target + mkdir "${NCPATH}" + + echo "${DSTIP}" > "${NCPATH}"/remote_ip + echo "${SRCIP}" > "${NCPATH}"/local_ip + echo "${DSTMAC}" > "${NCPATH}"/remote_mac + echo "${SRCIF}" > "${NCPATH}"/dev_name + + if [ "${FORMAT}" == "basic" ] + then + # Basic target does not support release + echo 0 > "${NCPATH}"/release + echo 0 > "${NCPATH}"/extended + elif [ "${FORMAT}" == "extended" ] + then + echo 1 > "${NCPATH}"/extended + fi +} + +function create_dynamic_target() { + local FORMAT=${1:-"extended"} + local NCPATH=${2:-"$NETCONS_PATH"} + _create_dynamic_target "${FORMAT}" "${NCPATH}" + + echo 1 > "${NCPATH}"/enabled + + # This will make sure that the kernel was able to + # load the netconsole driver configuration. The console message + # gets more organized/sequential as well. + sleep 1 +} + +# Generate the command line argument for netconsole following: +# netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr] +function create_cmdline_str() { + local BINDMODE=${1:-"ifname"} + if [ "${BINDMODE}" == "ifname" ] + then + SRCDEV=${SRCIF} + else + SRCDEV=$(mac_get "${SRCIF}") + fi + + DSTMAC=$(ip netns exec "${NAMESPACE}" \ + ip link show "${DSTIF}" | awk '/ether/ {print $2}') + SRCPORT="1514" + TGTPORT="6666" + + echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCDEV},${TGTPORT}@${DSTIP}/${DSTMAC}\"" +} + +# Do not append the release to the header of the message +function disable_release_append() { + echo 0 > "${NETCONS_PATH}"/enabled + echo 0 > "${NETCONS_PATH}"/release + echo 1 > "${NETCONS_PATH}"/enabled +} + +function do_cleanup() { + local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" + + # Delete netdevsim devices + echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL" + echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL" + + # this is coming from lib.sh + cleanup_all_ns + + # Restoring printk configurations + echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk +} + +function cleanup_netcons() { + # delete netconsole dynamic reconfiguration + # do not fail if the target is already disabled + if [[ ! -d "${NETCONS_PATH}" ]] + then + # in some cases this is called before netcons path is created + return + fi + if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]] + then + echo 0 > "${NETCONS_PATH}"/enabled || true + fi + # Remove all the keys that got created during the selftest + find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete + # Remove the configfs entry + rmdir "${NETCONS_PATH}" +} + +function cleanup() { + cleanup_netcons + do_cleanup +} + +function set_user_data() { + if [[ ! -d "${NETCONS_PATH}""/userdata" ]] + then + echo "Userdata path not available in ${NETCONS_PATH}/userdata" + exit "${ksft_skip}" + fi + + KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}" + mkdir -p "${KEY_PATH}" + VALUE_PATH="${KEY_PATH}""/value" + echo "${USERDATA_VALUE}" > "${VALUE_PATH}" +} + +function listen_port_and_save_to() { + local OUTPUT=${1} + local IPVERSION=${2:-"ipv4"} + + if [ "${IPVERSION}" == "ipv4" ] + then + SOCAT_MODE="UDP-LISTEN" + else + SOCAT_MODE="UDP6-LISTEN" + fi + + # Just wait for 2 seconds + timeout 2 ip netns exec "${NAMESPACE}" \ + socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" 2> /dev/null +} + +# Only validate that the message arrived properly +function validate_msg() { + local TMPFILENAME="$1" + + # Check if the file exists + if [ ! -f "$TMPFILENAME" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${TMPFILENAME}"; then + echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi +} + +# Validate the message and userdata +function validate_result() { + local TMPFILENAME="$1" + + # TMPFILENAME will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # key=value + + validate_msg "${TMPFILENAME}" + + # userdata is not supported on basic format target, + # thus, do not validate it. + if [ "${FORMAT}" != "basic" ]; + then + if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then + echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi + fi + + # Delete the file once it is validated, otherwise keep it + # for debugging purposes + rm "${TMPFILENAME}" +} + +function check_for_dependencies() { + if [ "$(id -u)" -ne 0 ]; then + echo "This test must be run as root" >&2 + exit "${ksft_skip}" + fi + + if ! which socat > /dev/null ; then + echo "SKIP: socat(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if ! which ip > /dev/null ; then + echo "SKIP: ip(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if ! which udevadm > /dev/null ; then + echo "SKIP: udevadm(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if [ ! -f /proc/net/if_inet6 ]; then + echo "SKIP: IPv6 not configured. Check if CONFIG_IPV6 is enabled" >&2 + exit "${ksft_skip}" + fi + + if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then + echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2 + exit "${ksft_skip}" + fi + + if [ ! -d "${NETCONS_CONFIGFS}" ]; then + echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2 + exit "${ksft_skip}" + fi + + if ip link show "${DSTIF}" 2> /dev/null; then + echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2 + exit "${ksft_skip}" + fi + + REGEXP4="inet.*(${SRCIP4}|${DSTIP4})" + REGEXP6="inet.*(${SRCIP6}|${DSTIP6})" + if ip addr list | grep -E "${REGEXP4}" 2> /dev/null; then + echo "SKIP: IPv4s already in use. Skipping it" >&2 + exit "${ksft_skip}" + fi + + if ip addr list | grep -E "${REGEXP6}" 2> /dev/null; then + echo "SKIP: IPv6s already in use. Skipping it" >&2 + exit "${ksft_skip}" + fi +} + +function check_for_taskset() { + if ! which taskset > /dev/null ; then + echo "SKIP: taskset(1) is not available" >&2 + exit "${ksft_skip}" + fi +} + +# This is necessary if running multiple tests in a row +function pkill_socat() { + PROCESS_NAME4="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}" + PROCESS_NAME6="socat UDP6-LISTEN:6666,fork ${OUTPUT_FILE}" + # socat runs under timeout(1), kill it if it is still alive + # do not fail if socat doesn't exist anymore + set +e + pkill -f "${PROCESS_NAME4}" + pkill -f "${PROCESS_NAME6}" + set -e +} + +# Check if netconsole was compiled as a module, otherwise exit +function check_netconsole_module() { + if modinfo netconsole | grep filename: | grep -q builtin + then + echo "SKIP: netconsole should be compiled as a module" >&2 + exit "${ksft_skip}" + fi +} + +# A wrapper to translate protocol version to udp version +function wait_for_port() { + local NAMESPACE=${1} + local PORT=${2} + IP_VERSION=${3} + + if [ "${IP_VERSION}" == "ipv6" ] + then + PROTOCOL="udp6" + else + PROTOCOL="udp" + fi + + wait_local_port_listen "${NAMESPACE}" "${PORT}" "${PROTOCOL}" + # even after the port is open, let's wait 1 second before writing + # otherwise the packet could be missed, and the test will fail. Happens + # more frequently on IPv6 + sleep 1 +} + +# Clean up netdevsim ifaces created for bonding test +function cleanup_bond_nsim() { + ip -n "${TXNS}" \ + link delete "${BOND_TX_MAIN_IF}" type bond || true + ip -n "${RXNS}" \ + link delete "${BOND_RX_MAIN_IF}" type bond || true + + cleanup_netdevsim "$NSIM_BOND_TX_1" + cleanup_netdevsim "$NSIM_BOND_TX_2" + cleanup_netdevsim "$NSIM_BOND_RX_1" + cleanup_netdevsim "$NSIM_BOND_RX_2" +} + +# cleanup tests that use bonding interfaces +function cleanup_bond() { + cleanup_netcons + cleanup_bond_nsim + cleanup_all_ns + ip link delete "${VETH0}" || true +} diff --git a/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh b/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh new file mode 100755 index 000000000000..82be5d013330 --- /dev/null +++ b/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh @@ -0,0 +1,668 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> + +# The script is adopted to work with the Microchip KSZ switch driver. + +ETH_FCS_LEN=4 + +WAIT_TIME=1 +NUM_NETIFS=4 +REQUIRE_JQ="yes" +REQUIRE_MZ="yes" +STABLE_MAC_ADDRS=yes +NETIF_CREATE=no +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +require_command dcb + +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +# On h1_ and h2_create do not set IP addresses to avoid interaction with the +# system, to keep packet counters clean. +h1_create() +{ + simple_if_init $h1 + sysctl_set net.ipv6.conf.${h1}.disable_ipv6 1 + # Get the MAC address of the interface to use it with mausezahn + h1_mac=$(ip -j link show dev ${h1} | jq -e '.[].address') +} + +h1_destroy() +{ + sysctl_restore net.ipv6.conf.${h1}.disable_ipv6 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + sysctl_set net.ipv6.conf.${h2}.disable_ipv6 1 + h2_mac=$(ip -j link show dev ${h2} | jq -e '.[].address') +} + +h2_destroy() +{ + sysctl_restore net.ipv6.conf.${h2}.disable_ipv6 + simple_if_fini $h2 +} + +switch_create() +{ + ip link set ${swp1} up + ip link set ${swp2} up + sysctl_set net.ipv6.conf.${swp1}.disable_ipv6 1 + sysctl_set net.ipv6.conf.${swp2}.disable_ipv6 1 + + # Ports should trust VLAN PCP even with vlan_filtering=0 + ip link add br0 type bridge + ip link set ${swp1} master br0 + ip link set ${swp2} master br0 + ip link set br0 up + sysctl_set net.ipv6.conf.br0.disable_ipv6 1 +} + +switch_destroy() +{ + sysctl_restore net.ipv6.conf.${swp2}.disable_ipv6 + sysctl_restore net.ipv6.conf.${swp1}.disable_ipv6 + + ip link del br0 +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + switch_destroy + + vrf_cleanup +} + +set_apptrust_order() +{ + local if_name=$1 + local order=$2 + + dcb apptrust set dev ${if_name} order ${order} +} + +# Function to extract a specified field from a given JSON stats string +extract_network_stat() { + local stats_json=$1 + local field_name=$2 + + echo $(echo "$stats_json" | jq -r "$field_name") +} + +run_test() +{ + local test_name=$1; + local apptrust_order=$2; + local port_prio=$3; + local dscp_ipv=$4; + local dscp=$5; + local have_vlan=$6; + local pcp_ipv=$7; + local vlan_pcp=$8; + local ip_v6=$9 + + local rx_ipv + local tx_ipv + + RET=0 + + # Send some packet to populate the switch MAC table + $MZ ${h2} -a ${h2_mac} -b ${h1_mac} -p 64 -t icmp echores -c 1 + + # Based on the apptrust order, set the expected Internal Priority values + # for the RX and TX paths. + if [ "${apptrust_order}" == "" ]; then + echo "Apptrust order not set." + rx_ipv=${port_prio} + tx_ipv=${port_prio} + elif [ "${apptrust_order}" == "dscp" ]; then + echo "Apptrust order is DSCP." + rx_ipv=${dscp_ipv} + tx_ipv=${dscp_ipv} + elif [ "${apptrust_order}" == "pcp" ]; then + echo "Apptrust order is PCP." + rx_ipv=${pcp_ipv} + tx_ipv=${pcp_ipv} + elif [ "${apptrust_order}" == "pcp dscp" ]; then + echo "Apptrust order is PCP DSCP." + if [ ${have_vlan} -eq 1 ]; then + rx_ipv=$((dscp_ipv > pcp_ipv ? dscp_ipv : pcp_ipv)) + tx_ipv=${pcp_ipv} + else + rx_ipv=${dscp_ipv} + tx_ipv=${dscp_ipv} + fi + else + RET=1 + echo "Error: Unknown apptrust order ${apptrust_order}" + log_test "${test_name}" + return + fi + + # Most/all? of the KSZ switches do not provide per-TC counters. There + # are only tx_hi and rx_hi counters, which are used to count packets + # which are considered as high priority and most likely not assigned + # to the queue 0. + # On the ingress path, packets seem to get high priority status + # independently of the DSCP or PCP global mapping. On the egress path, + # the high priority status is assigned based on the DSCP or PCP global + # map configuration. + # The thresholds for the high priority status are not documented, but + # it seems that the switch considers packets as high priority on the + # ingress path if detected Internal Priority is greater than 0. On the + # egress path, the switch considers packets as high priority if + # detected Internal Priority is greater than 1. + if [ ${rx_ipv} -ge 1 ]; then + local expect_rx_high_prio=1 + else + local expect_rx_high_prio=0 + fi + + if [ ${tx_ipv} -ge 2 ]; then + local expect_tx_high_prio=1 + else + local expect_tx_high_prio=0 + fi + + # Use ip tool to get the current switch packet counters. ethool stats + # need to be recalculated to get the correct values. + local swp1_stats=$(ip -s -j link show dev ${swp1}) + local swp2_stats=$(ip -s -j link show dev ${swp2}) + local swp1_rx_packets_before=$(extract_network_stat "$swp1_stats" \ + '.[0].stats64.rx.packets') + local swp1_rx_bytes_before=$(extract_network_stat "$swp1_stats" \ + '.[0].stats64.rx.bytes') + local swp2_tx_packets_before=$(extract_network_stat "$swp2_stats" \ + '.[0].stats64.tx.packets') + local swp2_tx_bytes_before=$(extract_network_stat "$swp2_stats" \ + '.[0].stats64.tx.bytes') + local swp1_rx_hi_before=$(ethtool_stats_get ${swp1} "rx_hi") + local swp2_tx_hi_before=$(ethtool_stats_get ${swp2} "tx_hi") + + # Assamble the mausezahn command based on the test parameters + # For the testis with ipv4 or ipv6, use icmp response packets, + # to avoid interaction with the system, to keep packet counters + # clean. + if [ ${ip_v6} -eq 0 ]; then + local ip="-a ${h1_mac} -b ${h2_mac} -A ${H1_IPV4} \ + -B ${H2_IPV4} -t icmp unreach,code=1,dscp=${dscp}" + else + local ip="-6 -a ${h1_mac} -b ${h2_mac} -A ${H1_IPV6} \ + -B ${H2_IPV6} -t icmp6 type=1,code=0,dscp=${dscp}" + fi + + if [ ${have_vlan} -eq 1 ]; then + local vlan_pcp_opt="-Q ${vlan_pcp}:0" + else + local vlan_pcp_opt="" + fi + $MZ ${h1} ${ip} -c ${PING_COUNT} -d 10msec ${vlan_pcp_opt} + + # Wait until the switch packet counters are updated + sleep 6 + + local swp1_stats=$(ip -s -j link show dev ${swp1}) + local swp2_stats=$(ip -s -j link show dev ${swp2}) + + local swp1_rx_packets_after=$(extract_network_stat "$swp1_stats" \ + '.[0].stats64.rx.packets') + local swp1_rx_bytes_after=$(extract_network_stat "$swp1_stats" \ + '.[0].stats64.rx.bytes') + local swp2_tx_packets_after=$(extract_network_stat "$swp2_stats" \ + '.[0].stats64.tx.packets') + local swp2_tx_bytes_after=$(extract_network_stat "$swp2_stats" \ + '.[0].stats64.tx.bytes') + + local swp1_rx_packets_diff=$((${swp1_rx_packets_after} - \ + ${swp1_rx_packets_before})) + local swp2_tx_packets_diff=$((${swp2_tx_packets_after} - \ + ${swp2_tx_packets_before})) + + local swp1_rx_hi_after=$(ethtool_stats_get ${swp1} "rx_hi") + local swp2_tx_hi_after=$(ethtool_stats_get ${swp2} "tx_hi") + + # Test if any packets were received on swp1, we will rx before and after + if [ ${swp1_rx_packets_diff} -lt ${PING_COUNT} ]; then + echo "Not expected amount of received packets on ${swp1}" + echo "before ${swp1_rx_packets_before} after ${swp1_rx_packets_after}" + RET=1 + fi + + # Test if any packets were transmitted on swp2, we will tx before and after + if [ ${swp2_tx_packets_diff} -lt ${PING_COUNT} ]; then + echo "Not expected amount of transmitted packets on ${swp2}" + echo "before ${swp2_tx_packets_before} after ${swp2_tx_packets_after}" + RET=1 + fi + + # tx/rx_hi counted in bytes. So, we need to compare the difference in bytes + local swp1_rx_bytes_diff=$(($swp1_rx_bytes_after - $swp1_rx_bytes_before)) + local swp2_tx_bytes_diff=$(($swp2_tx_bytes_after - $swp2_tx_bytes_before)) + local swp1_rx_hi_diff=$(($swp1_rx_hi_after - $swp1_rx_hi_before)) + local swp2_tx_hi_diff=$(($swp2_tx_hi_after - $swp2_tx_hi_before)) + + if [ ${expect_rx_high_prio} -eq 1 ]; then + swp1_rx_hi_diff=$((${swp1_rx_hi_diff} - \ + ${swp1_rx_packets_diff} * ${ETH_FCS_LEN})) + if [ ${swp1_rx_hi_diff} -ne ${swp1_rx_bytes_diff} ]; then + echo "Not expected amount of high priority packets received on ${swp1}" + echo "RX hi diff: ${swp1_rx_hi_diff}, expected RX bytes diff: ${swp1_rx_bytes_diff}" + RET=1 + fi + else + if [ ${swp1_rx_hi_diff} -ne 0 ]; then + echo "Unexpected amount of high priority packets received on ${swp1}" + echo "RX hi diff: ${swp1_rx_hi_diff}, expected 0" + RET=1 + fi + fi + + if [ ${expect_tx_high_prio} -eq 1 ]; then + swp2_tx_hi_diff=$((${swp2_tx_hi_diff} - \ + ${swp2_tx_packets_diff} * ${ETH_FCS_LEN})) + if [ ${swp2_tx_hi_diff} -ne ${swp2_tx_bytes_diff} ]; then + echo "Not expected amount of high priority packets transmitted on ${swp2}" + echo "TX hi diff: ${swp2_tx_hi_diff}, expected TX bytes diff: ${swp2_tx_bytes_diff}" + RET=1 + fi + else + if [ ${swp2_tx_hi_diff} -ne 0 ]; then + echo "Unexpected amount of high priority packets transmitted on ${swp2}" + echo "TX hi diff: ${swp2_tx_hi_diff}, expected 0" + RET=1 + fi + fi + + log_test "${test_name}" +} + +run_test_dscp() +{ + # IPv4 test + run_test "$1" "$2" "$3" "$4" "$5" 0 0 0 0 + # IPv6 test + run_test "$1" "$2" "$3" "$4" "$5" 0 0 0 1 +} + +run_test_dscp_pcp() +{ + # IPv4 test + run_test "$1" "$2" "$3" "$4" "$5" 1 "$6" "$7" 0 + # IPv6 test + run_test "$1" "$2" "$3" "$4" "$5" 1 "$6" "$7" 1 +} + +port_default_prio_get() +{ + local if_name=$1 + local prio + + prio="$(dcb -j app show dev ${if_name} default-prio | \ + jq '.default_prio[]')" + if [ -z "${prio}" ]; then + prio=0 + fi + + echo ${prio} +} + +test_port_default() +{ + local orig_apptrust=$(port_get_default_apptrust ${swp1}) + local orig_prio=$(port_default_prio_get ${swp1}) + local apptrust_order="" + + RET=0 + + # Make sure no other priority sources will interfere with the test + set_apptrust_order ${swp1} "${apptrust_order}" + + for val in $(seq 0 7); do + dcb app replace dev ${swp1} default-prio ${val} + if [ $val -ne $(port_default_prio_get ${swp1}) ]; then + RET=1 + break + fi + + run_test_dscp "Port-default QoS classification, prio: ${val}" \ + "${apptrust_order}" ${val} 0 0 + done + + set_apptrust_order ${swp1} "${orig_apptrust}" + if [[ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]]; then + RET=1 + fi + + dcb app replace dev ${swp1} default-prio ${orig_prio} + if [ $orig_prio -ne $(port_default_prio_get ${swp1}) ]; then + RET=1 + fi + + log_test "Port-default QoS classification" +} + +port_get_default_apptrust() +{ + local if_name=$1 + + dcb -j apptrust show dev ${if_name} | jq -r '.order[]' | \ + tr '\n' ' ' | xargs +} + +test_port_apptrust() +{ + local original_dscp_prios_swp1=$(get_dscp_prios ${swp1}) + local orig_apptrust=$(port_get_default_apptrust ${swp1}) + local orig_port_prio=$(port_default_prio_get ${swp1}) + local order_variants=("pcp dscp" "dscp" "pcp") + local apptrust_order + local port_prio + local dscp_prio + local pcp_prio + local dscp + local pcp + + RET=0 + + # First, test if apptrust configuration as taken by the kernel + for order in "${order_variants[@]}"; do + set_apptrust_order ${swp1} "${order}" + if [[ "$order" != "$(port_get_default_apptrust ${swp1})" ]]; then + RET=1 + break + fi + done + + log_test "Apptrust, supported variants" + + # To test if the apptrust configuration is working as expected, we need + # to set DSCP priorities for the switch port. + init_dscp_prios "${swp1}" "${original_dscp_prios_swp1}" + + # Start with a simple test where all apptrust sources are disabled + # default port priority is 0, DSCP priority is mapped to 7. + # No high priority packets should be received or transmitted. + port_prio=0 + dscp_prio=7 + dscp=4 + + dcb app replace dev ${swp1} default-prio ${port_prio} + dcb app replace dev ${swp1} dscp-prio ${dscp}:${dscp_prio} + + apptrust_order="" + set_apptrust_order ${swp1} "${apptrust_order}" + # Test with apptrust sources disabled, Packets should get port default + # priority which is 0 + run_test_dscp "Apptrust, all disabled. DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} + + apptrust_order="pcp" + set_apptrust_order ${swp1} "${apptrust_order}" + # If PCP is enabled, packets should get PCP priority, which is not + # set in this test (no VLAN tags are present in the packet). No high + # priority packets should be received or transmitted. + run_test_dscp "Apptrust, PCP enabled. DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} + + apptrust_order="dscp" + set_apptrust_order ${swp1} "${apptrust_order}" + # If DSCP is enabled, packets should get DSCP priority which is set to 7 + # in this test. High priority packets should be received and transmitted. + run_test_dscp "Apptrust, DSCP enabled. DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} + + apptrust_order="pcp dscp" + set_apptrust_order ${swp1} "${apptrust_order}" + # If PCP and DSCP are enabled, PCP would have higher apptrust priority + # so packets should get PCP priority. But in this test VLAN PCP is not + # set, so it should get DSCP priority which is set to 7. High priority + # packets should be received and transmitted. + run_test_dscp "Apptrust, PCP and DSCP are enabled. DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} + + # If VLAN PCP is set, it should have higher apptrust priority than DSCP + # so packets should get VLAN PCP priority. Send packets with VLAN PCP + # set to 0, DSCP set to 7. Packets should get VLAN PCP priority. + # No high priority packets should be transmitted. Due to nature of the + # switch, high priority packets will be received. + pcp_prio=0 + pcp=0 + run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp} + + # If VLAN PCP is set to 7, it should have higher apptrust priority than + # DSCP so packets should get VLAN PCP priority. Send packets with VLAN + # PCP set to 7, DSCP set to 7. Packets should get VLAN PCP priority. + # High priority packets should be received and transmitted. + pcp_prio=7 + pcp=7 + run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp} + # Now make sure that the switch is able to handle the case where DSCP + # priority is set to 0 and PCP priority is set to 7. Packets should get + # PCP priority. High priority packets should be received and transmitted. + dscp_prio=0 + dcb app replace dev ${swp1} dscp-prio ${dscp}:${dscp_prio} + run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp} + # If both VLAN PCP and DSCP are set to 0, packets should get 0 priority. + # No high priority packets should be received or transmitted. + pcp_prio=0 + pcp=0 + run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp} + + # Restore original priorities + if ! restore_priorities "${swp1}" "${original_dscp_prios_swp1}"; then + RET=1 + fi + + set_apptrust_order ${swp1} "${orig_apptrust}" + if [ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]; then + RET=1 + fi + + dcb app replace dev ${swp1} default-prio ${orig_port_prio} + if [ $orig_port_prio -ne $(port_default_prio_get ${swp1}) ]; then + RET=1 + fi + + log_test "Apptrust, restore original settings" +} + +# Function to get current DSCP priorities +get_dscp_prios() { + local if_name=$1 + dcb -j app show dev ${if_name} | jq -c '.dscp_prio' +} + +# Function to set a specific DSCP priority on a device +replace_dscp_prio() { + local if_name=$1 + local dscp=$2 + local prio=$3 + dcb app replace dev ${if_name} dscp-prio ${dscp}:${prio} +} + +# Function to compare DSCP maps +compare_dscp_maps() { + local old_json=$1 + local new_json=$2 + local dscp=$3 + local prio=$4 + + # Create a modified old_json with the expected change for comparison + local modified_old_json=$(echo "$old_json" | + jq --argjson dscp $dscp --argjson prio $prio \ + 'map(if .[0] == $dscp then [$dscp, $prio] else . end)' | + tr -d " \n") + + # Compare new_json with the modified_old_json + if [[ "$modified_old_json" == "$new_json" ]]; then + return 0 + else + return 1 + fi +} + +# Function to set DSCP priorities +set_and_verify_dscp() { + local port=$1 + local dscp=$2 + local new_prio=$3 + + local old_prios=$(get_dscp_prios $port) + + replace_dscp_prio "$port" $dscp $new_prio + + # Fetch current settings and compare + local current_prios=$(get_dscp_prios $port) + if ! compare_dscp_maps "$old_prios" "$current_prios" $dscp $new_prio; then + echo "Error: Unintended changes detected in DSCP map for $port after setting DSCP $dscp to $new_prio." + return 1 + fi + return 0 +} + +# Function to restore original priorities +restore_priorities() { + local port=$1 + local original_prios=$2 + + echo "Removing test artifacts for $port" + local current_prios=$(get_dscp_prios $port) + local prio_str=$(echo "$current_prios" | + jq -r 'map("\(.[0]):\(.[1])") | join(" ")') + dcb app del dev $port dscp-prio $prio_str + + echo "Restoring original DSCP priorities for $port" + local restore_str=$(echo "$original_prios" | + jq -r 'map("\(.[0]):\(.[1])") | join(" ")') + dcb app add dev $port dscp-prio $restore_str + + local current_prios=$(get_dscp_prios $port) + if [[ "$original_prios" != "$current_prios" ]]; then + echo "Error: Failed to restore original DSCP priorities for $port" + return 1 + fi + return 0 +} + +# Initialize DSCP priorities. Set them to predictable values for testing. +init_dscp_prios() { + local port=$1 + local original_prios=$2 + + echo "Removing any existing DSCP priority mappins for $port" + local prio_str=$(echo "$original_prios" | + jq -r 'map("\(.[0]):\(.[1])") | join(" ")') + dcb app del dev $port dscp-prio $prio_str + + # Initialize DSCP priorities list + local dscp_prios="" + for dscp in {0..63}; do + dscp_prios+=("$dscp:0") + done + + echo "Setting initial DSCP priorities map to 0 for $port" + dcb app add dev $port dscp-prio ${dscp_prios[@]} +} + +# Main function to test global DSCP map across specified ports +test_global_dscp_map() { + local ports=("$swp1" "$swp2") + local original_dscp_prios_port0=$(get_dscp_prios ${ports[0]}) + local orig_apptrust=$(port_get_default_apptrust ${swp1}) + local orig_port_prio=$(port_default_prio_get ${swp1}) + local apptrust_order="dscp" + local port_prio=0 + local dscp_prio + local dscp + + RET=0 + + set_apptrust_order ${swp1} "${apptrust_order}" + dcb app replace dev ${swp1} default-prio ${port_prio} + + # Initialize DSCP priorities + init_dscp_prios "${ports[0]}" "$original_dscp_prios_port0" + + # Loop over each DSCP index + for dscp in {0..63}; do + # and test each Internal Priority value + for dscp_prio in {0..7}; do + # do it for each port. This is to test if the global DSCP map + # is accessible from all ports. + for port in "${ports[@]}"; do + if ! set_and_verify_dscp "$port" $dscp $dscp_prio; then + RET=1 + fi + done + + # Test if the DSCP priority is correctly applied to the packets + run_test_dscp "DSCP (${dscp}) QoS classification, prio: ${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} + if [ ${RET} -eq 1 ]; then + break + fi + done + done + + # Restore original priorities + if ! restore_priorities "${ports[0]}" "${original_dscp_prios_port0}"; then + RET=1 + fi + + set_apptrust_order ${swp1} "${orig_apptrust}" + if [[ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]]; then + RET=1 + fi + + dcb app replace dev ${swp1} default-prio ${orig_port_prio} + if [ $orig_port_prio -ne $(port_default_prio_get ${swp1}) ]; then + RET=1 + fi + + log_test "DSCP global map" +} + +trap cleanup EXIT + +ALL_TESTS=" + test_port_default + test_port_apptrust + test_global_dscp_map +" + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh new file mode 100755 index 000000000000..bdffe698e1d1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh @@ -0,0 +1,201 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that blackhole routes are marked as offloaded and that packets hitting +# them are dropped by the ASIC and not by the kernel. +# +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | + $rp1 | +# | 192.0.2.2/24 | +# | 2001:db8:1::2/64 | +# | | +# | 2001:db8:2::2/64 | +# | 198.51.100.2/24 | +# | + $rp2 | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + blackhole_ipv4 + blackhole_ipv6 +" +NUM_NETIFS=4 +: ${TIMEOUT:=20000} # ms +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + tc qdisc add dev $rp1 clsact + + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64 +} + +router_destroy() +{ + __addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 + + tc qdisc del dev $rp1 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.1 ": h1->h2" +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": h1->h2" +} + +blackhole_ipv4() +{ + # Transmit packets from H1 to H2 and make sure they are dropped by the + # ASIC and not by the kernel + RET=0 + + ip -4 route add blackhole 198.51.100.0/30 + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 src_ip 192.0.2.1 ip_proto icmp \ + action pass + + busywait "$TIMEOUT" wait_for_offload ip -4 route show 198.51.100.0/30 + check_err $? "route not marked as offloaded when should" + + ping_do $h1 198.51.100.1 + check_fail $? "ping passed when should not" + + tc_check_packets "dev $rp1 ingress" 101 0 + check_err $? "packets trapped and not dropped by ASIC" + + log_test "IPv4 blackhole route" + + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower + ip -4 route del blackhole 198.51.100.0/30 +} + +blackhole_ipv6() +{ + RET=0 + + ip -6 route add blackhole 2001:db8:2::/120 + tc filter add dev $rp1 ingress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 src_ip 2001:db8:1::1 \ + ip_proto icmpv6 action pass + + busywait "$TIMEOUT" wait_for_offload ip -6 route show 2001:db8:2::/120 + check_err $? "route not marked as offloaded when should" + + ping6_do $h1 2001:db8:2::1 + check_fail $? "ping passed when should not" + + tc_check_packets "dev $rp1 ingress" 101 0 + check_err $? "packets trapped and not dropped by ASIC" + + log_test "IPv6 blackhole route" + + tc filter del dev $rp1 ingress protocol ipv6 pref 1 handle 101 flower + ip -6 route del blackhole 2001:db8:2::/120 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + router_create +} + +cleanup() +{ + pre_cleanup + + router_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh new file mode 100755 index 000000000000..224ca3695c89 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh @@ -0,0 +1,334 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# In addition to the common variables, user might use: +# LC_SLOT - If not set, all probed line cards are going to be tested, +# with an exception of the "activation_16x100G_test". +# It set, only the selected line card is going to be used +# for tests, including "activation_16x100G_test". + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + unprovision_test + provision_test + activation_16x100G_test +" + +NUM_NETIFS=0 + +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +until_lc_state_is() +{ + local state=$1; shift + local current=$("$@") + + echo "$current" + [ "$current" == "$state" ] +} + +until_lc_state_is_not() +{ + ! until_lc_state_is "$@" +} + +lc_state_get() +{ + local lc=$1 + + devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].state" +} + +lc_wait_until_state_changes() +{ + local lc=$1 + local state=$2 + local timeout=$3 # ms + + busywait "$timeout" until_lc_state_is_not "$state" lc_state_get "$lc" +} + +lc_wait_until_state_becomes() +{ + local lc=$1 + local state=$2 + local timeout=$3 # ms + + busywait "$timeout" until_lc_state_is "$state" lc_state_get "$lc" +} + +until_lc_port_count_is() +{ + local port_count=$1; shift + local current=$("$@") + + echo "$current" + [ $current == $port_count ] +} + +lc_port_count_get() +{ + local lc=$1 + + devlink port -j | jq -e -r ".[][] | select(.lc==$lc) | .port" | wc -l +} + +lc_wait_until_port_count_is() +{ + local lc=$1 + local port_count=$2 + local timeout=$3 # ms + + busywait "$timeout" until_lc_port_count_is "$port_count" lc_port_count_get "$lc" +} + +lc_nested_devlink_dev_get() +{ + local lc=$1 + + devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].nested_devlink" +} + +PROV_UNPROV_TIMEOUT=8000 # ms +POST_PROV_ACT_TIMEOUT=2000 # ms +PROV_PORTS_INSTANTIATION_TIMEOUT=15000 # ms + +unprovision_one() +{ + local lc=$1 + local state + + state=$(lc_state_get $lc) + check_err $? "Failed to get state of linecard $lc" + if [[ "$state" == "unprovisioned" ]]; then + return + fi + + log_info "Unprovisioning linecard $lc" + + devlink lc set $DEVLINK_DEV lc $lc notype + check_err $? "Failed to trigger linecard $lc unprovisioning" + + state=$(lc_wait_until_state_changes $lc "unprovisioning" \ + $PROV_UNPROV_TIMEOUT) + check_err $? "Failed to unprovision linecard $lc (timeout)" + + [ "$state" == "unprovisioned" ] + check_err $? "Failed to unprovision linecard $lc (state=$state)" +} + +provision_one() +{ + local lc=$1 + local type=$2 + local state + + log_info "Provisioning linecard $lc" + + devlink lc set $DEVLINK_DEV lc $lc type $type + check_err $? "Failed trigger linecard $lc provisioning" + + state=$(lc_wait_until_state_changes $lc "provisioning" \ + $PROV_UNPROV_TIMEOUT) + check_err $? "Failed to provision linecard $lc (timeout)" + + [ "$state" == "provisioned" ] || [ "$state" == "active" ] + check_err $? "Failed to provision linecard $lc (state=$state)" + + provisioned_type=$(devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].type") + [ "$provisioned_type" == "$type" ] + check_err $? "Wrong provision type returned for linecard $lc (got \"$provisioned_type\", expected \"$type\")" + + # Wait for possible activation to make sure the state + # won't change after return from this function. + state=$(lc_wait_until_state_becomes $lc "active" \ + $POST_PROV_ACT_TIMEOUT) +} + +unprovision_test() +{ + RET=0 + local lc + + lc=$LC_SLOT + unprovision_one $lc + log_test "Unprovision" +} + +LC_16X100G_TYPE="16x100G" +LC_16X100G_PORT_COUNT=16 + +supported_types_check() +{ + local lc=$1 + local supported_types_count + local type_index + local lc_16x100_found=false + + supported_types_count=$(devlink lc show $DEVLINK_DEV lc $lc -j | \ + jq -e -r ".[][][].supported_types | length") + [ $supported_types_count != 0 ] + check_err $? "No supported types found for linecard $lc" + for (( type_index=0; type_index<$supported_types_count; type_index++ )) + do + type=$(devlink lc show $DEVLINK_DEV lc $lc -j | \ + jq -e -r ".[][][].supported_types[$type_index]") + if [[ "$type" == "$LC_16X100G_TYPE" ]]; then + lc_16x100_found=true + break + fi + done + [ $lc_16x100_found = true ] + check_err $? "16X100G not found between supported types of linecard $lc" +} + +ports_check() +{ + local lc=$1 + local expected_port_count=$2 + local port_count + + port_count=$(lc_wait_until_port_count_is $lc $expected_port_count \ + $PROV_PORTS_INSTANTIATION_TIMEOUT) + [ $port_count != 0 ] + check_err $? "No port associated with linecard $lc" + [ $port_count == $expected_port_count ] + check_err $? "Unexpected port count linecard $lc (got $port_count, expected $expected_port_count)" +} + +lc_dev_info_provisioned_check() +{ + local lc=$1 + local nested_devlink_dev=$2 + local fixed_hw_revision + local running_ini_version + + fixed_hw_revision=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r '.[][].versions.fixed."hw.revision"') + check_err $? "Failed to get linecard $lc fixed.hw.revision" + log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\"" + running_ini_version=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r '.[][].versions.running."ini.version"') + check_err $? "Failed to get linecard $lc running.ini.version" + log_info "Linecard $lc running.ini.version: \"$running_ini_version\"" +} + +provision_test() +{ + RET=0 + local lc + local type + local state + local nested_devlink_dev + + lc=$LC_SLOT + supported_types_check $lc + state=$(lc_state_get $lc) + check_err $? "Failed to get state of linecard $lc" + if [[ "$state" != "unprovisioned" ]]; then + unprovision_one $lc + fi + provision_one $lc $LC_16X100G_TYPE + ports_check $lc $LC_16X100G_PORT_COUNT + + nested_devlink_dev=$(lc_nested_devlink_dev_get $lc) + check_err $? "Failed to get nested devlink handle of linecard $lc" + lc_dev_info_provisioned_check $lc $nested_devlink_dev + + log_test "Provision" +} + +ACTIVATION_TIMEOUT=20000 # ms + +interface_check() +{ + ip link set $h1 up + ip link set $h2 up + ifaces_upped=true + setup_wait +} + +lc_dev_info_active_check() +{ + local lc=$1 + local nested_devlink_dev=$2 + local fixed_device_fw_psid + local running_device_fw + + fixed_device_fw_psid=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r ".[][].versions.fixed" | \ + jq -e -r '."fw.psid"') + check_err $? "Failed to get linecard $lc fixed fw PSID" + log_info "Linecard $lc fixed.fw.psid: \"$fixed_device_fw_psid\"" + + running_device_fw=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r ".[][].versions.running.fw") + check_err $? "Failed to get linecard $lc running.fw.version" + log_info "Linecard $lc running.fw: \"$running_device_fw\"" +} + +activation_16x100G_test() +{ + RET=0 + local lc + local type + local state + local nested_devlink_dev + + lc=$LC_SLOT + type=$LC_16X100G_TYPE + + unprovision_one $lc + provision_one $lc $type + state=$(lc_wait_until_state_becomes $lc "active" \ + $ACTIVATION_TIMEOUT) + check_err $? "Failed to get linecard $lc activated (timeout)" + + interface_check + + nested_devlink_dev=$(lc_nested_devlink_dev_get $lc) + check_err $? "Failed to get nested devlink handle of linecard $lc" + lc_dev_info_active_check $lc $nested_devlink_dev + + log_test "Activation 16x100G" +} + +setup_prepare() +{ + local lc_num=$(devlink lc show -j | jq -e -r ".[][\"$DEVLINK_DEV\"] |length") + if [[ $? -ne 0 ]] || [[ $lc_num -eq 0 ]]; then + echo "SKIP: No linecard support found" + exit $ksft_skip + fi + + if [ -z "$LC_SLOT" ]; then + echo "SKIP: \"LC_SLOT\" variable not provided" + exit $ksft_skip + fi + + # Interfaces are not present during the script start, + # that's why we define NUM_NETIFS here so dummy + # implicit veth pairs are not created. + NUM_NETIFS=2 + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + ifaces_upped=false +} + +cleanup() +{ + if [ "$ifaces_upped" = true ] ; then + ip link set $h1 down + ip link set $h2 down + fi +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh new file mode 100755 index 000000000000..36055279ba92 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test generic devlink-trap functionality over mlxsw. These tests are not +# specific to a single trap, but do not check the devlink-trap common +# infrastructure either. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + dev_del_test +" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 +} + +h1_destroy() +{ + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 +} + +h2_destroy() +{ + simple_if_fini $h2 +} + +switch_create() +{ + ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +dev_del_test() +{ + local trap_name="source_mac_is_multicast" + local smac=01:02:03:04:05:06 + local num_iter=5 + local mz_pid + local i + + $MZ $h1 -c 0 -p 100 -a $smac -b bcast -t ip -q & + mz_pid=$! + + # The purpose of this test is to make sure we correctly dismantle a + # port while packets are trapped from it. This is done by reloading the + # the driver while the 'ingress_smac_mc_drop' trap is triggered. + RET=0 + + for i in $(seq 1 $num_iter); do + log_info "Iteration $i / $num_iter" + + devlink_trap_action_set $trap_name "trap" + sleep 1 + + devlink_reload + # Allow netdevices to be re-created following the reload + sleep 20 + + cleanup + setup_prepare + setup_wait + done + + log_test "Device delete" + + kill_process $mz_pid +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh new file mode 100755 index 000000000000..b32ba5fec59d --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh @@ -0,0 +1,151 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap ACL drops functionality over mlxsw. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ingress_flow_action_drop_test + egress_flow_action_drop_test +" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 +} + +h1_destroy() +{ + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 +} + +h2_destroy() +{ + simple_if_fini $h2 +} + +switch_create() +{ + ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ingress_flow_action_drop_test() +{ + local mz_pid + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower src_mac $h1mac action pass + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \ + flower dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -d 1msec -q & + mz_pid=$! + + RET=0 + + devlink_trap_drop_test ingress_flow_action_drop $swp2 101 + + log_test "ingress_flow_action_drop" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 +} + +egress_flow_action_drop_test() +{ + local mz_pid + + tc filter add dev $swp2 egress protocol ip pref 2 handle 102 \ + flower src_mac $h1mac action pass + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -d 1msec -q & + mz_pid=$! + + RET=0 + + devlink_trap_drop_test egress_flow_action_drop $swp2 102 + + log_test "egress_flow_action_drop" + + tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower + + devlink_trap_drop_cleanup $mz_pid $swp2 ip 2 102 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh new file mode 100755 index 000000000000..64153bbf95df --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh @@ -0,0 +1,709 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap control trap functionality over mlxsw. Each registered +# control packet trap is tested to make sure it is triggered under the right +# conditions. +# +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | + $rp1 | +# | 192.0.2.2/24 | +# | 2001:db8:1::2/64 | +# | | +# | 2001:db8:2::2/64 | +# | 198.51.100.2/24 | +# | + $rp2 | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + stp_test + lacp_test + lldp_test + igmp_query_test + igmp_v1_report_test + igmp_v2_report_test + igmp_v3_report_test + igmp_v2_leave_test + mld_query_test + mld_v1_report_test + mld_v2_report_test + mld_v1_done_test + ipv4_dhcp_test + ipv6_dhcp_test + arp_request_test + arp_response_test + ipv6_neigh_solicit_test + ipv6_neigh_advert_test + ipv4_bfd_test + ipv6_bfd_test + ipv4_ospf_test + ipv6_ospf_test + ipv4_bgp_test + ipv6_bgp_test + ipv4_vrrp_test + ipv6_vrrp_test + ipv4_pim_test + ipv6_pim_test + uc_loopback_test + local_route_test + external_route_test + ipv6_uc_dip_link_local_scope_test + ipv4_router_alert_test + ipv6_router_alert_test + ipv6_dip_all_nodes_test + ipv6_dip_all_routers_test + ipv6_router_solicit_test + ipv6_router_advert_test + ipv6_redirect_test + ptp_event_test + ptp_general_test + flow_action_sample_test + flow_action_trap_test + eapol_test +" +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source mlxsw_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64 +} + +router_destroy() +{ + __addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + router_create +} + +cleanup() +{ + pre_cleanup + + router_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +stp_test() +{ + devlink_trap_stats_test "STP" "stp" $MZ $h1 -c 1 -t bpdu -q +} + +lacp_payload_get() +{ + local source_mac=$1; shift + local p + + p=$(: + )"01:80:C2:00:00:02:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"88:09:"$( : ETH type + ) + echo $p +} + +lacp_test() +{ + local h1mac=$(mac_get $h1) + + devlink_trap_stats_test "LACP" "lacp" $MZ $h1 -c 1 \ + $(lacp_payload_get $h1mac) -p 100 -q +} + +lldp_payload_get() +{ + local source_mac=$1; shift + local p + + p=$(: + )"01:80:C2:00:00:0E:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"88:CC:"$( : ETH type + ) + echo $p +} + +lldp_test() +{ + local h1mac=$(mac_get $h1) + + devlink_trap_stats_test "LLDP" "lldp" $MZ $h1 -c 1 \ + $(lldp_payload_get $h1mac) -p 100 -q +} + +igmp_query_test() +{ + # IGMP (IP Protocol 2) Membership Query (Type 0x11) + devlink_trap_stats_test "IGMP Membership Query" "igmp_query" \ + $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \ + -A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=11 -p 100 -q +} + +igmp_v1_report_test() +{ + # IGMP (IP Protocol 2) Version 1 Membership Report (Type 0x12) + devlink_trap_stats_test "IGMP Version 1 Membership Report" \ + "igmp_v1_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \ + -A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=12 -p 100 -q +} + +igmp_v2_report_test() +{ + # IGMP (IP Protocol 2) Version 2 Membership Report (Type 0x16) + devlink_trap_stats_test "IGMP Version 2 Membership Report" \ + "igmp_v2_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \ + -A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=16 -p 100 -q +} + +igmp_v3_report_test() +{ + # IGMP (IP Protocol 2) Version 3 Membership Report (Type 0x22) + devlink_trap_stats_test "IGMP Version 3 Membership Report" \ + "igmp_v3_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \ + -A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=22 -p 100 -q +} + +igmp_v2_leave_test() +{ + # IGMP (IP Protocol 2) Version 2 Leave Group (Type 0x17) + devlink_trap_stats_test "IGMP Version 2 Leave Group" \ + "igmp_v2_leave" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:02 \ + -A 192.0.2.1 -B 224.0.0.2 -t ip proto=2,p=17 -p 100 -q +} + +mld_payload_get() +{ + local type=$1; shift + local p + + type=$(printf "%x" $type) + p=$(: + )"3A:"$( : Next Header - ICMPv6 + )"00:"$( : Hdr Ext Len + )"00:00:00:00:00:00:"$( : Options and Padding + )"$type:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) + echo $p +} + +mld_query_test() +{ + # MLD Multicast Listener Query (Type 130) + devlink_trap_stats_test "MLD Multicast Listener Query" "mld_query" \ + $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1 \ + -t ip hop=1,next=0,payload=$(mld_payload_get 130) -p 100 -q +} + +mld_v1_report_test() +{ + # MLD Version 1 Multicast Listener Report (Type 131) + devlink_trap_stats_test "MLD Version 1 Multicast Listener Report" \ + "mld_v1_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \ + -t ip hop=1,next=0,payload=$(mld_payload_get 131) -p 100 -q +} + +mld_v2_report_test() +{ + # MLD Version 2 Multicast Listener Report (Type 143) + devlink_trap_stats_test "MLD Version 2 Multicast Listener Report" \ + "mld_v2_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \ + -t ip hop=1,next=0,payload=$(mld_payload_get 143) -p 100 -q +} + +mld_v1_done_test() +{ + # MLD Version 1 Multicast Listener Done (Type 132) + devlink_trap_stats_test "MLD Version 1 Multicast Listener Done" \ + "mld_v1_done" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \ + -t ip hop=1,next=0,payload=$(mld_payload_get 132) -p 100 -q +} + +ipv4_dhcp_test() +{ + devlink_trap_stats_test "IPv4 DHCP Port 67" "ipv4_dhcp" \ + $MZ $h1 -c 1 -a own -b bcast -A 0.0.0.0 -B 255.255.255.255 \ + -t udp sp=68,dp=67 -p 100 -q + + devlink_trap_stats_test "IPv4 DHCP Port 68" "ipv4_dhcp" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) -A 192.0.2.1 \ + -B 255.255.255.255 -t udp sp=67,dp=68 -p 100 -q +} + +ipv6_dhcp_test() +{ + devlink_trap_stats_test "IPv6 DHCP Port 547" "ipv6_dhcp" \ + $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=546,dp=547 \ + -p 100 -q + + devlink_trap_stats_test "IPv6 DHCP Port 546" "ipv6_dhcp" \ + $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=547,dp=546 \ + -p 100 -q +} + +arp_request_test() +{ + devlink_trap_stats_test "ARP Request" "arp_request" \ + $MZ $h1 -c 1 -a own -b bcast -t arp request -p 100 -q +} + +arp_response_test() +{ + devlink_trap_stats_test "ARP Response" "arp_response" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) -t arp reply -p 100 -q +} + +icmpv6_header_get() +{ + local type=$1; shift + local p + + type=$(printf "%x" $type) + p=$(: + )"$type:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) + echo $p +} + +ipv6_neigh_solicit_test() +{ + devlink_trap_stats_test "IPv6 Neighbour Solicitation" \ + "ipv6_neigh_solicit" $MZ $h1 -6 -c 1 \ + -A fe80::1 -B ff02::1:ff00:02 \ + -t ip hop=1,next=58,payload=$(icmpv6_header_get 135) -p 100 -q +} + +ipv6_neigh_advert_test() +{ + devlink_trap_stats_test "IPv6 Neighbour Advertisement" \ + "ipv6_neigh_advert" $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A fe80::1 -B 2001:db8:1::2 \ + -t ip hop=1,next=58,payload=$(icmpv6_header_get 136) -p 100 -q +} + +ipv4_bfd_test() +{ + devlink_trap_stats_test "IPv4 BFD Control - Port 3784" "ipv4_bfd" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3784 -p 100 -q + + devlink_trap_stats_test "IPv4 BFD Echo - Port 3785" "ipv4_bfd" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3785 -p 100 -q +} + +ipv6_bfd_test() +{ + devlink_trap_stats_test "IPv6 BFD Control - Port 3784" "ipv6_bfd" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:1::2 \ + -t udp sp=49153,dp=3784 -p 100 -q + + devlink_trap_stats_test "IPv6 BFD Echo - Port 3785" "ipv6_bfd" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:1::2 \ + -t udp sp=49153,dp=3785 -p 100 -q +} + +ipv4_ospf_test() +{ + devlink_trap_stats_test "IPv4 OSPF - Multicast" "ipv4_ospf" \ + $MZ $h1 -c 1 -a own -b 01:00:5e:00:00:05 \ + -A 192.0.2.1 -B 224.0.0.5 -t ip proto=89 -p 100 -q + + devlink_trap_stats_test "IPv4 OSPF - Unicast" "ipv4_ospf" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 192.0.2.2 -t ip proto=89 -p 100 -q +} + +ipv6_ospf_test() +{ + devlink_trap_stats_test "IPv6 OSPF - Multicast" "ipv6_ospf" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:05 \ + -A fe80::1 -B ff02::5 -t ip next=89 -p 100 -q + + devlink_trap_stats_test "IPv6 OSPF - Unicast" "ipv6_ospf" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:1::2 -t ip next=89 -p 100 -q +} + +ipv4_bgp_test() +{ + devlink_trap_stats_test "IPv4 BGP" "ipv4_bgp" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 192.0.2.2 -t tcp sp=54321,dp=179,flags=rst \ + -p 100 -q +} + +ipv6_bgp_test() +{ + devlink_trap_stats_test "IPv6 BGP" "ipv6_bgp" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:1::2 \ + -t tcp sp=54321,dp=179,flags=rst -p 100 -q +} + +ipv4_vrrp_test() +{ + devlink_trap_stats_test "IPv4 VRRP" "ipv4_vrrp" \ + $MZ $h1 -c 1 -a own -b 01:00:5e:00:00:12 \ + -A 192.0.2.1 -B 224.0.0.18 -t ip proto=112 -p 100 -q +} + +ipv6_vrrp_test() +{ + devlink_trap_stats_test "IPv6 VRRP" "ipv6_vrrp" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:12 \ + -A fe80::1 -B ff02::12 -t ip next=112 -p 100 -q +} + +ipv4_pim_test() +{ + devlink_trap_stats_test "IPv4 PIM - Multicast" "ipv4_pim" \ + $MZ $h1 -c 1 -a own -b 01:00:5e:00:00:0d \ + -A 192.0.2.1 -B 224.0.0.13 -t ip proto=103 -p 100 -q + + devlink_trap_stats_test "IPv4 PIM - Unicast" "ipv4_pim" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 192.0.2.2 -t ip proto=103 -p 100 -q +} + +ipv6_pim_test() +{ + devlink_trap_stats_test "IPv6 PIM - Multicast" "ipv6_pim" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:0d \ + -A fe80::1 -B ff02::d -t ip next=103 -p 100 -q + + devlink_trap_stats_test "IPv6 PIM - Unicast" "ipv6_pim" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A fe80::1 -B 2001:db8:1::2 -t ip next=103 -p 100 -q +} + +uc_loopback_test() +{ + # Add neighbours to the fake destination IPs, so that the packets are + # routed in the device and not trapped due to an unresolved neighbour + # exception. + ip -4 neigh add 192.0.2.3 lladdr 00:11:22:33:44:55 nud permanent \ + dev $rp1 + ip -6 neigh add 2001:db8:1::3 lladdr 00:11:22:33:44:55 nud permanent \ + dev $rp1 + + devlink_trap_stats_test "IPv4 Unicast Loopback" "uc_loopback" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 192.0.2.3 -t udp sp=54321,dp=12345 -p 100 -q + + devlink_trap_stats_test "IPv6 Unicast Loopback" "uc_loopback" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:1::3 -t udp sp=54321,dp=12345 \ + -p 100 -q + + ip -6 neigh del 2001:db8:1::3 dev $rp1 + ip -4 neigh del 192.0.2.3 dev $rp1 +} + +local_route_test() +{ + # Use a fake source IP to prevent the trap from being triggered twice + # when the router sends back a port unreachable message. + devlink_trap_stats_test "IPv4 Local Route" "local_route" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.3 -B 192.0.2.2 -t udp sp=54321,dp=12345 -p 100 -q + + devlink_trap_stats_test "IPv6 Local Route" "local_route" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::3 -B 2001:db8:1::2 -t udp sp=54321,sp=12345 \ + -p 100 -q +} + +external_route_test() +{ + # Add a dummy device through which the incoming packets should be + # routed. + ip link add name dummy10 up type dummy + ip address add 203.0.113.1/24 dev dummy10 + ip -6 address add 2001:db8:10::1/64 dev dummy10 + + devlink_trap_stats_test "IPv4 External Route" "external_route" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 203.0.113.2 -t udp sp=54321,dp=12345 -p 100 -q + + devlink_trap_stats_test "IPv6 External Route" "external_route" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:10::2 -t udp sp=54321,sp=12345 \ + -p 100 -q + + ip -6 address del 2001:db8:10::1/64 dev dummy10 + ip address del 203.0.113.1/24 dev dummy10 + ip link del dev dummy10 +} + +ipv6_uc_dip_link_local_scope_test() +{ + # Add a dummy link-local prefix route to allow the packet to be routed. + ip -6 route add fe80:1::/64 dev $rp2 + + devlink_trap_stats_test \ + "IPv6 Unicast Destination IP With Link-Local Scope" \ + "ipv6_uc_dip_link_local_scope" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A fe80::1 -B fe80:1::2 -t udp sp=54321,sp=12345 \ + -p 100 -q + + ip -6 route del fe80:1::/64 dev $rp2 +} + +ipv4_router_alert_get() +{ + local p + + # https://en.wikipedia.org/wiki/IPv4#Options + p=$(: + )"94:"$( : Option Number + )"04:"$( : Option Length + )"00:00:"$( : Option Data + ) + echo $p +} + +ipv4_router_alert_test() +{ + devlink_trap_stats_test "IPv4 Router Alert" "ipv4_router_alert" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 198.51.100.3 \ + -t ip option=$(ipv4_router_alert_get) -p 100 -q +} + +ipv6_router_alert_get() +{ + local p + + # https://en.wikipedia.org/wiki/IPv6_packet#Hop-by-hop_options_and_destination_options + # https://tools.ietf.org/html/rfc2711#section-2.1 + p=$(: + )"11:"$( : Next Header - UDP + )"00:"$( : Hdr Ext Len + )"05:02:00:00:00:00:"$( : Option Data + ) + echo $p +} + +ipv6_router_alert_test() +{ + devlink_trap_stats_test "IPv6 Router Alert" "ipv6_router_alert" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A 2001:db8:1::1 -B 2001:db8:1::3 \ + -t ip next=0,payload=$(ipv6_router_alert_get) -p 100 -q +} + +ipv6_dip_all_nodes_test() +{ + devlink_trap_stats_test "IPv6 Destination IP \"All Nodes Address\"" \ + "ipv6_dip_all_nodes" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \ + -A 2001:db8:1::1 -B ff02::1 -t udp sp=12345,dp=54321 -p 100 -q +} + +ipv6_dip_all_routers_test() +{ + devlink_trap_stats_test "IPv6 Destination IP \"All Routers Address\"" \ + "ipv6_dip_all_routers" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \ + -A 2001:db8:1::1 -B ff02::2 -t udp sp=12345,dp=54321 -p 100 -q +} + +ipv6_router_solicit_test() +{ + devlink_trap_stats_test "IPv6 Router Solicitation" \ + "ipv6_router_solicit" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \ + -A fe80::1 -B ff02::2 \ + -t ip hop=1,next=58,payload=$(icmpv6_header_get 133) -p 100 -q +} + +ipv6_router_advert_test() +{ + devlink_trap_stats_test "IPv6 Router Advertisement" \ + "ipv6_router_advert" \ + $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \ + -A fe80::1 -B ff02::1 \ + -t ip hop=1,next=58,payload=$(icmpv6_header_get 134) -p 100 -q +} + +ipv6_redirect_test() +{ + devlink_trap_stats_test "IPv6 Redirect Message" \ + "ipv6_redirect" \ + $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \ + -A fe80::1 -B 2001:db8:1::2 \ + -t ip hop=1,next=58,payload=$(icmpv6_header_get 137) -p 100 -q +} + +ptp_event_test() +{ + mlxsw_only_on_spectrum 1 || return + + # PTP Sync (0) + devlink_trap_stats_test "PTP Time-Critical Event Message" "ptp_event" \ + $MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \ + -A 192.0.2.1 -B 224.0.1.129 \ + -t udp sp=12345,dp=319,payload=10 -p 100 -q +} + +ptp_general_test() +{ + mlxsw_only_on_spectrum 1 || return + + # PTP Announce (b) + devlink_trap_stats_test "PTP General Message" "ptp_general" \ + $MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \ + -A 192.0.2.1 -B 224.0.1.129 \ + -t udp sp=12345,dp=320,payload=1b -p 100 -q +} + +flow_action_sample_test() +{ + # Install a filter that samples every incoming packet. + tc qdisc add dev $rp1 clsact + tc filter add dev $rp1 ingress proto all pref 1 handle 101 matchall \ + skip_sw action sample rate 1 group 1 + + devlink_trap_stats_test "Flow Sampling" "flow_action_sample" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q + + tc filter del dev $rp1 ingress proto all pref 1 handle 101 matchall + tc qdisc del dev $rp1 clsact +} + +flow_action_trap_test() +{ + # Install a filter that traps a specific flow. + tc qdisc add dev $rp1 clsact + tc filter add dev $rp1 ingress proto ip pref 1 handle 101 flower \ + skip_sw ip_proto udp src_port 12345 dst_port 54321 action trap + + devlink_trap_stats_test "Flow Trapping (Logging)" "flow_action_trap" \ + $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \ + -A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q + + tc filter del dev $rp1 ingress proto ip pref 1 handle 101 flower + tc qdisc del dev $rp1 clsact +} + +eapol_payload_get() +{ + local source_mac=$1; shift + local p + + p=$(: + )"01:80:C2:00:00:03:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"88:8E:"$( : ETH type + ) + echo $p +} + +eapol_test() +{ + local h1mac=$(mac_get $h1) + + devlink_trap_stats_test "EAPOL" "eapol" $MZ $h1 -c 1 \ + $(eapol_payload_get $h1mac) -p 100 -q +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh new file mode 100755 index 000000000000..8d4b2c6265b3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh @@ -0,0 +1,535 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap L2 drops functionality over mlxsw. Each registered L2 drop +# packet trap is tested to make sure it is triggered under the right +# conditions. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + source_mac_is_multicast_test + vlan_tag_mismatch_test + ingress_vlan_filter_test + ingress_stp_filter_test + port_list_is_empty_test + port_loopback_filter_test + locked_port_test +" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 +} + +h1_destroy() +{ + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 +} + +h2_destroy() +{ + simple_if_fini $h2 +} + +switch_create() +{ + ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up + + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +source_mac_is_multicast_test() +{ + local trap_name="source_mac_is_multicast" + local smac=01:02:03:04:05:06 + local mz_pid + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower src_mac $smac action drop + + $MZ $h1 -c 0 -p 100 -a $smac -b bcast -t ip -d 1msec -q & + mz_pid=$! + + RET=0 + + devlink_trap_drop_test $trap_name $swp2 101 + + log_test "Source MAC is multicast" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 +} + +__vlan_tag_mismatch_test() +{ + local trap_name="vlan_tag_mismatch" + local dmac=de:ad:be:ef:13:37 + local opt=$1; shift + local mz_pid + + # Remove PVID flag. This should prevent untagged and prio-tagged + # packets from entering the bridge. + bridge vlan add vid 1 dev $swp1 untagged master + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_mac $dmac action drop + + $MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $swp2 101 + + # Add PVID and make sure packets are no longer dropped. + bridge vlan add vid 1 dev $swp1 pvid untagged master + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle when packets should not be dropped" + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) + check_err $? "Trap group stats not idle with when packets should not be dropped" + + tc_check_packets "dev $swp2 egress" 101 0 + check_fail $? "Packets not forwarded when should" + + devlink_trap_action_set $trap_name "drop" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 +} + +vlan_tag_mismatch_untagged_test() +{ + RET=0 + + __vlan_tag_mismatch_test + + log_test "VLAN tag mismatch - untagged packets" +} + +vlan_tag_mismatch_vid_0_test() +{ + RET=0 + + __vlan_tag_mismatch_test "-Q 0" + + log_test "VLAN tag mismatch - prio-tagged packets" +} + +vlan_tag_mismatch_test() +{ + vlan_tag_mismatch_untagged_test + vlan_tag_mismatch_vid_0_test +} + +ingress_vlan_filter_test() +{ + local trap_name="ingress_vlan_filter" + local dmac=de:ad:be:ef:13:37 + local mz_pid + local vid=10 + + bridge vlan add vid $vid dev $swp2 master + + RET=0 + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_mac $dmac action drop + + $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $swp2 101 + + # Add the VLAN on the bridge port and make sure packets are no longer + # dropped. + bridge vlan add vid $vid dev $swp1 master + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle when packets should not be dropped" + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) + check_err $? "Trap group stats not idle with when packets should not be dropped" + + tc_check_packets "dev $swp2 egress" 101 0 + check_fail $? "Packets not forwarded when should" + + devlink_trap_action_set $trap_name "drop" + + log_test "Ingress VLAN filter" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 + + bridge vlan del vid $vid dev $swp1 master + bridge vlan del vid $vid dev $swp2 master +} + +__ingress_stp_filter_test() +{ + local trap_name="ingress_spanning_tree_filter" + local dmac=de:ad:be:ef:13:37 + local state=$1; shift + local mz_pid + local vid=20 + + bridge vlan add vid $vid dev $swp2 master + bridge vlan add vid $vid dev $swp1 master + ip link set dev $swp1 type bridge_slave state $state + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_mac $dmac action drop + + $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $swp2 101 + + # Change STP state to forwarding and make sure packets are no longer + # dropped. + ip link set dev $swp1 type bridge_slave state 3 + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle when packets should not be dropped" + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) + check_err $? "Trap group stats not idle with when packets should not be dropped" + + tc_check_packets "dev $swp2 egress" 101 0 + check_fail $? "Packets not forwarded when should" + + devlink_trap_action_set $trap_name "drop" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 + + bridge vlan del vid $vid dev $swp1 master + bridge vlan del vid $vid dev $swp2 master +} + +ingress_stp_filter_listening_test() +{ + local state=$1; shift + + RET=0 + + __ingress_stp_filter_test $state + + log_test "Ingress STP filter - listening state" +} + +ingress_stp_filter_learning_test() +{ + local state=$1; shift + + RET=0 + + __ingress_stp_filter_test $state + + log_test "Ingress STP filter - learning state" +} + +ingress_stp_filter_test() +{ + ingress_stp_filter_listening_test 1 + ingress_stp_filter_learning_test 2 +} + +port_list_is_empty_uc_test() +{ + local trap_name="port_list_is_empty" + local dmac=de:ad:be:ef:13:37 + local mz_pid + + # Disable unicast flooding on both ports, so that packets cannot egress + # any port. + ip link set dev $swp1 type bridge_slave flood off + ip link set dev $swp2 type bridge_slave flood off + + RET=0 + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_mac $dmac action drop + + $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $swp2 101 + + # Allow packets to be flooded to one port. + ip link set dev $swp2 type bridge_slave flood on + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle when packets should not be dropped" + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) + check_err $? "Trap group stats not idle with when packets should not be dropped" + + tc_check_packets "dev $swp2 egress" 101 0 + check_fail $? "Packets not forwarded when should" + + devlink_trap_action_set $trap_name "drop" + + log_test "Port list is empty - unicast" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 + + ip link set dev $swp1 type bridge_slave flood on +} + +port_list_is_empty_mc_test() +{ + local trap_name="port_list_is_empty" + local dmac=01:00:5e:00:00:01 + local dip=239.0.0.1 + local mz_pid + + # Disable multicast flooding on both ports, so that packets cannot + # egress any port. We also need to flush IP addresses from the bridge + # in order to prevent packets from being flooded to the router port. + ip link set dev $swp1 type bridge_slave mcast_flood off + ip link set dev $swp2 type bridge_slave mcast_flood off + ip address flush dev br0 + + RET=0 + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_mac $dmac action drop + + $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $swp2 101 + + # Allow packets to be flooded to one port. + ip link set dev $swp2 type bridge_slave mcast_flood on + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle when packets should not be dropped" + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) + check_err $? "Trap group stats not idle with when packets should not be dropped" + + tc_check_packets "dev $swp2 egress" 101 0 + check_fail $? "Packets not forwarded when should" + + devlink_trap_action_set $trap_name "drop" + + log_test "Port list is empty - multicast" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 + + ip link set dev $swp1 type bridge_slave mcast_flood on +} + +port_list_is_empty_test() +{ + port_list_is_empty_uc_test + port_list_is_empty_mc_test +} + +port_loopback_filter_uc_test() +{ + local trap_name="port_loopback_filter" + local dmac=de:ad:be:ef:13:37 + local mz_pid + + # Make sure packets can only egress the input port. + ip link set dev $swp2 type bridge_slave flood off + + RET=0 + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_mac $dmac action drop + + $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $swp2 101 + + # Allow packets to be flooded. + ip link set dev $swp2 type bridge_slave flood on + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle when packets should not be dropped" + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) + check_err $? "Trap group stats not idle with when packets should not be dropped" + + tc_check_packets "dev $swp2 egress" 101 0 + check_fail $? "Packets not forwarded when should" + + devlink_trap_action_set $trap_name "drop" + + log_test "Port loopback filter - unicast" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101 +} + +port_loopback_filter_test() +{ + port_loopback_filter_uc_test +} + +locked_port_miss_test() +{ + local trap_name="locked_port" + local smac=00:11:22:33:44:55 + + bridge link set dev $swp1 learning off + bridge link set dev $swp1 locked on + + RET=0 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased before setting action to \"trap\"" + + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_err $? "Trap stats did not increase when should" + + devlink_trap_action_set $trap_name "drop" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after setting action to \"drop\"" + + devlink_trap_action_set $trap_name "trap" + + bridge fdb replace $smac dev $swp1 master static vlan 1 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after adding an FDB entry" + + bridge fdb del $smac dev $swp1 master static vlan 1 + bridge link set dev $swp1 locked off + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after unlocking port" + + log_test "Locked port - FDB miss" + + devlink_trap_action_set $trap_name "drop" + bridge link set dev $swp1 learning on +} + +locked_port_mismatch_test() +{ + local trap_name="locked_port" + local smac=00:11:22:33:44:55 + + bridge link set dev $swp1 learning off + bridge link set dev $swp1 locked on + + RET=0 + + bridge fdb replace $smac dev $swp2 master static vlan 1 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased before setting action to \"trap\"" + + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_err $? "Trap stats did not increase when should" + + devlink_trap_action_set $trap_name "drop" + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after setting action to \"drop\"" + + devlink_trap_action_set $trap_name "trap" + bridge link set dev $swp1 locked off + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after unlocking port" + + bridge link set dev $swp1 locked on + bridge fdb replace $smac dev $swp1 master static vlan 1 + + devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \ + -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q + check_fail $? "Trap stats increased after replacing an FDB entry" + + bridge fdb del $smac dev $swp1 master static vlan 1 + devlink_trap_action_set $trap_name "drop" + + log_test "Locked port - FDB mismatch" + + bridge link set dev $swp1 locked off + bridge link set dev $swp1 learning on +} + +locked_port_test() +{ + locked_port_miss_test + locked_port_mismatch_test +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh new file mode 100755 index 000000000000..db5806d189bb --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -0,0 +1,696 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap L3 drops functionality over mlxsw. Each registered L3 drop +# packet trap is tested to make sure it is triggered under the right +# conditions. + +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | + $rp1 | +# | 192.0.2.2/24 | +# | 2001:db8:1::2/64 | +# | | +# | 2001:db8:2::2/64 | +# | 198.51.100.2/24 | +# | + $rp2 | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + non_ip_test + uc_dip_over_mc_dmac_test + dip_is_loopback_test + sip_is_mc_test + sip_is_loopback_test + ip_header_corrupted_test + ipv4_sip_is_limited_bc_test + ipv6_mc_dip_reserved_scope_test + ipv6_mc_dip_interface_local_scope_test + blackhole_route_test + irif_disabled_test + erif_disabled_test + blackhole_nexthop_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 $h2_ipv4/24 $h2_ipv6/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 $h2_ipv4/24 $h2_ipv6/64 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + tc qdisc add dev $rp2 clsact + + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64 +} + +router_destroy() +{ + __addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 + + tc qdisc del dev $rp2 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + rp1mac=$(mac_get $rp1) + + h1_ipv4=192.0.2.1 + h2_ipv4=198.51.100.1 + h1_ipv6=2001:db8:1::1 + h2_ipv6=2001:db8:2::1 + + vrf_prepare + forwarding_enable + + h1_create + h2_create + + router_create +} + +cleanup() +{ + pre_cleanup + + router_destroy + + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_check() +{ + trap_name=$1; shift + + devlink_trap_action_set $trap_name "trap" + ping_do $h1 $h2_ipv4 + check_err $? "Packets that should not be trapped were trapped" + devlink_trap_action_set $trap_name "drop" +} + +non_ip_test() +{ + local trap_name="non_ip" + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \ + flower dst_ip $h2_ipv4 action drop + + # Generate non-IP packets to the router + $MZ $h1 -c 0 -p 100 -d 1msec -B $h2_ipv4 -q "$rp1mac $h1mac \ + 00:00 de:ad:be:ef" & + mz_pid=$! + + devlink_trap_drop_test $trap_name $rp2 101 + + log_test "Non IP" + + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101 +} + +__uc_dip_over_mc_dmac_test() +{ + local desc=$1; shift + local proto=$1; shift + local dip=$1; shift + local flags=${1:-""}; shift + local trap_name="uc_dip_over_mc_dmac" + local dmac=01:02:03:04:05:06 + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower ip_proto udp src_port 54321 dst_port 12345 action drop + + # Generate IP packets with a unicast IP and a multicast destination MAC + $MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $dmac \ + -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $rp2 101 + + log_test "Unicast destination IP over multicast destination MAC: $desc" + + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 +} + +uc_dip_over_mc_dmac_test() +{ + __uc_dip_over_mc_dmac_test "IPv4" "ip" $h2_ipv4 + __uc_dip_over_mc_dmac_test "IPv6" "ipv6" $h2_ipv6 "-6" +} + +__sip_is_loopback_test() +{ + local desc=$1; shift + local proto=$1; shift + local sip=$1; shift + local dip=$1; shift + local flags=${1:-""}; shift + local trap_name="sip_is_loopback_address" + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower src_ip $sip action drop + + # Generate packets with loopback source IP + $MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip \ + -b $rp1mac -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $rp2 101 + + log_test "Source IP is loopback address: $desc" + + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 +} + +sip_is_loopback_test() +{ + __sip_is_loopback_test "IPv4" "ip" "127.0.0.0/8" $h2_ipv4 + __sip_is_loopback_test "IPv6" "ipv6" "::1" $h2_ipv6 "-6" +} + +__dip_is_loopback_test() +{ + local desc=$1; shift + local proto=$1; shift + local dip=$1; shift + local flags=${1:-""}; shift + local trap_name="dip_is_loopback_address" + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower dst_ip $dip action drop + + # Generate packets with loopback destination IP + $MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \ + -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $rp2 101 + + log_test "Destination IP is loopback address: $desc" + + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 +} + +dip_is_loopback_test() +{ + __dip_is_loopback_test "IPv4" "ip" "127.0.0.0/8" + __dip_is_loopback_test "IPv6" "ipv6" "::1" "-6" +} + +__sip_is_mc_test() +{ + local desc=$1; shift + local proto=$1; shift + local sip=$1; shift + local dip=$1; shift + local flags=${1:-""}; shift + local trap_name="sip_is_mc" + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower src_ip $sip action drop + + # Generate packets with multicast source IP + $MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip \ + -b $rp1mac -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $rp2 101 + + log_test "Source IP is multicast: $desc" + + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 +} + +sip_is_mc_test() +{ + __sip_is_mc_test "IPv4" "ip" "239.1.1.1" $h2_ipv4 + __sip_is_mc_test "IPv6" "ipv6" "FF02::2" $h2_ipv6 "-6" +} + +ipv4_sip_is_limited_bc_test() +{ + local trap_name="ipv4_sip_is_limited_bc" + local sip=255.255.255.255 + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \ + flower src_ip $sip action drop + + # Generate packets with limited broadcast source IP + $MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip -b $rp1mac \ + -B $h2_ipv4 -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $rp2 101 + + log_test "IPv4 source IP is limited broadcast" + + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101 +} + +ipv4_payload_get() +{ + local ipver=$1; shift + local ihl=$1; shift + local checksum=$1; shift + + p=$(: + )"08:00:"$( : ETH type + )"$ipver"$( : IP version + )"$ihl:"$( : IHL + )"00:"$( : IP TOS + )"00:F4:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"30:"$( : IP TTL + )"01:"$( : IP proto + )"$checksum:"$( : IP header csum + )"$h1_ipv4:"$( : IP saddr + )"$h2_ipv4:"$( : IP daddr + ) + echo $p +} + +__ipv4_header_corrupted_test() +{ + local desc=$1; shift + local ipver=$1; shift + local ihl=$1; shift + local checksum=$1; shift + local trap_name="ip_header_corrupted" + local payload + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \ + flower dst_ip $h2_ipv4 action drop + + payload=$(ipv4_payload_get $ipver $ihl $checksum) + + # Generate packets with corrupted IP header + $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload & + mz_pid=$! + + devlink_trap_drop_test $trap_name $rp2 101 + + log_test "IP header corrupted: $desc: IPv4" + + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101 +} + +ipv6_payload_get() +{ + local ipver=$1; shift + + p=$(: + )"86:DD:"$( : ETH type + )"$ipver"$( : IP version + )"0:0:"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:00:"$( : Payload length + )"01:"$( : Next header + )"04:"$( : Hop limit + )"$h1_ipv6:"$( : IP saddr + )"$h2_ipv6:"$( : IP daddr + ) + echo $p +} + +__ipv6_header_corrupted_test() +{ + local desc=$1; shift + local ipver=$1; shift + local trap_name="ip_header_corrupted" + local payload + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \ + flower dst_ip $h2_ipv4 action drop + + payload=$(ipv6_payload_get $ipver) + + # Generate packets with corrupted IP header + $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload & + mz_pid=$! + + devlink_trap_drop_test $trap_name $rp2 101 + + log_test "IP header corrupted: $desc: IPv6" + + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101 +} + +ip_header_corrupted_test() +{ + # Each test uses one wrong value. The three values below are correct. + local ipv="4" + local ihl="5" + local checksum="00:F4" + + __ipv4_header_corrupted_test "wrong IP version" 5 $ihl $checksum + __ipv4_header_corrupted_test "wrong IHL" $ipv 4 $checksum + __ipv4_header_corrupted_test "wrong checksum" $ipv $ihl "00:00" + __ipv6_header_corrupted_test "wrong IP version" 5 +} + +ipv6_mc_dip_reserved_scope_test() +{ + local trap_name="ipv6_mc_dip_reserved_scope" + local dip=FF00:: + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol ipv6 pref 1 handle 101 \ + flower dst_ip $dip action drop + + # Generate packets with reserved scope destination IP + $MZ $h1 -6 -t udp "sp=54321,dp=12345" -c 0 -p 100 -b \ + "33:33:00:00:00:00" -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $rp2 101 + + log_test "IPv6 multicast destination IP reserved scope" + + devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101 +} + +ipv6_mc_dip_interface_local_scope_test() +{ + local trap_name="ipv6_mc_dip_interface_local_scope" + local dip=FF01:: + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol ipv6 pref 1 handle 101 \ + flower dst_ip $dip action drop + + # Generate packets with interface local scope destination IP + $MZ $h1 -6 -t udp "sp=54321,dp=12345" -c 0 -p 100 -b \ + "33:33:00:00:00:00" -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $rp2 101 + + log_test "IPv6 multicast destination IP interface-local scope" + + devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101 +} + +__blackhole_route_test() +{ + local flags=$1; shift + local subnet=$1; shift + local proto=$1; shift + local dip=$1; shift + local ip_proto=${1:-"icmp"}; shift + local trap_name="blackhole_route" + local mz_pid + + RET=0 + + ping_check $trap_name + + ip -$flags route add blackhole $subnet + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower skip_hw dst_ip $dip ip_proto $ip_proto action drop + + # Generate packets to the blackhole route + $MZ $h1 -$flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \ + -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $rp2 101 + log_test "Blackhole route: IPv$flags" + + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 + ip -$flags route del blackhole $subnet +} + +blackhole_route_test() +{ + __blackhole_route_test "4" "198.51.100.0/30" "ip" $h2_ipv4 + __blackhole_route_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6 "icmpv6" +} + +irif_disabled_test() +{ + local trap_name="irif_disabled" + local t0_packets t0_bytes + local t1_packets t1_bytes + local mz_pid + + RET=0 + + ping_check $trap_name + + devlink_trap_action_set $trap_name "trap" + + # When RIF of a physical port ("Sub-port RIF") is destroyed, we first + # block the STP of the {Port, VLAN} so packets cannot get into the RIF. + # Using bridge enables us to see this trap because when bridge is + # destroyed, there is a small time window that packets can go into the + # RIF, while it is disabled. + ip link add dev br0 type bridge + ip link set dev $rp1 master br0 + ip address flush dev $rp1 + __addr_add_del br0 add 192.0.2.2/24 + ip li set dev br0 up + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + t0_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + # Generate packets to h2 through br0 RIF that will be removed later + $MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -p 100 -a own -b $rp1mac \ + -B $h2_ipv4 -q & + mz_pid=$! + + # Wait before removing br0 RIF to allow packets to go into the bridge. + sleep 1 + + # Flushing address will dismantle the RIF + ip address flush dev br0 + + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + t1_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then + check_err 1 "Trap stats idle when packets should be trapped" + fi + + log_test "Ingress RIF disabled" + + kill_process $mz_pid + ip link set dev $rp1 nomaster + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + ip link del dev br0 type bridge + devlink_trap_action_set $trap_name "drop" +} + +erif_disabled_test() +{ + local trap_name="erif_disabled" + local t0_packets t0_bytes + local t1_packets t1_bytes + local mz_pid + + RET=0 + + ping_check $trap_name + + devlink_trap_action_set $trap_name "trap" + ip link add dev br0 type bridge + ip add flush dev $rp1 + ip link set dev $rp1 master br0 + __addr_add_del br0 add 192.0.2.2/24 + ip link set dev br0 up + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + t0_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + rp2mac=$(mac_get $rp2) + + # Generate packets that should go out through br0 RIF that will be + # removed later + $MZ $h2 -t udp "sp=54321,dp=12345" -c 0 -p 100 -a own -b $rp2mac \ + -B 192.0.2.1 -q & + mz_pid=$! + + sleep 5 + # Unlinking the port from the bridge will disable the RIF associated + # with br0 as it is no longer an upper of any mlxsw port. + ip link set dev $rp1 nomaster + + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + t1_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then + check_err 1 "Trap stats idle when packets should be trapped" + fi + + log_test "Egress RIF disabled" + + kill_process $mz_pid + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + ip link del dev br0 type bridge + devlink_trap_action_set $trap_name "drop" +} + +__blackhole_nexthop_test() +{ + local flags=$1; shift + local subnet=$1; shift + local proto=$1; shift + local dip=$1; shift + local trap_name="blackhole_nexthop" + local mz_pid + + RET=0 + + ip -$flags nexthop add id 1 blackhole + ip -$flags route add $subnet nhid 1 + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower skip_hw dst_ip $dip ip_proto udp action drop + + # Generate packets to the blackhole nexthop + $MZ $h1 -$flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \ + -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $rp2 101 + log_test "Blackhole nexthop: IPv$flags" + + devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 + ip -$flags route del $subnet + ip -$flags nexthop del id 1 +} + +blackhole_nexthop_test() +{ + __blackhole_nexthop_test "4" "198.51.100.0/30" "ip" $h2_ipv4 + __blackhole_nexthop_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh new file mode 100755 index 000000000000..5d6d88b600f0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh @@ -0,0 +1,583 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap L3 exceptions functionality over mlxsw. +# Check all exception traps to make sure they are triggered under the right +# conditions. + +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | + $rp1 | +# | 192.0.2.2/24 | +# | 2001:db8:1::2/64 | +# | | +# | 2001:db8:2::2/64 | +# | 198.51.100.2/24 | +# | + $rp2 | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + mtu_value_is_too_small_test + ttl_value_is_too_small_test + mc_reverse_path_forwarding_test + reject_route_test + unresolved_neigh_test + ipv4_lpm_miss_test + ipv6_lpm_miss_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +require_command $MCD +require_command $MC_CLI +table_name=selftests + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 + + tc qdisc add dev $h1 clsact +} + +h1_destroy() +{ + tc qdisc del dev $h1 clsact + + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + tc qdisc add dev $rp2 clsact + + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64 +} + +router_destroy() +{ + __addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 + + tc qdisc del dev $rp2 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1mac=$(mac_get $rp1) + + start_mcd + + vrf_prepare + forwarding_enable + + h1_create + h2_create + + router_create +} + +cleanup() +{ + pre_cleanup + + router_destroy + + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup + + kill_mcd +} + +ping_check() +{ + ping_do $h1 198.51.100.1 + check_err $? "Packets that should not be trapped were trapped" +} + +trap_action_check() +{ + local trap_name=$1; shift + local expected_action=$1; shift + + action=$(devlink_trap_action_get $trap_name) + if [ "$action" != $expected_action ]; then + check_err 1 "Trap $trap_name has wrong action: $action" + fi +} + +mtu_value_is_too_small_test() +{ + local trap_name="mtu_value_is_too_small" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + # type - Destination Unreachable + # code - Fragmentation Needed and Don't Fragment was Set + tc filter add dev $h1 ingress protocol ip pref 1 handle 101 \ + flower skip_hw ip_proto icmp type 3 code 4 action pass + + mtu_set $rp2 1300 + + # Generate IP packets bigger than router's MTU with don't fragment + # flag on. + $MZ $h1 -t udp "sp=54321,dp=12345,df" -p 1400 -c 0 -d 1msec -b $rp1mac \ + -B 198.51.100.1 -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets_hitting "dev $h1 ingress" 101 + check_err $? "Packets were not received to h1" + + log_test "MTU value is too small" + + mtu_restore $rp2 + + kill_process $mz_pid + tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower +} + +__ttl_value_is_too_small_test() +{ + local ttl_val=$1; shift + local trap_name="ttl_value_is_too_small" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + # type - Time Exceeded + # code - Time to Live exceeded in Transit + tc filter add dev $h1 ingress protocol ip pref 1 handle 101 \ + flower skip_hw ip_proto icmp type 11 code 0 action pass + + # Generate IP packets with small TTL + $MZ $h1 -t udp "ttl=$ttl_val,sp=54321,dp=12345" -c 0 -d 1msec \ + -b $rp1mac -B 198.51.100.1 -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets_hitting "dev $h1 ingress" 101 + check_err $? "Packets were not received to h1" + + log_test "TTL value is too small: TTL=$ttl_val" + + kill_process $mz_pid + tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower +} + +ttl_value_is_too_small_test() +{ + __ttl_value_is_too_small_test 0 + __ttl_value_is_too_small_test 1 +} + +start_mcd() +{ + SMCROUTEDIR="$(mktemp -d)" + for ((i = 1; i <= $NUM_NETIFS; ++i)); do + echo "phyint ${NETIFS[p$i]} enable" >> \ + $SMCROUTEDIR/$table_name.conf + done + + $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \ + -P $SMCROUTEDIR/$table_name.pid +} + +kill_mcd() +{ + pkill $MCD + rm -rf $SMCROUTEDIR +} + +__mc_reverse_path_forwarding_test() +{ + local desc=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dst_mac=$1; shift + local proto=$1; shift + local flags=${1:-""}; shift + local trap_name="mc_reverse_path_forwarding" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower dst_ip $dst_ip ip_proto udp action drop + + $MC_CLI -I $table_name add $rp1 $src_ip $dst_ip $rp2 + + # Generate packets to multicast address. + $MZ $h2 $flags -t udp "sp=54321,dp=12345" -c 0 -p 128 \ + -a 00:11:22:33:44:55 -b $dst_mac \ + -A $src_ip -B $dst_ip -q & + + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $rp2 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "Multicast reverse path forwarding: $desc" + + kill_process $mz_pid + tc filter del dev $rp2 egress protocol $proto pref 1 handle 101 flower +} + +mc_reverse_path_forwarding_test() +{ + __mc_reverse_path_forwarding_test "IPv4" "192.0.2.1" "225.1.2.3" \ + "01:00:5e:01:02:03" "ip" + __mc_reverse_path_forwarding_test "IPv6" "2001:db8:1::1" "ff0e::3" \ + "33:33:00:00:00:03" "ipv6" "-6" +} + +__reject_route_test() +{ + local desc=$1; shift + local dst_ip=$1; shift + local proto=$1; shift + local ip_proto=$1; shift + local type=$1; shift + local code=$1; shift + local unreachable=$1; shift + local flags=${1:-""}; shift + local trap_name="reject_route" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + tc filter add dev $h1 ingress protocol $proto pref 1 handle 101 flower \ + skip_hw ip_proto $ip_proto type $type code $code action pass + + ip route add unreachable $unreachable + + # Generate pacekts to h2. The destination IP is unreachable. + $MZ $flags $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \ + -B $dst_ip -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets_hitting "dev $h1 ingress" 101 + check_err $? "ICMP packet was not received to h1" + + log_test "Reject route: $desc" + + kill_process $mz_pid + ip route del unreachable $unreachable + tc filter del dev $h1 ingress protocol $proto pref 1 handle 101 flower +} + +reject_route_test() +{ + # type - Destination Unreachable + # code - Host Unreachable + __reject_route_test "IPv4" 198.51.100.1 "ip" "icmp" 3 1 \ + "198.51.100.0/26" + # type - Destination Unreachable + # code - No Route + __reject_route_test "IPv6" 2001:db8:2::1 "ipv6" "icmpv6" 1 0 \ + "2001:db8:2::0/66" "-6" +} + +__host_miss_test() +{ + local desc=$1; shift + local dip=$1; shift + local trap_name="unresolved_neigh" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + ip neigh flush dev $rp2 + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + + # Generate packets to h2 (will incur a unresolved neighbor). + # The ping should pass and devlink counters should be increased. + ping_do $h1 $dip + check_err $? "ping failed: $desc" + + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + + if [[ $t0_packets -eq $t1_packets ]]; then + check_err 1 "Trap counter did not increase" + fi + + log_test "Unresolved neigh: host miss: $desc" +} + +__invalid_nexthop_test() +{ + local desc=$1; shift + local dip=$1; shift + local extra_add=$1; shift + local subnet=$1; shift + local via_add=$1; shift + local trap_name="unresolved_neigh" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + ip address add $extra_add/$subnet dev $h2 + + # Check that correct route does not trigger unresolved_neigh + ip $flags route add $dip via $extra_add dev $rp2 + + # Generate packets in order to discover all neighbours. + # Without it, counters of unresolved_neigh will be increased + # during neighbours discovery and the check below will fail + # for a wrong reason + ping_do $h1 $dip + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + ping_do $h1 $dip + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + + if [[ $t0_packets -ne $t1_packets ]]; then + check_err 1 "Trap counter increased when it should not" + fi + + ip $flags route del $dip via $extra_add dev $rp2 + + # Check that route to nexthop that does not exist trigger + # unresolved_neigh + ip $flags route add $dip via $via_add dev $h2 + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + ping_do $h1 $dip + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + + if [[ $t0_packets -eq $t1_packets ]]; then + check_err 1 "Trap counter did not increase" + fi + + ip $flags route del $dip via $via_add dev $h2 + ip address del $extra_add/$subnet dev $h2 + log_test "Unresolved neigh: nexthop does not exist: $desc" +} + +__invalid_nexthop_bucket_test() +{ + local desc=$1; shift + local dip=$1; shift + local via_add=$1; shift + local trap_name="unresolved_neigh" + + RET=0 + + # Check that route to nexthop that does not exist triggers + # unresolved_neigh + ip nexthop add id 1 via $via_add dev $rp2 + ip nexthop add id 10 group 1 type resilient buckets 32 + ip route add $dip nhid 10 + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + ping_do $h1 $dip + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + + if [[ $t0_packets -eq $t1_packets ]]; then + check_err 1 "Trap counter did not increase" + fi + + ip route del $dip nhid 10 + ip nexthop del id 10 + ip nexthop del id 1 + log_test "Unresolved neigh: nexthop bucket does not exist: $desc" +} + +unresolved_neigh_test() +{ + __host_miss_test "IPv4" 198.51.100.1 + __host_miss_test "IPv6" 2001:db8:2::1 + __invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4 + __invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \ + 2001:db8:2::4 + __invalid_nexthop_bucket_test "IPv4" 198.51.100.1 198.51.100.4 + __invalid_nexthop_bucket_test "IPv6" 2001:db8:2::1 2001:db8:2::4 +} + +vrf_without_routes_create() +{ + # VRF creating makes the links to be down and then up again. + # By default, IPv6 address is not saved after link becomes down. + # Save IPv6 address using sysctl configuration. + sysctl_set net.ipv6.conf.$rp1.keep_addr_on_down 1 + sysctl_set net.ipv6.conf.$rp2.keep_addr_on_down 1 + + ip link add dev vrf1 type vrf table 101 + ip link set dev $rp1 master vrf1 + ip link set dev $rp2 master vrf1 + ip link set dev vrf1 up + + # Wait for rp1 and rp2 to be up + setup_wait +} + +vrf_without_routes_destroy() +{ + ip link set dev $rp1 nomaster + ip link set dev $rp2 nomaster + ip link del dev vrf1 + + sysctl_restore net.ipv6.conf.$rp2.keep_addr_on_down + sysctl_restore net.ipv6.conf.$rp1.keep_addr_on_down + + # Wait for interfaces to be up + setup_wait +} + +ipv4_lpm_miss_test() +{ + local trap_name="ipv4_lpm_miss" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + # Create a VRF without a default route + vrf_without_routes_create + + # Generate packets through a VRF without a matching route. + $MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \ + -B 203.0.113.1 -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + log_test "LPM miss: IPv4" + + kill_process $mz_pid + vrf_without_routes_destroy +} + +ipv6_lpm_miss_test() +{ + local trap_name="ipv6_lpm_miss" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + # Create a VRF without a default route + vrf_without_routes_create + + # Generate packets through a VRF without a matching route. + $MZ -6 $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \ + -B 2001:db8::1 -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + log_test "LPM miss: IPv6" + + kill_process $mz_pid + vrf_without_routes_destroy +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh new file mode 100755 index 000000000000..e212ad8ccef6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -0,0 +1,353 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap policer functionality over mlxsw. + +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | | +# | | default via 192.0.2.2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | + $rp1 | +# | 192.0.2.2/24 | +# | | +# | 198.51.100.2/24 | +# | + $rp2 | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + rate_limits_test + burst_limits_test + rate_test + burst_test +" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + adf_simple_if_init $h1 192.0.2.1/24 + + mtu_set $h1 10000 + defer mtu_restore $h1 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + defer ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 +} + +h2_create() +{ + adf_simple_if_init $h2 198.51.100.1/24 + + mtu_set $h2 10000 + defer mtu_restore $h2 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + defer ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 +} + +router_create() +{ + ip link set dev $rp1 up + defer ip link set dev $rp1 down + + ip link set dev $rp2 up + defer ip link set dev $rp2 down + + __addr_add_del $rp1 add 192.0.2.2/24 + defer __addr_add_del $rp1 del 192.0.2.2/24 + + __addr_add_del $rp2 add 198.51.100.2/24 + defer __addr_add_del $rp2 del 198.51.100.2/24 + + mtu_set $rp1 10000 + defer mtu_restore $rp1 + + mtu_set $rp2 10000 + defer mtu_restore $rp2 + + ip -4 route add blackhole 198.51.100.100 + defer ip -4 route del blackhole 198.51.100.100 + + devlink trap set $DEVLINK_DEV trap blackhole_route action trap + defer devlink trap set $DEVLINK_DEV trap blackhole_route action drop +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1_mac=$(mac_get $rp1) + + # Reload to ensure devlink-trap settings are back to default. + defer devlink_reload + + adf_vrf_prepare + + h1_create + h2_create + + router_create +} + +rate_limits_test() +{ + RET=0 + + devlink trap policer set $DEVLINK_DEV policer 1 rate 0 &> /dev/null + check_fail $? "Policer rate was changed to rate lower than limit" + devlink trap policer set $DEVLINK_DEV policer 1 \ + rate 2000000001 &> /dev/null + check_fail $? "Policer rate was changed to rate higher than limit" + + devlink trap policer set $DEVLINK_DEV policer 1 rate 1 + check_err $? "Failed to set policer rate to minimum" + devlink trap policer set $DEVLINK_DEV policer 1 rate 2000000000 + check_err $? "Failed to set policer rate to maximum" + + log_test "Trap policer rate limits" +} + +burst_limits_test() +{ + RET=0 + + devlink trap policer set $DEVLINK_DEV policer 1 burst 0 &> /dev/null + check_fail $? "Policer burst size was changed to 0" + devlink trap policer set $DEVLINK_DEV policer 1 burst 17 &> /dev/null + check_fail $? "Policer burst size was changed to burst size that is not power of 2" + devlink trap policer set $DEVLINK_DEV policer 1 burst 8 &> /dev/null + check_fail $? "Policer burst size was changed to burst size lower than limit" + devlink trap policer set $DEVLINK_DEV policer 1 \ + burst $((2**25)) &> /dev/null + check_fail $? "Policer burst size was changed to burst size higher than limit" + + devlink trap policer set $DEVLINK_DEV policer 1 burst 16 + check_err $? "Failed to set policer burst size to minimum" + devlink trap policer set $DEVLINK_DEV policer 1 burst $((2**24)) + check_err $? "Failed to set policer burst size to maximum" + + log_test "Trap policer burst size limits" +} + +trap_rate_get() +{ + local t0 t1 + + t0=$(devlink_trap_rx_packets_get blackhole_route) + sleep 10 + t1=$(devlink_trap_rx_packets_get blackhole_route) + + echo $(((t1 - t0) / 10)) +} + +policer_drop_rate_get() +{ + local id=$1; shift + local t0 t1 + + t0=$(devlink_trap_policer_rx_dropped_get $id) + sleep 10 + t1=$(devlink_trap_policer_rx_dropped_get $id) + + echo $(((t1 - t0) / 10)) +} + +__rate_test() +{ + local rate pct drop_rate + local id=$1; shift + + RET=0 + + devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512 + devlink trap group set $DEVLINK_DEV group l3_drops policer $id + + # Send packets at highest possible rate and make sure they are dropped + # by the policer. Make sure measured received rate is about 1000 pps + log_info "=== Tx rate: Highest, Policer rate: 1000 pps ===" + + defer_scope_push + + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac + defer stop_traffic $! + + sleep 5 # Take measurements when rate is stable + + rate=$(trap_rate_get) + pct=$((100 * (rate - 1000) / 1000)) + ((-10 <= pct && pct <= 10)) + check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-10%" + log_info "Expected rate 1000 pps, measured rate $rate pps" + + drop_rate=$(policer_drop_rate_get $id) + (( drop_rate > 0 )) + check_err $? "Expected non-zero policer drop rate, got 0" + log_info "Measured policer drop rate of $drop_rate pps" + + defer_scope_pop + + # Send packets at a rate of 1000 pps and make sure they are not dropped + # by the policer + log_info "=== Tx rate: 1000 pps, Policer rate: 1000 pps ===" + + defer_scope_push + + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -d 1msec + defer stop_traffic $! + + sleep 5 # Take measurements when rate is stable + + drop_rate=$(policer_drop_rate_get $id) + (( drop_rate == 0 )) + check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps" + log_info "Measured policer drop rate of $drop_rate pps" + + defer_scope_pop + + # Unbind the policer and send packets at highest possible rate. Make + # sure they are not dropped by the policer and that the measured + # received rate is higher than 1000 pps + log_info "=== Tx rate: Highest, Policer rate: No policer ===" + + devlink trap group set $DEVLINK_DEV group l3_drops nopolicer + + defer_scope_push + + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac + defer stop_traffic $! + + rate=$(trap_rate_get) + (( rate > 1000 )) + check_err $? "Expected rate higher than 1000 pps, got $rate pps" + log_info "Measured rate $rate pps" + + drop_rate=$(policer_drop_rate_get $id) + (( drop_rate == 0 )) + check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps" + log_info "Measured policer drop rate of $drop_rate pps" + + defer_scope_pop + + log_test "Trap policer rate" +} + +rate_test() +{ + local last_policer=$(devlink -j -p trap policer show | + jq '[.[]["'$DEVLINK_DEV'"][].policer] | max') + + log_info "Running rate test for policer 1" + __rate_test 1 + + log_info "Running rate test for policer $((last_policer / 2))" + __rate_test $((last_policer / 2)) + + log_info "Running rate test for policer $last_policer" + __rate_test $last_policer +} + +__burst_test() +{ + local t0_rx t0_drop t1_rx t1_drop rx drop + local id=$1; shift + + RET=0 + + devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512 + devlink trap group set $DEVLINK_DEV group l3_drops policer $id + + # Send a burst of 16 packets and make sure that 16 are received + # and that none are dropped by the policer + log_info "=== Tx burst size: 16, Policer burst size: 512 ===" + + t0_rx=$(devlink_trap_rx_packets_get blackhole_route) + t0_drop=$(devlink_trap_policer_rx_dropped_get $id) + + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 16 + + t1_rx=$(devlink_trap_rx_packets_get blackhole_route) + t1_drop=$(devlink_trap_policer_rx_dropped_get $id) + + rx=$((t1_rx - t0_rx)) + (( rx == 16 )) + check_err $? "Expected burst size of 16 packets, got $rx packets" + log_info "Expected burst size of 16 packets, measured burst size of $rx packets" + + drop=$((t1_drop - t0_drop)) + (( drop == 0 )) + check_err $? "Expected zero policer drops, got $drop" + log_info "Measured policer drops of $drop packets" + + # Unbind the policer and send a burst of 64 packets. Make sure that + # 64 packets are received and that none are dropped by the policer + log_info "=== Tx burst size: 64, Policer burst size: No policer ===" + + devlink trap group set $DEVLINK_DEV group l3_drops nopolicer + + t0_rx=$(devlink_trap_rx_packets_get blackhole_route) + t0_drop=$(devlink_trap_policer_rx_dropped_get $id) + + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64 + + t1_rx=$(devlink_trap_rx_packets_get blackhole_route) + t1_drop=$(devlink_trap_policer_rx_dropped_get $id) + + rx=$((t1_rx - t0_rx)) + (( rx == 64 )) + check_err $? "Expected burst size of 64 packets, got $rx packets" + log_info "Expected burst size of 64 packets, measured burst size of $rx packets" + + drop=$((t1_drop - t0_drop)) + (( drop == 0 )) + check_err $? "Expected zero policer drops, got $drop" + log_info "Measured policer drops of $drop packets" + + log_test "Trap policer burst size" +} + +burst_test() +{ + local last_policer=$(devlink -j -p trap policer show | + jq '[.[]["'$DEVLINK_DEV'"][].policer] | max') + + log_info "Running burst test for policer 1" + __burst_test 1 + + log_info "Running burst test for policer $((last_policer / 2))" + __burst_test $((last_policer / 2)) + + log_info "Running burst test for policer $last_policer" + __burst_test $last_policer +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh new file mode 100755 index 000000000000..4ac1dae92d0f --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap tunnel exceptions functionality over mlxsw. +# Check all exception traps to make sure they are triggered under the right +# conditions. + +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.1/28 | | +# +-------------------|-----+ +# | +# +-------------------|-----+ +# | SW1 | | +# | $swp1 + | +# | 192.0.2.2/28 | +# | | +# | + g1a (gre) | +# | loc=192.0.2.65 | +# | rem=192.0.2.66 | +# | tos=inherit | +# | | +# | + $rp1 | +# | | 198.51.100.1/28 | +# +--|----------------------+ +# | +# +--|----------------------+ +# | | VRF2 | +# | + $rp2 | +# | 198.51.100.2/28 | +# +-------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + decap_error_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +vrf2_create() +{ + simple_if_init $rp2 198.51.100.2/28 +} + +vrf2_destroy() +{ + simple_if_fini $rp2 198.51.100.2/28 +} + +switch_create() +{ + __addr_add_del $swp1 add 192.0.2.2/28 + tc qdisc add dev $swp1 clsact + ip link set dev $swp1 up + + tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit + __addr_add_del g1 add 192.0.2.65/32 + ip link set dev g1 up + + __addr_add_del $rp1 add 198.51.100.1/28 + ip link set dev $rp1 up +} + +switch_destroy() +{ + ip link set dev $rp1 down + __addr_add_del $rp1 del 198.51.100.1/28 + + ip link set dev g1 down + __addr_add_del g1 del 192.0.2.65/32 + tunnel_destroy g1 + + ip link set dev $swp1 down + tc qdisc del dev $swp1 clsact + __addr_add_del $swp1 del 192.0.2.2/28 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + forwarding_enable + vrf_prepare + h1_create + switch_create + vrf2_create +} + +cleanup() +{ + pre_cleanup + + vrf2_destroy + switch_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +ipip_payload_get() +{ + local flags=$1; shift + local key=$1; shift + + p=$(: + )"$flags"$( : GRE flags + )"0:00:"$( : Reserved + version + )"08:00:"$( : ETH protocol type + )"$key"$( : Key + )"4"$( : IP version + )"5:"$( : IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"30:"$( : IP TTL + )"01:"$( : IP proto + )"E7:E6:"$( : IP header csum + )"C0:00:01:01:"$( : IP saddr : 192.0.1.1 + )"C0:00:02:01:"$( : IP daddr : 192.0.2.1 + ) + echo $p +} + +ecn_payload_get() +{ + echo $(ipip_payload_get "0") +} + +ecn_decap_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local ecn_desc=$1; shift + local outer_tos=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_ip 192.0.1.1 dst_ip 192.0.2.1 action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ecn_payload_get) + + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A 192.0.2.66 -B 192.0.2.65 -t ip \ + len=48,tos=$outer_tos,proto=47,p=$payload -q & + + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" + + kill_process $mz_pid + tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower +} + +no_matching_tunnel_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local sip=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_ip 192.0.1.1 dst_ip 192.0.2.1 action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ipip_payload_get "$@") + + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A $sip -B 192.0.2.65 -t ip len=48,proto=47,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc" + + kill_process $mz_pid + tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower +} + +decap_error_test() +{ + # Correct source IP - the remote address + local sip=192.0.2.66 + + ecn_decap_test "Decap error" "ECT(1)" 01 + ecn_decap_test "Decap error" "ECT(0)" 02 + ecn_decap_test "Decap error" "CE" 03 + + no_matching_tunnel_test "Decap error: Source IP check failed" \ + 192.0.2.68 "0" + no_matching_tunnel_test \ + "Decap error: Key exists but was not expected" $sip "2" \ + "00:00:00:E9:" + + # Destroy the tunnel and create new one with key + __addr_add_del g1 del 192.0.2.65/32 + tunnel_destroy g1 + + tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit key 233 + __addr_add_del g1 add 192.0.2.65/32 + + no_matching_tunnel_test \ + "Decap error: Key does not exist but was expected" $sip "0" + no_matching_tunnel_test \ + "Decap error: Packet has a wrong key field" $sip "2" \ + "00:00:00:E8:" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh new file mode 100755 index 000000000000..fce885184404 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh @@ -0,0 +1,250 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap tunnel exceptions functionality over mlxsw. +# Check all exception traps to make sure they are triggered under the right +# conditions. + +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 2001:db8:1::1/64 | | +# +-------------------|-----+ +# | +# +-------------------|-----+ +# | SW1 | | +# | $swp1 + | +# | 2001:db8:1::2/64 | +# | | +# | + g1 (ip6gre) | +# | loc=2001:db8:3::1 | +# | rem=2001:db8:3::2 | +# | tos=inherit | +# | | +# | + $rp1 | +# | | 2001:db8:10::1/64 | +# +--|----------------------+ +# | +# +--|----------------------+ +# | | VRF2 | +# | + $rp2 | +# | 2001:db8:10::2/64 | +# +-------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + decap_error_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +vrf2_create() +{ + simple_if_init $rp2 2001:db8:10::2/64 +} + +vrf2_destroy() +{ + simple_if_fini $rp2 2001:db8:10::2/64 +} + +switch_create() +{ + ip link set dev $swp1 up + __addr_add_del $swp1 add 2001:db8:1::2/64 + tc qdisc add dev $swp1 clsact + + tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit + ip link set dev g1 up + __addr_add_del g1 add 2001:db8:3::1/128 + + ip link set dev $rp1 up + __addr_add_del $rp1 add 2001:db8:10::1/64 +} + +switch_destroy() +{ + __addr_add_del $rp1 del 2001:db8:10::1/64 + ip link set dev $rp1 down + + __addr_add_del g1 del 2001:db8:3::1/128 + ip link set dev g1 down + tunnel_destroy g1 + + tc qdisc del dev $swp1 clsact + __addr_add_del $swp1 del 2001:db8:1::2/64 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + forwarding_enable + vrf_prepare + h1_create + switch_create + vrf2_create +} + +cleanup() +{ + pre_cleanup + + vrf2_destroy + switch_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +ipip_payload_get() +{ + local saddr="20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:01" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + local flags=$1; shift + local key=$1; shift + + p=$(: + )"$flags"$( : GRE flags + )"0:00:"$( : Reserved + version + )"86:dd:"$( : ETH protocol type + )"$key"$( : Key + )"6"$( : IP version + )"0:0"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:00:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + ) + echo $p +} + +ecn_payload_get() +{ + echo $(ipip_payload_get "0") +} + +ecn_decap_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local ecn_desc=$1; shift + local outer_tos=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 skip_sw \ + action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ecn_payload_get) + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A 2001:db8:3::2 -B 2001:db8:3::1 -t ip \ + tos=$outer_tos,next=47,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" + + kill_process $mz_pid + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +no_matching_tunnel_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local sip=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ipip_payload_get "$@") + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A $sip -B 2001:db8:3::1 -t ip next=47,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc" + + kill_process $mz_pid + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +decap_error_test() +{ + # Correct source IP - the remote address + local sip=2001:db8:3::2 + + ecn_decap_test "Decap error" "ECT(1)" 01 + ecn_decap_test "Decap error" "ECT(0)" 02 + ecn_decap_test "Decap error" "CE" 03 + + no_matching_tunnel_test "Decap error: Source IP check failed" \ + 2001:db8:4::2 "0" + no_matching_tunnel_test \ + "Decap error: Key exists but was not expected" $sip "2" \ + "00:00:00:E9:" + + # Destroy the tunnel and create new one with key + __addr_add_del g1 del 2001:db8:3::1/128 + tunnel_destroy g1 + + tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit key 233 + __addr_add_del g1 add 2001:db8:3::1/128 + + no_matching_tunnel_test \ + "Decap error: Key does not exist but was expected" $sip "0" + no_matching_tunnel_test \ + "Decap error: Packet has a wrong key field" $sip "2" \ + "00:00:00:E8:" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh new file mode 100755 index 000000000000..7aca8e5922cf --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh @@ -0,0 +1,330 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap tunnel drops and exceptions functionality over mlxsw. +# Check all traps to make sure they are triggered under the right +# conditions. + +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|---------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR1 (802.1d) | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 192.0.2.17 | | +# | | id 1000 dstport $VXPORT | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRF2 | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + decap_error_test + overlay_smac_is_mc_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +: ${VXPORT:=4789} +export VXPORT + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + tc qdisc add dev $swp1 clsact + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link add name vx1 type vxlan id 1000 local 192.0.2.17 \ + dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx1 master br1 + ip link set dev vx1 up + + ip address add dev $rp1 192.0.2.17/28 + ip link set dev $rp1 up +} + +switch_destroy() +{ + ip link set dev $rp1 down + ip address del dev $rp1 192.0.2.17/28 + + ip link set dev vx1 down + ip link set dev vx1 nomaster + ip link del dev vx1 + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + tc qdisc del dev $swp1 clsact + + ip link set dev br1 down + ip link del dev br1 +} + +vrf2_create() +{ + simple_if_init $rp2 192.0.2.18/28 +} + +vrf2_destroy() +{ + simple_if_fini $rp2 192.0.2.18/28 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + h1_create + switch_create + vrf2_create +} + +cleanup() +{ + pre_cleanup + + vrf2_destroy + switch_destroy + h1_destroy + forwarding_restore + vrf_cleanup +} + +ecn_payload_get() +{ + dest_mac=$(mac_get $h1) + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"00:"$( : IP proto + )"D6:E5:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + ) + echo $p +} + +ecn_decap_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local ecn_desc=$1; shift + local outer_tos=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_ip 192.0.2.3 dst_ip 192.0.2.1 action pass + + rp1_mac=$(mac_get $rp1) + payload=$(ecn_payload_get) + + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac -B 192.0.2.17 \ + -t udp sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" + + kill_process $mz_pid + tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower +} + +reserved_bits_payload_get() +{ + dest_mac=$(mac_get $h1) + p=$(: + )"08:"$( : VXLAN flags + )"01:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"00:"$( : IP proto + )"00:00:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + ) + echo $p +} + +short_payload_get() +{ + dest_mac=$(mac_get $h1) + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + ) + echo $p +} + +corrupted_packet_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local payload_get=$1; shift + local mz_pid + + RET=0 + + # In case of too short packet, there is no any inner packet, + # so the matching will always succeed + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower skip_hw src_ip 192.0.2.3 dst_ip 192.0.2.1 action pass + + rp1_mac=$(mac_get $rp1) + payload=$($payload_get) + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc" + + kill_process $mz_pid + tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower +} + +decap_error_test() +{ + ecn_decap_test "Decap error" "ECT(1)" 01 + ecn_decap_test "Decap error" "ECT(0)" 02 + ecn_decap_test "Decap error" "CE" 03 + + corrupted_packet_test "Decap error: Reserved bits in use" \ + "reserved_bits_payload_get" + corrupted_packet_test "Decap error: Too short inner packet" \ + "short_payload_get" +} + +mc_smac_payload_get() +{ + dest_mac=$(mac_get $h1) + source_mac=01:02:03:04:05:06 + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"00:"$( : IP proto + )"00:00:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + ) + echo $p +} + +overlay_smac_is_mc_test() +{ + local trap_name="overlay_smac_is_mc" + local mz_pid + + RET=0 + + # The matching will be checked on devlink_trap_drop_test() + # and the filter will be removed on devlink_trap_drop_cleanup() + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_mac 01:02:03:04:05:06 action pass + + rp1_mac=$(mac_get $rp1) + payload=$(mc_smac_payload_get) + + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $swp1 101 + + log_test "Overlay source MAC is multicast" + + devlink_trap_drop_cleanup $mz_pid $swp1 "ip" 1 101 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh new file mode 100755 index 000000000000..4599c331240b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh @@ -0,0 +1,342 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap tunnel drops and exceptions functionality over mlxsw. +# Check all traps to make sure they are triggered under the right +# conditions. + +# +------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 2001:db8:1::1/64 | +# +----|-------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR1 (802.1d) | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 2001:db8:3::1 | | +# | | id 1000 dstport $VXPORT | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRF2 | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + decap_error_test + overlay_smac_is_mc_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +: ${VXPORT:=4789} +export VXPORT + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + tc qdisc add dev $swp1 clsact + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link add name vx1 type vxlan id 1000 local 2001:db8:3::1 \ + dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \ + tos inherit ttl 100 + ip link set dev vx1 master br1 + ip link set dev vx1 up + + ip link set dev $rp1 up + ip address add dev $rp1 2001:db8:3::1/64 +} + +switch_destroy() +{ + ip address del dev $rp1 2001:db8:3::1/64 + ip link set dev $rp1 down + + ip link set dev vx1 down + ip link set dev vx1 nomaster + ip link del dev vx1 + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + tc qdisc del dev $swp1 clsact + + ip link set dev br1 down + ip link del dev br1 +} + +vrf2_create() +{ + simple_if_init $rp2 2001:db8:3::2/64 +} + +vrf2_destroy() +{ + simple_if_fini $rp2 2001:db8:3::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + h1_create + switch_create + vrf2_create +} + +cleanup() +{ + pre_cleanup + + vrf2_destroy + switch_destroy + h1_destroy + forwarding_restore + vrf_cleanup +} + +ecn_payload_get() +{ + local dest_mac=$(mac_get $h1) + local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + )"86:dd:"$( : ETH type + )"6"$( : IP version + )"0:0"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:08:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + )"80:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) + echo $p +} + +ecn_decap_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local ecn_desc=$1; shift + local outer_tos=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip 2001:db8:1::3 dst_ip 2001:db8:1::1 action pass + + rp1_mac=$(mac_get $rp1) + payload=$(ecn_payload_get) + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 2001:db8:3::1 -t udp \ + sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" + + kill_process $mz_pid + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +reserved_bits_payload_get() +{ + local dest_mac=$(mac_get $h1) + local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + p=$(: + )"08:"$( : VXLAN flags + )"01:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + )"86:dd:"$( : ETH type + )"6"$( : IP version + )"0:0"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:08:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + )"80:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) + echo $p +} + +short_payload_get() +{ + dest_mac=$(mac_get $h1) + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + ) + echo $p +} + +corrupted_packet_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local payload_get=$1; shift + local mz_pid + + RET=0 + + # In case of too short packet, there is no any inner packet, + # so the matching will always succeed + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower skip_hw src_ip 2001:db8:3::1 dst_ip 2001:db8:1::1 \ + action pass + + rp1_mac=$(mac_get $rp1) + payload=$($payload_get) + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc" + + kill_process $mz_pid + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +decap_error_test() +{ + ecn_decap_test "Decap error" "ECT(1)" 01 + ecn_decap_test "Decap error" "ECT(0)" 02 + ecn_decap_test "Decap error" "CE" 03 + + corrupted_packet_test "Decap error: Reserved bits in use" \ + "reserved_bits_payload_get" + corrupted_packet_test "Decap error: Too short inner packet" \ + "short_payload_get" +} + +mc_smac_payload_get() +{ + local dest_mac=$(mac_get $h1) + local source_mac="01:02:03:04:05:06" + local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"86:dd:"$( : ETH type + )"6"$( : IP version + )"0:0"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:08:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + )"80:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) + echo $p +} + +overlay_smac_is_mc_test() +{ + local trap_name="overlay_smac_is_mc" + local mz_pid + + RET=0 + + # The matching will be checked on devlink_trap_drop_test() + # and the filter will be removed on devlink_trap_drop_cleanup() + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_mac 01:02:03:04:05:06 action pass + + rp1_mac=$(mac_get $rp1) + payload=$(mc_smac_payload_get) + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $swp1 101 + + log_test "Overlay source MAC is multicast" + + devlink_trap_drop_cleanup $mz_pid $swp1 "ipv6" 1 101 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh new file mode 100755 index 000000000000..a5c2aec52898 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh @@ -0,0 +1,272 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test VLAN classification after routing and verify that the order of +# configuration does not impact switch behavior. Verify that {RIF, Port}->VID +# mapping is added correctly for existing {Port, VID}->FID mapping and that +# {RIF, Port}->VID mapping is added correctly for new {Port, VID}->FID mapping. + +# +-------------------+ +--------------------+ +# | H1 | | H2 | +# | | | | +# | $h1.10 + | | + $h2.10 | +# | 192.0.2.1/28 | | | | 192.0.2.3/28 | +# | | | | | | +# | $h1 + | | + $h2 | +# +----------------|--+ +--|-----------------+ +# | | +# +----------------|-------------------------|-----------------+ +# | SW $swp1 + + $swp2 | +# | | | | +# | +--------------|-------------------------|---------------+ | +# | | $swp1.10 + + $swp2.10 | | +# | | | | +# | | br0 | | +# | | 192.0.2.2/28 | | +# | +--------------------------------------------------------+ | +# | | +# | $swp3.20 + | +# | 192.0.2.17/28 | | +# | | | +# | $swp3 + | +# +---------------|--------------------------------------------+ +# | +# +---------------|--+ +# | $h3 + | +# | | | +# | $h3.20 + | +# | 192.0.2.18/28 | +# | | +# | H3 | +# +------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + port_vid_map_rif + rif_port_vid_map +" + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 + + ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 +} + +h1_destroy() +{ + ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 10 v$h2 192.0.2.3/28 +} + +h2_destroy() +{ + vlan_destroy $h2 10 + simple_if_fini $h2 +} + +h3_create() +{ + simple_if_init $h3 + vlan_create $h3 20 v$h3 192.0.2.18/28 + + ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17 +} + +h3_destroy() +{ + ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17 + + vlan_destroy $h3 20 + simple_if_fini $h3 +} + +switch_create() +{ + ip link set dev $swp1 up + tc qdisc add dev $swp1 clsact + + ip link add dev br0 type bridge mcast_snooping 0 + + # By default, a link-local address is generated when netdevice becomes + # up. Adding an address to the bridge will cause creating a RIF for it. + # Prevent generating link-local address to be able to control when the + # RIF is added. + sysctl_set net.ipv6.conf.br0.addr_gen_mode 1 + ip link set dev br0 up + + ip link set dev $swp2 up + vlan_create $swp2 10 + ip link set dev $swp2.10 master br0 + + ip link set dev $swp3 up + vlan_create $swp3 20 "" 192.0.2.17/28 + + # Replace neighbor to avoid 1 packet which is forwarded in software due + # to "unresolved neigh". + ip neigh replace dev $swp3.20 192.0.2.18 lladdr $(mac_get $h3.20) +} + +switch_destroy() +{ + vlan_destroy $swp3 20 + ip link set dev $swp3 down + + ip link set dev $swp2.10 nomaster + vlan_destroy $swp2 10 + ip link set dev $swp2 down + + ip link set dev br0 down + sysctl_restore net.ipv6.conf.br0.addr_gen_mode + ip link del dev br0 + + tc qdisc del dev $swp1 clsact + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + h3_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h3_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +bridge_rif_add() +{ + rifs_occ_t0=$(devlink_resource_occ_get rifs) + __addr_add_del br0 add 192.0.2.2/28 + rifs_occ_t1=$(devlink_resource_occ_get rifs) + + expected_rifs=$((rifs_occ_t0 + 1)) + + [[ $expected_rifs -eq $rifs_occ_t1 ]] + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + sleep 1 +} + +bridge_rif_del() +{ + __addr_add_del br0 del 192.0.2.2/28 +} + +port_vid_map_rif() +{ + RET=0 + + # First add {port, VID}->FID for swp1.10, then add a RIF and verify that + # packets get the correct VID after routing. + vlan_create $swp1 10 + ip link set dev $swp1.10 master br0 + bridge_rif_add + + # Replace neighbor to avoid 1 packet which is forwarded in software due + # to "unresolved neigh". + ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10) + + # The hardware matches on the first ethertype which is not VLAN, + # so the protocol should be IP. + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.1 action pass + + ping_do $h1.10 192.0.2.18 + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add RIF for existing {port, VID}->FID mapping" + + tc filter del dev $swp1 egress + + bridge_rif_del + ip link set dev $swp1.10 nomaster + vlan_destroy $swp1 10 +} + +rif_port_vid_map() +{ + RET=0 + + # First add an address to the bridge, which will create a RIF on top of + # it, then add a new {port, VID}->FID mapping and verify that packets + # get the correct VID after routing. + bridge_rif_add + vlan_create $swp1 10 + ip link set dev $swp1.10 master br0 + + # Replace neighbor to avoid 1 packet which is forwarded in software due + # to "unresolved neigh". + ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10) + + # The hardware matches on the first ethertype which is not VLAN, + # so the protocol should be IP. + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.1 action pass + + ping_do $h1.10 192.0.2.18 + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add {port, VID}->FID mapping for FID with a RIF" + + tc filter del dev $swp1 egress + + ip link set dev $swp1.10 nomaster + vlan_destroy $swp1 10 + bridge_rif_del +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh new file mode 100755 index 000000000000..fe905a7f34b3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh @@ -0,0 +1,190 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding +ethtool_lib_dir=$(dirname $0)/../hw + +ALL_TESTS=" + autoneg + autoneg_force_mode +" + +NUM_NETIFS=2 +: ${TIMEOUT:=30000} # ms +source $lib_dir/lib.sh +source $ethtool_lib_dir/ethtool_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up + + busywait "$TIMEOUT" wait_for_port_up ethtool $swp2 + check_err $? "ports did not come up" + + busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:" + if [[ $? -ne 0 ]]; then + log_test "SKIP: driver does not support lanes setting" + exit 1 + fi + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +check_lanes() +{ + local dev=$1; shift + local lanes=$1; shift + local max_speed=$1; shift + local chosen_lanes + + chosen_lanes=$(ethtool $dev | grep 'Lanes:') + chosen_lanes=${chosen_lanes#*"Lanes: "} + + ((chosen_lanes == lanes)) + check_err $? "swp1 advertise $max_speed and $lanes, devs sync to $chosen_lanes" +} + +check_unsupported_lanes() +{ + local dev=$1; shift + local max_speed=$1; shift + local max_lanes=$1; shift + local autoneg=$1; shift + local autoneg_str="" + + local unsupported_lanes=$((max_lanes *= 2)) + + if [[ $autoneg -eq 0 ]]; then + autoneg_str="autoneg off" + fi + + ethtool -s $swp1 speed $max_speed lanes $unsupported_lanes $autoneg_str &> /dev/null + check_fail $? "Unsuccessful $unsupported_lanes lanes setting was expected" +} + +max_speed_and_lanes_get() +{ + local dev=$1; shift + local arr=("$@") + local max_lanes + local max_speed + local -a lanes_arr + local -a speeds_arr + local -a max_values + + for ((i=0; i<${#arr[@]}; i+=2)); do + speeds_arr+=("${arr[$i]}") + lanes_arr+=("${arr[i+1]}") + done + + max_values+=($(get_max "${speeds_arr[@]}")) + max_values+=($(get_max "${lanes_arr[@]}")) + + echo ${max_values[@]} +} + +search_linkmode() +{ + local speed=$1; shift + local lanes=$1; shift + local arr=("$@") + + for ((i=0; i<${#arr[@]}; i+=2)); do + if [[ $speed -eq ${arr[$i]} && $lanes -eq ${arr[i+1]} ]]; then + return 1 + fi + done + return 0 +} + +autoneg() +{ + RET=0 + + local lanes + local max_speed + local max_lanes + + local -a linkmodes_params=($(dev_linkmodes_params_get $swp1 1)) + local -a max_values=($(max_speed_and_lanes_get $swp1 "${linkmodes_params[@]}")) + max_speed=${max_values[0]} + max_lanes=${max_values[1]} + + lanes=$max_lanes + + while [[ $lanes -ge 1 ]]; do + search_linkmode $max_speed $lanes "${linkmodes_params[@]}" + if [[ $? -eq 1 ]]; then + ethtool_set $swp1 speed $max_speed lanes $lanes + ip link set dev $swp1 up + ip link set dev $swp2 up + + busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:" + check_err $? "Lanes parameter is not presented on time" + + check_lanes $swp1 $lanes $max_speed + log_test "$lanes lanes is autonegotiated" + fi + let $((lanes /= 2)) + done + + check_unsupported_lanes $swp1 $max_speed $max_lanes 1 + log_test "Lanes number larger than max width is not set" + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +autoneg_force_mode() +{ + RET=0 + + local lanes + local max_speed + local max_lanes + + local -a linkmodes_params=($(dev_linkmodes_params_get $swp1 1)) + local -a max_values=($(max_speed_and_lanes_get $swp1 "${linkmodes_params[@]}")) + max_speed=${max_values[0]} + max_lanes=${max_values[1]} + + lanes=$max_lanes + + while [[ $lanes -ge 1 ]]; do + search_linkmode $max_speed $lanes "${linkmodes_params[@]}" + if [[ $? -eq 1 ]]; then + ethtool_set $swp1 speed $max_speed lanes $lanes autoneg off + ethtool_set $swp2 speed $max_speed lanes $lanes autoneg off + ip link set dev $swp1 up + ip link set dev $swp2 up + + busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:" + check_err $? "Lanes parameter is not presented on time" + + check_lanes $swp1 $lanes $max_speed + log_test "Autoneg off, $lanes lanes detected during force mode" + fi + let $((lanes /= 2)) + done + + check_unsupported_lanes $swp1 $max_speed $max_lanes 0 + log_test "Lanes number larger than max width is not set" + + ip link set dev $swp2 down + ip link set dev $swp1 down + + ethtool -s $swp2 autoneg on + ethtool -s $swp1 autoneg on +} + +check_ethtool_lanes_support +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh new file mode 100755 index 000000000000..6fd422d38fe8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh @@ -0,0 +1,182 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test operations that we expect to report extended ack. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + netdev_pre_up_test + vxlan_vlan_add_test + vxlan_bridge_create_test + bridge_create_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +netdev_pre_up_test() +{ + RET=0 + + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + ip link set dev br1 addrgenmode none + ip link set dev br1 up + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 nolearning noudpcsum tos inherit ttl 100 + + ip link set dev vx1 master br1 + check_err $? + + ip link set dev $swp1 master br1 + check_err $? + + ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0 + ip link set dev br2 addrgenmode none + ip link set dev br2 up + ip link add name vx2 up type vxlan id 2000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 nolearning noudpcsum tos inherit ttl 100 + + ip link set dev vx2 master br2 + check_err $? + + ip link set dev $swp2 master br2 + check_err $? + + # Unsupported configuration: mlxsw demands that all offloaded VXLAN + # devices have the same TTL. + ip link set dev vx2 down + ip link set dev vx2 type vxlan ttl 200 + + ip link set dev vx2 up &>/dev/null + check_fail $? + + ip link set dev vx2 up 2>&1 >/dev/null | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - NETDEV_PRE_UP" + + ip link del dev vx2 + ip link del dev br2 + + ip link del dev vx1 + ip link del dev br1 +} + +vxlan_vlan_add_test() +{ + RET=0 + + ip link add name br1 type bridge vlan_filtering 1 mcast_snooping 0 + ip link set dev br1 addrgenmode none + ip link set dev br1 up + + # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 tos inherit ttl 100 + + ip link set dev vx1 master br1 + check_err $? + + bridge vlan add dev vx1 vid 1 + check_err $? + + ip link set dev $swp1 master br1 + check_err $? + + bridge vlan add dev vx1 vid 1 pvid untagged 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - map VLAN at VXLAN device" + + ip link del dev vx1 + ip link del dev br1 +} + +vxlan_bridge_create_test() +{ + RET=0 + + # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 tos inherit ttl 100 + + # Test with VLAN-aware bridge. + ip link add name br1 type bridge vlan_filtering 1 mcast_snooping 0 + ip link set dev br1 addrgenmode none + ip link set dev br1 up + + ip link set dev vx1 master br1 + + ip link set dev $swp1 master br1 2>&1 > /dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + # Test with VLAN-unaware bridge. + ip link set dev br1 type bridge vlan_filtering 0 + + ip link set dev $swp1 master br1 2>&1 > /dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - bridge creation with VXLAN" + + ip link del dev br1 + ip link del dev vx1 +} + +bridge_create_test() +{ + RET=0 + + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none + ip link set dev br1 up + ip link add name br2 type bridge vlan_filtering 1 + ip link set dev br2 addrgenmode none + ip link set dev br2 up + + ip link set dev $swp1 master br1 + check_err $? + + # Only one VLAN-aware bridge is supported, so this should fail with + # an extack. + ip link set dev $swp2 master br2 2>&1 > /dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - multiple VLAN-aware bridges creation" + + ip link del dev br2 + ip link del dev br1 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib.sh b/tools/testing/selftests/drivers/net/mlxsw/fib.sh new file mode 100755 index 000000000000..dcbf32b99bb6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/fib.sh @@ -0,0 +1,270 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the FIB offload API on top of mlxsw. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ipv4_identical_routes + ipv4_tos + ipv4_metric + ipv4_replace + ipv4_delete + ipv4_plen + ipv4_replay + ipv4_flush + ipv4_local_replace + ipv6_add + ipv6_metric + ipv6_append_single + ipv6_replace_single + ipv6_metric_multipath + ipv6_append_multipath + ipv6_replace_multipath + ipv6_append_multipath_to_single + ipv6_delete_single + ipv6_delete_multipath + ipv6_replay_single + ipv6_replay_multipath + ipv6_local_replace +" +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source $lib_dir/fib_offload_lib.sh + +ipv4_identical_routes() +{ + fib_ipv4_identical_routes_test "testns1" +} + +ipv4_tos() +{ + fib_ipv4_tos_test "testns1" +} + +ipv4_metric() +{ + fib_ipv4_metric_test "testns1" +} + +ipv4_replace() +{ + fib_ipv4_replace_test "testns1" +} + +ipv4_delete() +{ + fib_ipv4_delete_test "testns1" +} + +ipv4_plen() +{ + fib_ipv4_plen_test "testns1" +} + +ipv4_replay_metric() +{ + fib_ipv4_replay_metric_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay_tos() +{ + fib_ipv4_replay_tos_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay_plen() +{ + fib_ipv4_replay_plen_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay() +{ + ipv4_replay_metric + ipv4_replay_tos + ipv4_replay_plen +} + +ipv4_flush() +{ + fib_ipv4_flush_test "testns1" +} + +ipv4_local_replace() +{ + local ns="testns1" + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add table local 192.0.2.1/32 dev dummy1 + fib4_trap_check $ns "table local 192.0.2.1/32 dev dummy1" false + check_err $? "Local table route not in hardware when should" + + ip -n $ns route add table main 192.0.2.1/32 dev dummy1 + fib4_trap_check $ns "table main 192.0.2.1/32 dev dummy1" true + check_err $? "Main table route in hardware when should not" + + fib4_trap_check $ns "table local 192.0.2.1/32 dev dummy1" false + check_err $? "Local table route was replaced when should not" + + # Test that local routes can replace routes in main table. + ip -n $ns route add table main 192.0.2.2/32 dev dummy1 + fib4_trap_check $ns "table main 192.0.2.2/32 dev dummy1" false + check_err $? "Main table route not in hardware when should" + + ip -n $ns route add table local 192.0.2.2/32 dev dummy1 + fib4_trap_check $ns "table local 192.0.2.2/32 dev dummy1" false + check_err $? "Local table route did not replace route in main table when should" + + fib4_trap_check $ns "table main 192.0.2.2/32 dev dummy1" true + check_err $? "Main table route was not replaced when should" + + log_test "IPv4 local table route replacement" + + ip -n $ns link del dev dummy1 +} + +ipv6_add() +{ + fib_ipv6_add_test "testns1" +} + +ipv6_metric() +{ + fib_ipv6_metric_test "testns1" +} + +ipv6_append_single() +{ + fib_ipv6_append_single_test "testns1" +} + +ipv6_replace_single() +{ + fib_ipv6_replace_single_test "testns1" +} + +ipv6_metric_multipath() +{ + fib_ipv6_metric_multipath_test "testns1" +} + +ipv6_append_multipath() +{ + fib_ipv6_append_multipath_test "testns1" +} + +ipv6_replace_multipath() +{ + fib_ipv6_replace_multipath_test "testns1" +} + +ipv6_append_multipath_to_single() +{ + fib_ipv6_append_multipath_to_single_test "testns1" +} + +ipv6_delete_single() +{ + fib_ipv6_delete_single_test "testns1" +} + +ipv6_delete_multipath() +{ + fib_ipv6_delete_multipath_test "testns1" +} + +ipv6_replay_single() +{ + fib_ipv6_replay_single_test "testns1" "$DEVLINK_DEV" +} + +ipv6_replay_multipath() +{ + fib_ipv6_replay_multipath_test "testns1" "$DEVLINK_DEV" +} + +ipv6_local_replace() +{ + local ns="testns1" + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add table local 2001:db8:1::1/128 dev dummy1 + fib6_trap_check $ns "table local 2001:db8:1::1/128 dev dummy1" false + check_err $? "Local table route not in hardware when should" + + ip -n $ns route add table main 2001:db8:1::1/128 dev dummy1 + fib6_trap_check $ns "table main 2001:db8:1::1/128 dev dummy1" true + check_err $? "Main table route in hardware when should not" + + fib6_trap_check $ns "table local 2001:db8:1::1/128 dev dummy1" false + check_err $? "Local table route was replaced when should not" + + # Test that local routes can replace routes in main table. + ip -n $ns route add table main 2001:db8:1::2/128 dev dummy1 + fib6_trap_check $ns "table main 2001:db8:1::2/128 dev dummy1" false + check_err $? "Main table route not in hardware when should" + + ip -n $ns route add table local 2001:db8:1::2/128 dev dummy1 + fib6_trap_check $ns "table local 2001:db8:1::2/128 dev dummy1" false + check_err $? "Local route route did not replace route in main table when should" + + fib6_trap_check $ns "table main 2001:db8:1::2/128 dev dummy1" true + check_err $? "Main table route was not replaced when should" + + log_test "IPv6 local table route replacement" + + ip -n $ns link del dev dummy1 +} + +fib_notify_on_flag_change_set() +{ + local notify=$1; shift + + ip netns exec testns1 sysctl -qw net.ipv4.fib_notify_on_flag_change=$notify + ip netns exec testns1 sysctl -qw net.ipv6.fib_notify_on_flag_change=$notify + + log_info "Set fib_notify_on_flag_change to $notify" +} + +setup_prepare() +{ + ip netns add testns1 + if [ $? -ne 0 ]; then + echo "Failed to add netns \"testns1\"" + exit 1 + fi + + devlink dev reload $DEVLINK_DEV netns testns1 + if [ $? -ne 0 ]; then + echo "Failed to reload into netns \"testns1\"" + exit 1 + fi +} + +cleanup() +{ + pre_cleanup + devlink -N testns1 dev reload $DEVLINK_DEV netns $$ + ip netns del testns1 +} + +trap cleanup EXIT + +setup_prepare + +fib_notify_on_flag_change_set 1 +tests_run + +fib_notify_on_flag_change_set 0 +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh new file mode 100755 index 000000000000..e99ae500f387 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh @@ -0,0 +1,349 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test unicast FIB offload indication. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ipv6_route_add + ipv6_route_replace + ipv6_route_nexthop_group_share + ipv6_route_rate +" +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +tor1_create() +{ + simple_if_init $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128 +} + +tor1_destroy() +{ + simple_if_fini $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128 +} + +tor2_create() +{ + simple_if_init $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128 +} + +tor2_destroy() +{ + simple_if_fini $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128 +} + +spine_create() +{ + ip link set dev $spine_p1 up + ip link set dev $spine_p2 up + + __addr_add_del $spine_p1 add 2001:db8:1::1/64 + __addr_add_del $spine_p2 add 2001:db8:2::1/64 +} + +spine_destroy() +{ + __addr_add_del $spine_p2 del 2001:db8:2::1/64 + __addr_add_del $spine_p1 del 2001:db8:1::1/64 + + ip link set dev $spine_p2 down + ip link set dev $spine_p1 down +} + +ipv6_offload_check() +{ + local pfx="$1"; shift + local expected_num=$1; shift + local num + + # Try to avoid races with route offload + sleep .1 + + num=$(ip -6 route show match ${pfx} | grep "offload" | wc -l) + + if [ $num -eq $expected_num ]; then + return 0 + fi + + return 1 +} + +ipv6_route_add_prefix() +{ + RET=0 + + # Add a prefix route and check that it is offloaded. + ip -6 route add 2001:db8:3::/64 dev $spine_p1 metric 100 + ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1 + check_err $? "prefix route not offloaded" + + # Append an identical prefix route with an higher metric and check that + # offload indication did not change. + ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 200 + ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1 + check_err $? "lowest metric not offloaded after append" + ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0 + check_err $? "highest metric offloaded when should not" + + # Prepend an identical prefix route with lower metric and check that + # it is offloaded and the others are not. + ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 10 + ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 10" 1 + check_err $? "lowest metric not offloaded after prepend" + ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 0 + check_err $? "mid metric offloaded when should not" + ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0 + check_err $? "highest metric offloaded when should not" + + # Delete the routes and add the same route with a different nexthop + # device. Check that it is offloaded. + ip -6 route flush 2001:db8:3::/64 dev $spine_p1 + ip -6 route add 2001:db8:3::/64 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 dev $spine_p2" 1 + + log_test "IPv6 prefix route add" + + ip -6 route flush 2001:db8:3::/64 +} + +ipv6_route_add_mpath() +{ + RET=0 + + # Add a multipath route and check that it is offloaded. + ip -6 route add 2001:db8:3::/64 metric 100 \ + nexthop via 2001:db8:1::2 dev $spine_p1 \ + nexthop via 2001:db8:2::2 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "multipath route not offloaded when should" + + # Append another nexthop and check that it is offloaded as well. + ip -6 route append 2001:db8:3::/64 metric 100 \ + nexthop via 2001:db8:1::3 dev $spine_p1 + ipv6_offload_check "2001:db8:3::/64 metric 100" 3 + check_err $? "appended nexthop not offloaded when should" + + # Mimic route replace by removing the route and adding it back with + # only two nexthops. + ip -6 route del 2001:db8:3::/64 + ip -6 route add 2001:db8:3::/64 metric 100 \ + nexthop via 2001:db8:1::2 dev $spine_p1 \ + nexthop via 2001:db8:2::2 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "multipath route not offloaded after delete & add" + + # Append a nexthop with an higher metric and check that the offload + # indication did not change. + ip -6 route append 2001:db8:3::/64 metric 200 \ + nexthop via 2001:db8:1::3 dev $spine_p1 + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "lowest metric not offloaded after append" + ipv6_offload_check "2001:db8:3::/64 metric 200" 0 + check_err $? "highest metric offloaded when should not" + + # Prepend a nexthop with a lower metric and check that it is offloaded + # and the others are not. + ip -6 route append 2001:db8:3::/64 metric 10 \ + nexthop via 2001:db8:1::3 dev $spine_p1 + ipv6_offload_check "2001:db8:3::/64 metric 10" 1 + check_err $? "lowest metric not offloaded after prepend" + ipv6_offload_check "2001:db8:3::/64 metric 100" 0 + check_err $? "mid metric offloaded when should not" + ipv6_offload_check "2001:db8:3::/64 metric 200" 0 + check_err $? "highest metric offloaded when should not" + + log_test "IPv6 multipath route add" + + ip -6 route flush 2001:db8:3::/64 +} + +ipv6_route_add() +{ + ipv6_route_add_prefix + ipv6_route_add_mpath +} + +ipv6_route_replace() +{ + RET=0 + + # Replace prefix route with prefix route. + ip -6 route add 2001:db8:3::/64 metric 100 dev $spine_p1 + ipv6_offload_check "2001:db8:3::/64 metric 100" 1 + check_err $? "prefix route not offloaded when should" + ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 metric 100" 1 + check_err $? "prefix route not offloaded after replace" + + # Replace prefix route with multipath route. + ip -6 route replace 2001:db8:3::/64 metric 100 \ + nexthop via 2001:db8:1::2 dev $spine_p1 \ + nexthop via 2001:db8:2::2 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "multipath route not offloaded after replace" + + # Replace multipath route with prefix route. A prefix route cannot + # replace a multipath route, so it is appended. + ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p1 + ipv6_offload_check "2001:db8:3::/64 metric 100 dev $spine_p1" 0 + check_err $? "prefix route offloaded after 'replacing' multipath route" + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "multipath route not offloaded after being 'replaced' by prefix route" + + # Replace multipath route with multipath route. + ip -6 route replace 2001:db8:3::/64 metric 100 \ + nexthop via 2001:db8:1::3 dev $spine_p1 \ + nexthop via 2001:db8:2::3 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "multipath route not offloaded after replacing multipath route" + + # Replace a non-existing multipath route with a multipath route and + # check that it is appended and not offloaded. + ip -6 route replace 2001:db8:3::/64 metric 200 \ + nexthop via 2001:db8:1::3 dev $spine_p1 \ + nexthop via 2001:db8:2::3 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "multipath route not offloaded after non-existing route was 'replaced'" + ipv6_offload_check "2001:db8:3::/64 metric 200" 0 + check_err $? "multipath route offloaded after 'replacing' non-existing route" + + log_test "IPv6 route replace" + + ip -6 route flush 2001:db8:3::/64 +} + +ipv6_route_nexthop_group_share() +{ + RET=0 + + # The driver consolidates identical nexthop groups in order to reduce + # the resource usage in its adjacency table. Check that the deletion + # of one multipath route using the group does not affect the other. + ip -6 route add 2001:db8:3::/64 \ + nexthop via 2001:db8:1::2 dev $spine_p1 \ + nexthop via 2001:db8:2::2 dev $spine_p2 + ip -6 route add 2001:db8:4::/64 \ + nexthop via 2001:db8:1::2 dev $spine_p1 \ + nexthop via 2001:db8:2::2 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64" 2 + check_err $? "multipath route not offloaded when should" + ipv6_offload_check "2001:db8:4::/64" 2 + check_err $? "multipath route not offloaded when should" + ip -6 route del 2001:db8:3::/64 + ipv6_offload_check "2001:db8:4::/64" 2 + check_err $? "multipath route not offloaded after deletion of route sharing the nexthop group" + + # Check that after unsharing a nexthop group the routes are still + # marked as offloaded. + ip -6 route add 2001:db8:3::/64 \ + nexthop via 2001:db8:1::2 dev $spine_p1 \ + nexthop via 2001:db8:2::2 dev $spine_p2 + ip -6 route del 2001:db8:4::/64 \ + nexthop via 2001:db8:1::2 dev $spine_p1 + ipv6_offload_check "2001:db8:4::/64" 1 + check_err $? "singlepath route not offloaded after unsharing the nexthop group" + ipv6_offload_check "2001:db8:3::/64" 2 + check_err $? "multipath route not offloaded after unsharing the nexthop group" + + log_test "IPv6 nexthop group sharing" + + ip -6 route flush 2001:db8:3::/64 + ip -6 route flush 2001:db8:4::/64 +} + +ipv6_route_rate() +{ + local batch_dir=$(mktemp -d) + local num_rts=$((40 * 1024)) + local num_nhs=16 + local total + local start + local diff + local end + local nhs + local i + + RET=0 + + # Prepare 40K /64 multipath routes with 16 nexthops each and check how + # long it takes to add them. A limit of 60 seconds is set. It is much + # higher than insertion should take and meant to flag a serious + # regression. + total=$((nums_nhs * num_rts)) + + for i in $(seq 1 $num_nhs); do + ip -6 address add 2001:db8:1::10:$i/128 dev $tor1_p1 + nexthops+=" nexthop via 2001:db8:1::10:$i dev $spine_p1" + done + + for i in $(seq 1 $num_rts); do + echo "route add 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \ + >> $batch_dir/add.batch + echo "route del 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \ + >> $batch_dir/del.batch + done + + start=$(date +%s.%N) + + ip -batch $batch_dir/add.batch + count=$(ip -6 route show | grep offload | wc -l) + while [ $count -lt $total ]; do + sleep .01 + count=$(ip -6 route show | grep offload | wc -l) + done + + end=$(date +%s.%N) + + diff=$(echo "$end - $start" | bc -l) + test "$(echo "$diff > 60" | bc -l)" -eq 0 + check_err $? "route insertion took too long" + log_info "inserted $num_rts routes in $diff seconds" + + log_test "IPv6 routes insertion rate" + + ip -batch $batch_dir/del.batch + for i in $(seq 1 $num_nhs); do + ip -6 address del 2001:db8:1::10:$i/128 dev $tor1_p1 + done + rm -rf $batch_dir +} + +setup_prepare() +{ + spine_p1=${NETIFS[p1]} + tor1_p1=${NETIFS[p2]} + + spine_p2=${NETIFS[p3]} + tor2_p1=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + tor1_create + tor2_create + spine_create +} + +cleanup() +{ + pre_cleanup + + spine_destroy + tor2_destroy + tor1_destroy + + forwarding_restore + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh new file mode 100755 index 000000000000..941ba4c485c9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + l3_monitor_test +" +NUM_NETIFS=0 +source $lib_dir/lib.sh + +swp=$NETIF_NO_CABLE + +cleanup() +{ + pre_cleanup +} + +l3_monitor_test() +{ + hw_stats_monitor_test $swp l3 \ + "ip addr add dev $swp 192.0.2.1/28" \ + "ip addr del dev $swp 192.0.2.1/28" +} + +trap cleanup EXIT + +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh new file mode 100755 index 000000000000..7d7f862c809c --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh @@ -0,0 +1,263 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test routing over bridge and verify that the order of configuration does not +# impact switch behavior. Verify that RIF is added correctly for existing +# mappings and that new mappings use the correct RIF. + +# +-------------------+ +--------------------+ +# | H1 | | H2 | +# | | | | +# | $h1.10 + | | + $h2.10 | +# | 192.0.2.1/28 | | | | 192.0.2.3/28 | +# | | | | | | +# | $h1 + | | + $h2 | +# +----------------|--+ +--|-----------------+ +# | | +# +----------------|-------------------------|-----------------+ +# | SW $swp1 + + $swp2 | +# | | | | +# | +--------------|-------------------------|---------------+ | +# | | $swp1.10 + + $swp2.10 | | +# | | | | +# | | br0 | | +# | | 192.0.2.2/28 | | +# | +--------------------------------------------------------+ | +# | | +# | $swp3.10 + | +# | 192.0.2.17/28 | | +# | | | +# | $swp3 + | +# +---------------|--------------------------------------------+ +# | +# +---------------|--+ +# | $h3 + | +# | | | +# | $h3.10 + | +# | 192.0.2.18/28 | +# | | +# | H3 | +# +------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + port_vid_map_rif + rif_port_vid_map +" + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 + + ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 +} + +h1_destroy() +{ + ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 10 v$h2 192.0.2.3/28 +} + +h2_destroy() +{ + vlan_destroy $h2 10 + simple_if_fini $h2 +} + +h3_create() +{ + simple_if_init $h3 + vlan_create $h3 10 v$h3 192.0.2.18/28 + + ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17 +} + +h3_destroy() +{ + ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17 + + vlan_destroy $h3 10 + simple_if_fini $h3 +} + +switch_create() +{ + ip link set dev $swp1 up + + ip link add dev br0 type bridge mcast_snooping 0 + + # By default, a link-local address is generated when netdevice becomes + # up. Adding an address to the bridge will cause creating a RIF for it. + # Prevent generating link-local address to be able to control when the + # RIF is added. + sysctl_set net.ipv6.conf.br0.addr_gen_mode 1 + ip link set dev br0 up + + ip link set dev $swp2 up + vlan_create $swp2 10 + ip link set dev $swp2.10 master br0 + + ip link set dev $swp3 up + vlan_create $swp3 10 "" 192.0.2.17/28 + tc qdisc add dev $swp3 clsact + + # Replace neighbor to avoid 1 packet which is forwarded in software due + # to "unresolved neigh". + ip neigh replace dev $swp3.10 192.0.2.18 lladdr $(mac_get $h3.10) +} + +switch_destroy() +{ + tc qdisc del dev $swp3 clsact + vlan_destroy $swp3 10 + ip link set dev $swp3 down + + ip link set dev $swp2.10 nomaster + vlan_destroy $swp2 10 + ip link set dev $swp2 down + + ip link set dev br0 down + sysctl_restore net.ipv6.conf.br0.addr_gen_mode + ip link del dev br0 + + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + h3_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h3_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +bridge_rif_add() +{ + rifs_occ_t0=$(devlink_resource_occ_get rifs) + __addr_add_del br0 add 192.0.2.2/28 + rifs_occ_t1=$(devlink_resource_occ_get rifs) + + expected_rifs=$((rifs_occ_t0 + 1)) + + [[ $expected_rifs -eq $rifs_occ_t1 ]] + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + sleep 1 +} + +bridge_rif_del() +{ + __addr_add_del br0 del 192.0.2.2/28 +} + +port_vid_map_rif() +{ + RET=0 + + # First add {port, VID}->FID for $swp1.10, then add a RIF and verify + # that packets can be routed via the existing mapping. + vlan_create $swp1 10 + ip link set dev $swp1.10 master br0 + bridge_rif_add + + # The hardware matches on the first ethertype which is not VLAN, + # so the protocol should be IP. + tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.18 action pass + + ping_do $h1.10 192.0.2.18 + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $swp3 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add RIF for existing {port, VID}->FID mapping" + + tc filter del dev $swp3 egress + + bridge_rif_del + ip link set dev $swp1.10 nomaster + vlan_destroy $swp1 10 +} + +rif_port_vid_map() +{ + RET=0 + + # First add an address to the bridge, which will create a RIF on top of + # it, then add a new {port, VID}->FID mapping and verify that packets + # can be routed via the new mapping. + bridge_rif_add + vlan_create $swp1 10 + ip link set dev $swp1.10 master br0 + + # The hardware matches on the first ethertype which is not VLAN, + # so the protocol should be IP. + tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.18 action pass + + ping_do $h1.10 192.0.2.18 + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $swp3 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add {port, VID}->FID mapping for FID with a RIF" + + tc filter del dev $swp3 egress + + ip link set dev $swp1.10 nomaster + vlan_destroy $swp1 10 + bridge_rif_del +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh new file mode 100755 index 000000000000..577293bab88b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh @@ -0,0 +1,264 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test routing over bridge and verify that the order of configuration does not +# impact switch behavior. Verify that RIF is added correctly for existing +# mapping and that packets can be routed via port which is added after the FID +# already has a RIF. + +# +-------------------+ +--------------------+ +# | H1 | | H2 | +# | | | | +# | $h1.10 + | | + $h2.10 | +# | 192.0.2.1/28 | | | | 192.0.2.3/28 | +# | | | | | | +# | $h1 + | | + $h2 | +# +----------------|--+ +--|-----------------+ +# | | +# +----------------|-------------------------|-----------------+ +# | SW | | | +# | +--------------|-------------------------|---------------+ | +# | | $swp1 + + $swp2 | | +# | | | | +# | | br0 | | +# | +--------------------------------------------------------+ | +# | | | +# | br0.10 | +# | 192.0.2.2/28 | +# | | +# | | +# | $swp3 + | +# | 192.0.2.17/28 | | +# +----------------|-------------------------------------------+ +# | +# +----------------|--+ +# | $h3 + | +# | 192.0.2.18/28 | +# | | +# | H3 | +# +-------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + vid_map_rif + rif_vid_map +" + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 + + ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 +} + +h1_destroy() +{ + ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 10 v$h2 192.0.2.3/28 +} + +h2_destroy() +{ + vlan_destroy $h2 10 + simple_if_fini $h2 +} + +h3_create() +{ + simple_if_init $h3 192.0.2.18/28 + ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17 +} + +h3_destroy() +{ + ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17 + simple_if_fini $h3 192.0.2.18/28 +} + +switch_create() +{ + ip link set dev $swp1 up + + ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0 + + # By default, a link-local address is generated when netdevice becomes + # up. Adding an address to the bridge will cause creating a RIF for it. + # Prevent generating link-local address to be able to control when the + # RIF is added. + sysctl_set net.ipv6.conf.br0.addr_gen_mode 1 + ip link set dev br0 up + + ip link set dev $swp2 up + ip link set dev $swp2 master br0 + bridge vlan add vid 10 dev $swp2 + + ip link set dev $swp3 up + __addr_add_del $swp3 add 192.0.2.17/28 + tc qdisc add dev $swp3 clsact + + # Replace neighbor to avoid 1 packet which is forwarded in software due + # to "unresolved neigh". + ip neigh replace dev $swp3 192.0.2.18 lladdr $(mac_get $h3) +} + +switch_destroy() +{ + tc qdisc del dev $swp3 clsact + __addr_add_del $swp3 del 192.0.2.17/28 + ip link set dev $swp3 down + + bridge vlan del vid 10 dev $swp2 + ip link set dev $swp2 nomaster + ip link set dev $swp2 down + + ip link set dev br0 down + sysctl_restore net.ipv6.conf.br0.addr_gen_mode + ip link del dev br0 + + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + h3_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h3_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +bridge_rif_add() +{ + rifs_occ_t0=$(devlink_resource_occ_get rifs) + vlan_create br0 10 "" 192.0.2.2/28 + rifs_occ_t1=$(devlink_resource_occ_get rifs) + + expected_rifs=$((rifs_occ_t0 + 1)) + + [[ $expected_rifs -eq $rifs_occ_t1 ]] + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + sleep 1 +} + +bridge_rif_del() +{ + vlan_destroy br0 10 +} + +vid_map_rif() +{ + RET=0 + + # First add VID->FID for vlan 10, then add a RIF and verify that + # packets can be routed via the existing mapping. + bridge vlan add vid 10 dev br0 self + ip link set dev $swp1 master br0 + bridge vlan add vid 10 dev $swp1 + + bridge_rif_add + + tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.18 action pass + + ping_do $h1.10 192.0.2.18 + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $swp3 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add RIF for existing VID->FID mapping" + + tc filter del dev $swp3 egress + + bridge_rif_del + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 nomaster + bridge vlan del vid 10 dev br0 self +} + +rif_vid_map() +{ + RET=0 + + # Using 802.1Q, there is only one VID->FID map for each VID. That means + # that we cannot really check adding a new map for existing FID with a + # RIF. Verify that packets can be routed via port which is added after + # the FID already has a RIF, although in practice there is no new + # mapping in the hardware. + bridge vlan add vid 10 dev br0 self + bridge_rif_add + + ip link set dev $swp1 master br0 + bridge vlan add vid 10 dev $swp1 + + tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.18 action pass + + ping_do $h1.10 192.0.2.18 + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $swp3 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add port to VID->FID mapping for FID with a RIF" + + tc filter del dev $swp3 egress + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 nomaster + + bridge_rif_del + bridge vlan del vid 10 dev br0 self +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh new file mode 100755 index 000000000000..90450216a10d --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh @@ -0,0 +1,311 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test routing after VXLAN decapsulation and verify that the order of +# configuration does not impact switch behavior. Verify that RIF is added +# correctly for existing mapping and that new mapping uses the correct RIF. + +# +---------------------------+ +# | H1 | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|----------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 br1 | | +# | | vid 10 pvid untagged | | +# | | | | +# | | | | +# | | + vx4001 | | +# | | local 192.0.2.17 | | +# | | remote 192.0.2.18 | | +# | | id 104001 | | +# | | dstport $VXPORT | | +# | | vid 4001 pvid untagged | | +# | | | | +# | +----------------------------------+------------------------------------+ | +# | | | +# | +----------------------------------|------------------------------------+ | +# | | | | | +# | | +-------------------------------+---------------------------------+ | | +# | | | | | | +# | | + vlan10 vlan4001 + | | +# | | 192.0.2.2/28 | | +# | | | | +# | | vrf-green | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 +lo | +# | | 198.51.100.1/24 192.0.2.17/32 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | v$rp2 | +# | + $rp2 | +# | 198.51.100.2/24 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + vni_fid_map_rif + rif_vni_fid_map +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +: ${VXPORT:=4789} +export VXPORT + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + ip address add dev $rp1 198.51.100.1/24 + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 pvid untagged + + tc qdisc add dev $swp1 clsact + + ip link add name vx4001 type vxlan id 104001 \ + local 192.0.2.17 dstport $VXPORT \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx4001 up + + ip link set dev vx4001 master br1 + + ip address add 192.0.2.17/32 dev lo + + # Create SVIs. + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + + # Replace neighbor to avoid 1 packet which is forwarded in software due + # to "unresolved neigh". + ip neigh replace dev vlan10 192.0.2.1 lladdr $(mac_get $h1) + + ip address add 192.0.2.2/28 dev vlan10 + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 4001 dev br1 self + + sysctl_set net.ipv4.conf.all.rp_filter 0 +} + +switch_destroy() +{ + sysctl_restore net.ipv4.conf.all.rp_filter + + bridge vlan del vid 4001 dev br1 self + bridge vlan del vid 10 dev br1 self + + ip link del dev vlan10 + + vrf_destroy "vrf-green" + + ip address del 192.0.2.17/32 dev lo + + tc qdisc del dev $swp1 clsact + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev vx4001 nomaster + + ip link set dev vx4001 down + ip link del dev vx4001 + + ip address del dev $rp1 198.51.100.1/24 + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 198.51.100.2/24 + + ip route add 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1 +} + +vrp2_destroy() +{ + ip route del 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1 + + simple_if_fini $rp2 198.51.100.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + h1_create + switch_create + + vrp2_create +} + +cleanup() +{ + pre_cleanup + + vrp2_destroy + + switch_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +payload_get() +{ + local dest_mac=$(mac_get vlan4001) + local src_mac=$(mac_get $rp1) + + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"01:96:41:"$( : VXLAN VNI : 104001 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$src_mac:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:54:"$( : IP total length + )"3f:49:"$( : IP identification + )"00:00:"$( : IP flags + frag off + )"3f:"$( : IP TTL + )"01:"$( : IP proto + )"50:21:"$( : IP header csum + )"c6:33:64:0a:"$( : IP saddr: 198.51.100.10 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + ) + echo $p +} + +vlan_rif_add() +{ + rifs_occ_t0=$(devlink_resource_occ_get rifs) + + ip link add link br1 name vlan4001 up master vrf-green \ + type vlan id 4001 + + rifs_occ_t1=$(devlink_resource_occ_get rifs) + expected_rifs=$((rifs_occ_t0 + 1)) + + [[ $expected_rifs -eq $rifs_occ_t1 ]] + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" +} + +vlan_rif_del() +{ + ip link del dev vlan4001 +} + +vni_fid_map_rif() +{ + local rp1_mac=$(mac_get $rp1) + + RET=0 + + # First add VNI->FID mapping to the FID of VLAN 4001 + bridge vlan add vid 4001 dev vx4001 pvid untagged + + # Add a RIF to the FID with VNI->FID mapping + vlan_rif_add + + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.1 action pass + + payload=$(payload_get) + ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \ + -B 192.0.2.17 -A 192.0.2.18 \ + -t udp sp=12345,dp=$VXPORT,p=$payload -q + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add RIF for existing VNI->FID mapping" + + tc filter del dev $swp1 egress + + bridge vlan del vid 4001 dev vx4001 pvid untagged + vlan_rif_del +} + +rif_vni_fid_map() +{ + local rp1_mac=$(mac_get $rp1) + + RET=0 + + # First add a RIF to the FID of VLAN 4001 + vlan_rif_add + + # Add VNI->FID mapping to FID with a RIF + bridge vlan add vid 4001 dev vx4001 pvid untagged + + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.1 action pass + + payload=$(payload_get) + ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \ + -B 192.0.2.17 -A 192.0.2.18 \ + -t udp sp=12345,dp=$VXPORT,p=$payload -q + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add VNI->FID mapping for FID with a RIF" + + tc filter del dev $swp1 egress + + bridge vlan del vid 4001 dev vx4001 pvid untagged + vlan_rif_del +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh new file mode 100755 index 000000000000..e1ad623146d7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh @@ -0,0 +1,202 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# ../../../net/forwarding/mirror_gre_topo_lib.sh for more details. +# +# Test offloading various features of offloading gretap mirrors specific to +# mlxsw. + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/mirror_lib.sh +source $lib_dir/mirror_gre_lib.sh +source $lib_dir/mirror_gre_topo_lib.sh + +ALL_TESTS=" + test_keyful + test_soft + test_tos_fixed + test_ttl_inherit +" + +setup_keyful() +{ + tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \ + ttl 100 tos inherit allow-localremote \ + key 1234 + + tunnel_create h3-gt6-key ip6gretap 2001:db8:3::2 2001:db8:3::1 \ + key 1234 + ip link set h3-gt6-key vrf v$h3 + matchall_sink_create h3-gt6-key + + ip address add dev $swp3 2001:db8:3::1/64 + ip address add dev $h3 2001:db8:3::2/64 +} + +cleanup_keyful() +{ + ip address del dev $h3 2001:db8:3::2/64 + ip address del dev $swp3 2001:db8:3::1/64 + + tunnel_destroy h3-gt6-key + tunnel_destroy gt6-key +} + +setup_soft() +{ + # Set up a topology for testing underlay routes that point at an + # unsupported soft device. + + tunnel_create gt6-soft ip6gretap 2001:db8:4::1 2001:db8:4::2 \ + ttl 100 tos inherit allow-localremote + + tunnel_create h3-gt6-soft ip6gretap 2001:db8:4::2 2001:db8:4::1 + ip link set h3-gt6-soft vrf v$h3 + matchall_sink_create h3-gt6-soft + + ip link add name v1 type veth peer name v2 + ip link set dev v1 up + ip address add dev v1 2001:db8:4::1/64 + + ip link set dev v2 vrf v$h3 + ip link set dev v2 up + ip address add dev v2 2001:db8:4::2/64 +} + +cleanup_soft() +{ + ip link del dev v1 + + tunnel_destroy h3-gt6-soft + tunnel_destroy gt6-soft +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip address add dev $swp3 2001:db8:2::1/64 + ip address add dev $h3 2001:db8:2::2/64 + + ip address add dev $swp3 192.0.2.129/28 + ip address add dev $h3 192.0.2.130/28 + + setup_keyful + setup_soft +} + +cleanup() +{ + pre_cleanup + + cleanup_soft + cleanup_keyful + + ip address del dev $h3 2001:db8:2::2/64 + ip address del dev $swp3 2001:db8:2::1/64 + + ip address del dev $h3 192.0.2.130/28 + ip address del dev $swp3 192.0.2.129/28 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_span_gre_ttl_inherit() +{ + local tundev=$1; shift + local type=$1; shift + local what=$1; shift + + RET=0 + + ip link set dev $tundev type $type ttl inherit + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev + + ip link set dev $tundev type $type ttl 100 + + quick_test_span_gre_dir $tundev + mirror_uninstall $swp1 ingress + + log_test "$what: no offload on TTL of inherit" +} + +test_span_gre_tos_fixed() +{ + local tundev=$1; shift + local type=$1; shift + local what=$1; shift + + RET=0 + + ip link set dev $tundev type $type tos 0x10 + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev + + ip link set dev $tundev type $type tos inherit + quick_test_span_gre_dir $tundev + mirror_uninstall $swp1 ingress + + log_test "$what: no offload on a fixed TOS" +} + +test_span_failable() +{ + local tundev=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev + mirror_uninstall $swp1 ingress + + log_test "fail $what" +} + +test_keyful() +{ + test_span_failable gt6-key "mirror to keyful gretap" +} + +test_soft() +{ + test_span_failable gt6-soft "mirror to gretap w/ soft underlay" +} + +test_tos_fixed() +{ + test_span_gre_tos_fixed gt4 gretap "mirror to gretap" + test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap" +} + + +test_ttl_inherit() +{ + test_span_gre_ttl_inherit gt4 gretap "mirror to gretap" + test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh new file mode 100644 index 000000000000..d43093310e23 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh @@ -0,0 +1,185 @@ +# SPDX-License-Identifier: GPL-2.0 + +# Test offloading a number of mirrors-to-gretap. The test creates a number of +# tunnels. Then it adds one flower mirror for each of the tunnels, matching a +# given host IP. Then it generates traffic at each of the host IPs and checks +# that the traffic has been mirrored at the appropriate tunnel. +# +# +--------------------------+ +--------------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 2001:db8:1:X::1/64 | | 2001:db8:1:X::2/64 | | +# +-----|--------------------+ +--------------------|-----+ +# | | +# +-----|-------------------------------------------------------------|-----+ +# | SW o--> mirrors | | +# | +---|-------------------------------------------------------------|---+ | +# | | + $swp1 BR $swp2 + | | +# | +---------------------------------------------------------------------+ | +# | | +# | + $swp3 + gt6-<X> (ip6gretap) | +# | | 2001:db8:2:X::1/64 : loc=2001:db8:2:X::1 | +# | | : rem=2001:db8:2:X::2 | +# | | : ttl=100 | +# | | : tos=inherit | +# | | : | +# +-----|--------------------------------:----------------------------------+ +# | : +# +-----|--------------------------------:----------------------------------+ +# | H3 + $h3 + h3-gt6-<X> (ip6gretap) | +# | 2001:db8:2:X::2/64 loc=2001:db8:2:X::2 | +# | rem=2001:db8:2:X::1 | +# | ttl=100 | +# | tos=inherit | +# | | +# +-------------------------------------------------------------------------+ + +source ../../../../net/forwarding/mirror_lib.sh + +MIRROR_NUM_NETIFS=6 + +mirror_gre_ipv6_addr() +{ + local net=$1; shift + local num=$1; shift + + printf "2001:db8:%x:%x" $net $num +} + +mirror_gre_tunnels_create() +{ + local count=$1; shift + local should_fail=$1; shift + + MIRROR_GRE_BATCH_FILE="$(mktemp)" + for ((i=0; i < count; ++i)); do + local match_dip=$(mirror_gre_ipv6_addr 1 $i)::2 + local htun=h3-gt6-$i + local tun=gt6-$i + + ((mirror_gre_tunnels++)) + + ip address add dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64 + ip address add dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64 + + ip address add dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64 + ip address add dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64 + + tunnel_create $tun ip6gretap \ + $(mirror_gre_ipv6_addr 2 $i)::1 \ + $(mirror_gre_ipv6_addr 2 $i)::2 \ + ttl 100 tos inherit allow-localremote + + tunnel_create $htun ip6gretap \ + $(mirror_gre_ipv6_addr 2 $i)::2 \ + $(mirror_gre_ipv6_addr 2 $i)::1 + ip link set $htun vrf v$h3 + matchall_sink_create $htun + + cat >> $MIRROR_GRE_BATCH_FILE <<-EOF + filter add dev $swp1 ingress pref 1000 \ + protocol ipv6 \ + flower skip_sw dst_ip $match_dip \ + action mirred egress mirror dev $tun + EOF + done + + tc -b $MIRROR_GRE_BATCH_FILE + check_err_fail $should_fail $? "Mirror rule insertion" +} + +mirror_gre_tunnels_destroy() +{ + local count=$1; shift + + for ((i=0; i < count; ++i)); do + local htun=h3-gt6-$i + local tun=gt6-$i + + ip address del dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64 + ip address del dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64 + + ip address del dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64 + ip address del dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64 + + tunnel_destroy $htun + tunnel_destroy $tun + done +} + +mirror_gre_test() +{ + local count=$1; shift + local should_fail=$1; shift + + mirror_gre_tunnels_create $count $should_fail + if ((should_fail)); then + return + fi + + sleep 5 + + for ((i = 0; i < count; ++i)); do + local sip=$(mirror_gre_ipv6_addr 1 $i)::1 + local dip=$(mirror_gre_ipv6_addr 1 $i)::2 + local htun=h3-gt6-$i + local message + + icmp6_capture_install $htun + mirror_test v$h1 $sip $dip $htun 100 10 + icmp6_capture_uninstall $htun + done +} + +mirror_gre_setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + mirror_gre_tunnels=0 + + vrf_prepare + + simple_if_init $h1 + simple_if_init $h2 + simple_if_init $h3 + + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none + ip link set dev br1 up + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + tc qdisc add dev $swp1 clsact + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + ip link set dev $swp3 up +} + +mirror_gre_cleanup() +{ + mirror_gre_tunnels_destroy $mirror_gre_tunnels + + ip link set dev $swp3 down + + ip link set dev $swp2 down + + tc qdisc del dev $swp1 clsact + ip link set dev $swp1 down + + ip link del dev br1 + + simple_if_fini $h3 + simple_if_fini $h2 + simple_if_fini $h1 + + vrf_cleanup +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh new file mode 100644 index 000000000000..48395cfd4f95 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Defines + +if [[ ! -v MLXSW_CHIP ]]; then + MLXSW_CHIP=$(devlink -j dev info $DEVLINK_DEV | jq -r '.[][]["driver"]') + if [ -z "$MLXSW_CHIP" ]; then + echo "SKIP: Device $DEVLINK_DEV doesn't support devlink info command" + exit 1 + fi +fi + +MLXSW_SPECTRUM_REV=$(case $MLXSW_CHIP in + mlxsw_spectrum) + echo 1 ;; + mlxsw_spectrum*) + echo ${MLXSW_CHIP#mlxsw_spectrum} ;; + *) + echo "Couldn't determine Spectrum chip revision." \ + > /dev/stderr ;; + esac) + +mlxsw_on_spectrum() +{ + local rev=$1; shift + local op="==" + local rev2=${rev%+} + + if [[ $rev2 != $rev ]]; then + op=">=" + fi + + ((MLXSW_SPECTRUM_REV $op rev2)) +} + +__mlxsw_only_on_spectrum() +{ + local rev=$1; shift + local caller=$1; shift + local src=$1; shift + + if ! mlxsw_on_spectrum "$rev"; then + log_test_xfail $src:$caller "(Spectrum-$rev only)" + return 1 + fi +} + +mlxsw_only_on_spectrum() +{ + local caller=${FUNCNAME[1]} + local src=${BASH_SOURCE[1]} + local rev + + for rev in "$@"; do + if __mlxsw_only_on_spectrum "$rev" "$caller" "$src"; then + return 0 + fi + done + + return 1 +} + +mlxsw_max_descriptors_get() +{ + local spectrum_rev=$MLXSW_SPECTRUM_REV + + case $spectrum_rev in + 1) echo 81920 ;; + 2) echo 136960 ;; + 3) echo 204800 ;; + 4) echo 220000 ;; + *) echo "Unknown max descriptors for chip revision." > /dev/stderr + return 1 ;; + esac +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh new file mode 100755 index 000000000000..fca0e1e642c6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh @@ -0,0 +1,260 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test a "one-armed router" [1] scenario. Packets forwarded between H1 and H2 +# should be forwarded by the ASIC, but also trapped so that ICMP redirect +# packets could be potentially generated. +# +# 1. https://en.wikipedia.org/wiki/One-armed_router +# +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | 192.0.2.2/24 | | +# | | 2001:db8:1::2/64 | | +# | | 198.51.100.2/24 | | +# | | 2001:db8:2::2/64 | | +# | | | | +# | | + $swp2 | | +# | +--|--------------------------------------------------------------------+ | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="ping_ipv4 ping_ipv6 fwd_mark_ipv4 fwd_mark_ipv6" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64 +} + +switch_create() +{ + ip link add name br0 address $(mac_get $swp1) \ + type bridge mcast_snooping 0 + ip link set dev br0 up + + ip link set dev $swp1 master br0 + ip link set dev $swp1 up + ip link set dev $swp2 master br0 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + + __addr_add_del br0 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del br0 add 198.51.100.2/24 2001:db8:2::2/64 +} + +switch_destroy() +{ + __addr_add_del br0 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del br0 del 192.0.2.2/24 2001:db8:1::2/64 + + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br0 down + ip link del dev br0 +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.1 ": h1->h2" +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": h1->h2" +} + +fwd_mark_ipv4() +{ + # Transmit packets from H1 to H2 and make sure they are trapped at + # swp1 due to loopback error, but only forwarded by the ASIC through + # swp2 + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ip pref 2 handle 102 flower \ + skip_sw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + ip vrf exec v$h1 $MZ $h1 -c 10 -d 100msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + RET=0 + + tc_check_packets "dev $swp1 ingress" 101 10 + check_err $? + + log_test "fwd mark: trapping IPv4 packets due to LBERROR" + + RET=0 + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? + + log_test "fwd mark: forwarding IPv4 packets in software" + + RET=0 + + tc_check_packets "dev $swp2 egress" 102 10 + check_err $? + + log_test "fwd mark: forwarding IPv4 packets in hardware" + + tc filter del dev $swp2 egress protocol ip pref 2 handle 102 flower + tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower +} + +fwd_mark_ipv6() +{ + tc filter add dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ipv6 pref 2 handle 102 flower \ + skip_sw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + ip vrf exec v$h1 $MZ $h1 -6 -c 10 -d 100msec -p 64 -A 2001:db8:1::1 \ + -B 2001:db8:2::1 -t udp dp=52768,sp=42768 -q + + RET=0 + + tc_check_packets "dev $swp1 ingress" 101 10 + check_err $? + + log_test "fwd mark: trapping IPv6 packets due to LBERROR" + + RET=0 + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? + + log_test "fwd mark: forwarding IPv6 packets in software" + + RET=0 + + tc_check_packets "dev $swp2 egress" 102 10 + check_err $? + + log_test "fwd mark: forwarding IPv6 packets in hardware" + + tc filter del dev $swp2 egress protocol ipv6 pref 2 handle 102 flower + tc filter del dev $swp2 egress protocol ipv6 pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + sysctl_set net.ipv4.conf.all.accept_redirects 0 + sysctl_set net.ipv6.conf.all.accept_redirects 0 + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + sysctl_restore net.ipv6.conf.all.accept_redirects + sysctl_restore net.ipv4.conf.all.accept_redirects + + forwarding_restore + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh new file mode 100755 index 000000000000..fe0343b95e6c --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that PCI reset works correctly by verifying that only the expected reset +# methods are supported and that after issuing the reset the ifindex of the +# port changes. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + pci_reset_test +" +NUM_NETIFS=1 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +pci_reset_test() +{ + RET=0 + + local bus=$(echo $DEVLINK_DEV | cut -d '/' -f 1) + local bdf=$(echo $DEVLINK_DEV | cut -d '/' -f 2) + + if [ $bus != "pci" ]; then + check_err 1 "devlink device is not a PCI device" + log_test "pci reset" + return + fi + + if [ ! -f /sys/bus/pci/devices/$bdf/reset_method ]; then + check_err 1 "reset is not supported" + log_test "pci reset" + return + fi + + [[ $(cat /sys/bus/pci/devices/$bdf/reset_method) == "bus" ]] + check_err $? "only \"bus\" reset method should be supported" + + local ifindex_pre=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]') + + echo 1 > /sys/bus/pci/devices/$bdf/reset + check_err $? "reset failed" + + # Wait for udev to rename newly created netdev. + udevadm settle + + local ifindex_post=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]') + + [[ $ifindex_pre != $ifindex_post ]] + check_err $? "reset not performed" + + log_test "pci reset" +} + +swp1=${NETIFS[p1]} +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh new file mode 100755 index 000000000000..b1f0781f6b25 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that filters that match on the same port range, but with different +# combination of IPv4/IPv6 and TCP/UDP all use the same port range register by +# observing port range registers' occupancy via devlink-resource. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + port_range_occ_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 +} + +h1_destroy() +{ + simple_if_fini $h1 +} + +switch_create() +{ + simple_if_init $swp1 + tc qdisc add dev $swp1 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +port_range_occ_get() +{ + devlink_resource_occ_get port_range_registers +} + +port_range_occ_test() +{ + RET=0 + + local occ=$(port_range_occ_get) + + # Two port range registers are used, for source and destination port + # ranges. + tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \ + flower skip_sw ip_proto udp src_port 1-100 dst_port 1-100 \ + action pass + (( occ + 2 == $(port_range_occ_get) )) + check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))" + + tc filter add dev $swp1 ingress pref 1 handle 102 proto ip \ + flower skip_sw ip_proto tcp src_port 1-100 dst_port 1-100 \ + action pass + tc filter add dev $swp1 ingress pref 2 handle 103 proto ipv6 \ + flower skip_sw ip_proto udp src_port 1-100 dst_port 1-100 \ + action pass + tc filter add dev $swp1 ingress pref 2 handle 104 proto ipv6 \ + flower skip_sw ip_proto tcp src_port 1-100 dst_port 1-100 \ + action pass + (( occ + 2 == $(port_range_occ_get) )) + check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))" + + tc filter del dev $swp1 ingress pref 2 handle 104 flower + tc filter del dev $swp1 ingress pref 2 handle 103 flower + tc filter del dev $swp1 ingress pref 1 handle 102 flower + (( occ + 2 == $(port_range_occ_get) )) + check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))" + + tc filter del dev $swp1 ingress pref 1 handle 101 flower + (( occ == $(port_range_occ_get) )) + check_err $? "Got occupancy $(port_range_occ_get), expected $occ" + + log_test "port range occupancy" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh new file mode 100644 index 000000000000..2a70840ff14b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: GPL-2.0 + +PORT_RANGE_NUM_NETIFS=2 + +port_range_h1_create() +{ + simple_if_init $h1 +} + +port_range_h1_destroy() +{ + simple_if_fini $h1 +} + +port_range_switch_create() +{ + simple_if_init $swp1 + tc qdisc add dev $swp1 clsact +} + +port_range_switch_destroy() +{ + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 +} + +port_range_rules_create() +{ + local count=$1; shift + local should_fail=$1; shift + local batch_file="$(mktemp)" + + for ((i = 0; i < count; ++i)); do + cat >> $batch_file <<-EOF + filter add dev $swp1 ingress \ + prot ipv4 \ + pref 1000 \ + flower skip_sw \ + ip_proto udp dst_port 1-$((100 + i)) \ + action pass + EOF + done + + tc -b $batch_file + check_err_fail $should_fail $? "Rule insertion" + + rm -f $batch_file +} + +__port_range_test() +{ + local count=$1; shift + local should_fail=$1; shift + + port_range_rules_create $count $should_fail + + offload_count=$(tc -j filter show dev $swp1 ingress | + jq "[.[] | select(.options.in_hw == true)] | length") + ((offload_count == count)) + check_err_fail $should_fail $? "port range offload count" +} + +port_range_test() +{ + local count=$1; shift + local should_fail=$1; shift + + if ! tc_offload_check $PORT_RANGE_NUM_NETIFS; then + check_err 1 "Could not test offloaded functionality" + return + fi + + __port_range_test $count $should_fail +} + +port_range_setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + port_range_h1_create + port_range_switch_create +} + +port_range_cleanup() +{ + pre_cleanup + + port_range_switch_destroy + port_range_h1_destroy + + vrf_cleanup +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh new file mode 100644 index 000000000000..1e9a4aff76a2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for physical ports resource. The test splits each splittable port +# to its width and checks that eventually the number of physical ports equals +# the maximum number of physical ports. + +PORT_NUM_NETIFS=0 + +declare -a unsplit + +port_setup_prepare() +{ + : +} + +port_cleanup() +{ + pre_cleanup + + for port in "${unsplit[@]}"; do + devlink port unsplit $port + check_err $? "Did not unsplit $netdev" + done + unsplit=() +} + +split_all_ports() +{ + local should_fail=$1; shift + + # Loop over the splittable netdevs and create tuples of netdev along + # with its width. For example: + # '$netdev1 $count1 $netdev2 $count2...', when: + # $netdev1-2 are splittable netdevs in the device, and + # $count1-2 are the netdevs width respectively. + while read netdev count <<<$( + devlink -j port show | + jq -r '.[][] | select(.splittable==true) | "\(.netdev) \(.lanes)"' + ) + [[ ! -z $netdev ]] + do + devlink port split $netdev count $count + check_err $? "Did not split $netdev into $count" + unsplit+=( "${netdev}s0" ) + done +} + +port_test() +{ + local max_ports=$1; shift + local should_fail=$1; shift + + split_all_ports $should_fail + + occ=$(devlink -j resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="physical_ports") |.["occ"]') + + [[ $occ -eq $max_ports ]] + check_err_fail $should_fail $? "Attempt to create $max_ports ports (actual result $occ)" + +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh new file mode 100755 index 000000000000..00d55b0e98c1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh @@ -0,0 +1,304 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + create_8021ad_vlan_upper_on_top_front_panel_port + create_8021ad_vlan_upper_on_top_bridge_port + create_8021ad_vlan_upper_on_top_lag + create_8021ad_vlan_upper_on_top_bridge + create_8021ad_vlan_upper_on_top_8021ad_bridge + create_vlan_upper_on_top_8021ad_bridge + create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge + create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge + enslave_front_panel_with_vlan_upper_to_8021ad_bridge + enslave_lag_with_vlan_upper_to_8021ad_bridge + add_ip_address_to_8021ad_bridge + switch_bridge_protocol_from_8021q_to_8021ad +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up + + sleep 10 +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +create_vlan_upper_on_top_of_bridge() +{ + RET=0 + + local bridge_proto=$1; shift + local netdev_proto=$1; shift + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol $bridge_proto vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none + + ip link set dev br0 up + ip link set dev $swp1 master br0 + + ip link add name br0.100 link br0 type vlan \ + protocol $netdev_proto id 100 2>/dev/null + check_fail $? "$netdev_proto vlan upper creation on top of an $bridge_proto bridge not rejected" + + ip link add name br0.100 link br0 type vlan \ + protocol $netdev_proto id 100 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "$netdev_proto vlan upper creation on top of an $bridge_proto bridge rejected without extack" + + log_test "create $netdev_proto vlan upper on top $bridge_proto bridge" + + ip link del dev br0 +} + +create_8021ad_vlan_upper_on_top_front_panel_port() +{ + RET=0 + + ip link add name $swp1.100 link $swp1 type vlan \ + protocol 802.1ad id 100 2>/dev/null + check_fail $? "802.1ad vlan upper creation on top of a front panel not rejected" + + ip link add name $swp1.100 link $swp1 type vlan \ + protocol 802.1ad id 100 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "802.1ad vlan upper creation on top of a front panel rejected without extack" + + log_test "create 802.1ad vlan upper on top of a front panel" +} + +create_8021ad_vlan_upper_on_top_bridge_port() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none + + ip link set dev $swp1 master br0 + ip link set dev br0 up + + ip link add name $swp1.100 link $swp1 type vlan \ + protocol 802.1ad id 100 2>/dev/null + check_fail $? "802.1ad vlan upper creation on top of a bridge port not rejected" + + ip link add name $swp1.100 link $swp1 type vlan \ + protocol 802.1ad id 100 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "802.1ad vlan upper creation on top of a bridge port rejected without extack" + + log_test "create 802.1ad vlan upper on top of a bridge port" + + ip link del dev br0 +} + +create_8021ad_vlan_upper_on_top_lag() +{ + RET=0 + + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp1 master bond1 + + ip link add name bond1.100 link bond1 type vlan \ + protocol 802.1ad id 100 2>/dev/null + check_fail $? "802.1ad vlan upper creation on top of a lag not rejected" + + ip link add name bond1.100 link bond1 type vlan \ + protocol 802.1ad id 100 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "802.1ad vlan upper creation on top of a lag rejected without extack" + + log_test "create 802.1ad vlan upper on top of a lag" + + ip link del dev bond1 +} + +create_8021ad_vlan_upper_on_top_bridge() +{ + RET=0 + + create_vlan_upper_on_top_of_bridge "802.1q" "802.1ad" +} + +create_8021ad_vlan_upper_on_top_8021ad_bridge() +{ + RET=0 + + create_vlan_upper_on_top_of_bridge "802.1ad" "802.1ad" +} + +create_vlan_upper_on_top_8021ad_bridge() +{ + RET=0 + + create_vlan_upper_on_top_of_bridge "802.1ad" "802.1q" +} + +create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + + ip link set dev $swp1 master br0 + + ip link add name $swp1.100 link $swp1 type vlan id 100 2>/dev/null + check_fail $? "vlan upper creation on top of front panel enslaved to 802.1ad bridge not rejected" + + ip link add name $swp1.100 link $swp1 type vlan id 100 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "vlan upper creation on top of front panel enslaved to 802.1ad bridge rejected without extack" + + log_test "create vlan upper on top of front panel enslaved to 802.1ad bridge" + + ip link del dev br0 +} + +create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp1 master bond1 + ip link set dev bond1 master br0 + + ip link add name bond1.100 link bond1 type vlan id 100 2>/dev/null + check_fail $? "vlan upper creation on top of lag enslaved to 802.1ad bridge not rejected" + + ip link add name bond1.100 link bond1 type vlan id 100 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "vlan upper creation on top of lag enslaved to 802.1ad bridge rejected without extack" + + log_test "create vlan upper on top of lag enslaved to 802.1ad bridge" + + ip link del dev bond1 + ip link del dev br0 +} + +enslave_front_panel_with_vlan_upper_to_8021ad_bridge() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + + ip link add name $swp1.100 link $swp1 type vlan id 100 + + ip link set dev $swp1 master br0 2>/dev/null + check_fail $? "front panel with vlan upper enslavemnt to 802.1ad bridge not rejected" + + ip link set dev $swp1 master br0 2>&1 >/dev/null | grep -q mlxsw_spectrum + check_err $? "front panel with vlan upper enslavemnt to 802.1ad bridge rejected without extack" + + log_test "enslave front panel with vlan upper to 802.1ad bridge" + + ip link del dev $swp1.100 + ip link del dev br0 +} + +enslave_lag_with_vlan_upper_to_8021ad_bridge() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp1 master bond1 + ip link add name bond1.100 link bond1 type vlan id 100 + + ip link set dev bond1 master br0 2>/dev/null + check_fail $? "lag with vlan upper enslavemnt to 802.1ad bridge not rejected" + + ip link set dev bond1 master br0 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "lag with vlan upper enslavemnt to 802.1ad bridge rejected without extack" + + log_test "enslave lag with vlan upper to 802.1ad bridge" + + ip link del dev bond1 + ip link del dev br0 +} + + +add_ip_address_to_8021ad_bridge() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none + + ip link set dev br0 up + ip link set dev $swp1 master br0 + + ip addr add dev br0 192.0.2.17/28 2>/dev/null + check_fail $? "IP address addition to 802.1ad bridge not rejected" + + ip addr add dev br0 192.0.2.17/28 2>&1 >/dev/null | grep -q mlxsw_spectrum + check_err $? "IP address addition to 802.1ad bridge rejected without extack" + + log_test "IP address addition to 802.1ad bridge" + + ip link del dev br0 +} + +switch_bridge_protocol_from_8021q_to_8021ad() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none + + ip link set dev br0 up + ip link set dev $swp1 master br0 + + ip link set dev br0 type bridge vlan_protocol 802.1q 2>/dev/null + check_fail $? "switching bridge protocol from 802.1q to 802.1ad not rejected" + + log_test "switch bridge protocol" + + ip link del dev br0 +} + + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh new file mode 100755 index 000000000000..5492fa5550d7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for port-default priority. Non-IP packets ingress $swp1 and are +# prioritized according to the default priority specified at the port. +# rx_octets_prio_* counters are used to verify the prioritization. +# +# +----------------------------------+ +# | H1 | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|-----------------------------+ +# | +# +----|-----------------------------+ +# | SW | | +# | + $swp1 | +# | 192.0.2.2/28 | +# | dcb app default-prio <prio> | +# +----------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_defprio +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=2 +: ${HIT_TIMEOUT:=1000} # ms +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +switch_create() +{ + ip link set dev $swp1 up + ip addr add dev $swp1 192.0.2.2/28 +} + +switch_destroy() +{ + dcb app flush dev $swp1 default-prio + ip addr del dev $swp1 192.0.2.2/28 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +__test_defprio() +{ + local prio_install=$1; shift + local prio_observe=$1; shift + local key + local t1 + local i + + RET=0 + + dcb app add dev $swp1 default-prio $prio_install + + local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe) + mausezahn -q $h1 -d 100m -c 10 -t arp reply + t1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((t0 + 10))" \ + ethtool_stats_get $swp1 rx_frames_prio_$prio_observe) + + check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $((t1 - t0))." + log_test "Default priority $prio_install/$prio_observe" + + dcb app del dev $swp1 default-prio $prio_install +} + +test_defprio() +{ + local prio + + for prio in {0..7}; do + __test_defprio $prio $prio + done + + dcb app add dev $swp1 default-prio 3 + __test_defprio 0 3 + __test_defprio 1 3 + __test_defprio 2 3 + __test_defprio 4 4 + __test_defprio 5 5 + __test_defprio 6 6 + __test_defprio 7 7 + dcb app del dev $swp1 default-prio 3 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh new file mode 100755 index 000000000000..914c63d6318a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh @@ -0,0 +1,182 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for DSCP prioritization and rewrite. Packets ingress $swp1 with a DSCP +# tag and are prioritized according to the map at $swp1. They egress $swp2 and +# the DSCP value is updated to match the map at that interface. The updated DSCP +# tag is verified at $h2. +# +# ICMP responses are produced with the same DSCP tag that arrived at $h2. They +# go through prioritization at $swp2 and DSCP retagging at $swp1. The tag is +# verified at $h1--it should match the original tag. +# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | | dcb dscp-prio 10:0...17:7 dcb dscp-prio 20:0...27:7 | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_dscp +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=4 +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + tc qdisc add dev $h1 clsact + dscp_capture_install $h1 10 +} + +h1_destroy() +{ + dscp_capture_uninstall $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 + tc qdisc add dev $h2 clsact + dscp_capture_install $h2 20 +} + +h2_destroy() +{ + dscp_capture_uninstall $h2 20 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none + ip link set dev br1 up + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + dcb app add dev $swp1 dscp-prio 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7 + dcb app add dev $swp2 dscp-prio 20:0 21:1 22:2 23:3 24:4 25:5 26:6 27:7 +} + +switch_destroy() +{ + dcb app del dev $swp2 dscp-prio 20:0 21:1 22:2 23:3 24:4 25:5 26:6 27:7 + dcb app del dev $swp1 dscp-prio 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7 + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +dscp_ping_test() +{ + local vrf_name=$1; shift + local sip=$1; shift + local dip=$1; shift + local prio=$1; shift + local dev_10=$1; shift + local dev_20=$1; shift + local key + + local dscp_10=$(((prio + 10) << 2)) + local dscp_20=$(((prio + 20) << 2)) + + RET=0 + + local -A t0s + eval "t0s=($(dscp_fetch_stats $dev_10 10) + $(dscp_fetch_stats $dev_20 20))" + + local ping_timeout=$((PING_TIMEOUT * 5)) + ip vrf exec $vrf_name \ + ${PING} -Q $dscp_10 ${sip:+-I $sip} $dip \ + -c 10 -i 0.5 -w $ping_timeout &> /dev/null + + local -A t1s + eval "t1s=($(dscp_fetch_stats $dev_10 10) + $(dscp_fetch_stats $dev_20 20))" + + for key in ${!t0s[@]}; do + local expect + if ((key == prio+10 || key == prio+20)); then + expect=10 + else + expect=0 + fi + + local delta=$((t1s[$key] - t0s[$key])) + ((expect == delta)) + check_err $? "DSCP $key: Expected to capture $expect packets, got $delta." + done + + log_test "DSCP rewrite: $dscp_10-(prio $prio)-$dscp_20" +} + +test_dscp() +{ + local prio + + for prio in {0..7}; do + dscp_ping_test v$h1 192.0.2.1 192.0.2.2 $prio $h1 $h2 + done +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh new file mode 100755 index 000000000000..f6c23f84423e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh @@ -0,0 +1,269 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for DSCP prioritization in the router. +# +# With ip_forward_update_priority disabled, the packets are expected to keep +# their DSCP (which in this test uses only values 0..7) intact as they are +# forwarded by the switch. That is verified at $h2. ICMP responses are formed +# with the same DSCP as the requests, and likewise pass through the switch +# intact, which is verified at $h1. +# +# With ip_forward_update_priority enabled, router reprioritizes the packets +# according to the table in reprioritize(). Thus, say, DSCP 7 maps to priority +# 4, which on egress maps back to DSCP 4. The response packet then gets +# reprioritized to 6, getting DSCP 6 on egress. +# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.18/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | + $swp1 $swp2 + | +# | 192.0.2.2/28 192.0.2.17/28 | +# | APP=0,5,0 .. 7,5,7 APP=0,5,0 .. 7,5,7 | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_update + test_no_update + test_pedit_norewrite + test_dscp_leftover +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=4 +source $lib_dir/lib.sh + +reprioritize() +{ + local in=$1; shift + + # This is based on rt_tos2priority in include/net/route.h. Assuming 1:1 + # mapping between priorities and TOS, it yields a new priority for a + # packet with ingress priority of $in. + local -a reprio=(0 0 2 2 6 6 4 4) + + echo ${reprio[$in]} +} + +zero() +{ + echo 0 +} + +three() +{ + echo 3 +} + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + tc qdisc add dev $h1 clsact + dscp_capture_install $h1 0 + ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2 +} + +h1_destroy() +{ + ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2 + dscp_capture_uninstall $h1 0 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.18/28 + tc qdisc add dev $h2 clsact + dscp_capture_install $h2 0 + ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17 +} + +h2_destroy() +{ + ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17 + dscp_capture_uninstall $h2 0 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.18/28 +} + +switch_create() +{ + simple_if_init $swp1 192.0.2.2/28 + __simple_if_init $swp2 v$swp1 192.0.2.17/28 + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + + dcb app add dev $swp1 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + dcb app add dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 +} + +switch_destroy() +{ + dcb app del dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + dcb app del dev $swp1 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + __simple_if_fini $swp2 192.0.2.17/28 + simple_if_fini $swp1 192.0.2.2/28 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + sysctl_set net.ipv4.ip_forward_update_priority 1 + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + sysctl_restore net.ipv4.ip_forward_update_priority + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.18 +} + +dscp_ping_test() +{ + local vrf_name=$1; shift + local sip=$1; shift + local dip=$1; shift + local prio=$1; shift + local reprio=$1; shift + local dev1=$1; shift + local dev2=$1; shift + local i + + local prio2=$($reprio $prio) # ICMP Request egress prio + local prio3=$($reprio $prio2) # ICMP Response egress prio + + local dscp=$((prio << 2)) # ICMP Request ingress DSCP + local dscp2=$((prio2 << 2)) # ICMP Request egress DSCP + local dscp3=$((prio3 << 2)) # ICMP Response egress DSCP + + RET=0 + + eval "local -A dev1_t0s=($(dscp_fetch_stats $dev1 0))" + eval "local -A dev2_t0s=($(dscp_fetch_stats $dev2 0))" + + local ping_timeout=$((PING_TIMEOUT * 5)) + ip vrf exec $vrf_name \ + ${PING} -Q $dscp ${sip:+-I $sip} $dip \ + -c 10 -i 0.5 -w $ping_timeout &> /dev/null + + eval "local -A dev1_t1s=($(dscp_fetch_stats $dev1 0))" + eval "local -A dev2_t1s=($(dscp_fetch_stats $dev2 0))" + + for i in {0..7}; do + local dscpi=$((i << 2)) + local expect2=0 + local expect3=0 + + if ((i == prio2)); then + expect2=10 + fi + if ((i == prio3)); then + expect3=10 + fi + + local delta=$((dev2_t1s[$i] - dev2_t0s[$i])) + ((expect2 == delta)) + check_err $? "DSCP $dscpi@$dev2: Expected to capture $expect2 packets, got $delta." + + delta=$((dev1_t1s[$i] - dev1_t0s[$i])) + ((expect3 == delta)) + check_err $? "DSCP $dscpi@$dev1: Expected to capture $expect3 packets, got $delta." + done + + log_test "DSCP rewrite: $dscp-(prio $prio2)-$dscp2-(prio $prio3)-$dscp3" +} + +__test_update() +{ + local update=$1; shift + local reprio=$1; shift + local prio + + sysctl_restore net.ipv4.ip_forward_update_priority + sysctl_set net.ipv4.ip_forward_update_priority $update + + for prio in {0..7}; do + dscp_ping_test v$h1 192.0.2.1 192.0.2.18 $prio $reprio $h1 $h2 + done +} + +test_update() +{ + echo "Test net.ipv4.ip_forward_update_priority=1" + __test_update 1 reprioritize +} + +test_no_update() +{ + echo "Test net.ipv4.ip_forward_update_priority=0" + __test_update 0 echo +} + +# Test that when DSCP is updated in pedit, the DSCP rewrite is turned off. +test_pedit_norewrite() +{ + echo "Test no DSCP rewrite after DSCP is updated by pedit" + + tc filter add dev $swp1 ingress handle 101 pref 1 prot ip flower \ + action pedit ex munge ip dsfield set $((3 << 2)) retain 0xfc \ + action skbedit priority 3 + + __test_update 0 three + + tc filter del dev $swp1 ingress pref 1 +} + +# Test that when the last APP rule is removed, the prio->DSCP map is properly +# set to zeroes, and that the last APP rule does not stay active in the ASIC. +test_dscp_leftover() +{ + echo "Test that last removed DSCP rule is deconfigured correctly" + + dcb app del dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + + __test_update 0 zero + + dcb app add dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh new file mode 100755 index 000000000000..9ca340c5f3a6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -0,0 +1,324 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# A test for strict prioritization of traffic in the switch. Run two streams of +# traffic, each through a different ingress port, one tagged with PCP of 1, the +# other with PCP of 2. Both streams converge at one egress port, where they are +# assigned TC of, respectively, 1 and 2, with strict priority configured between +# them. In H3, we expect to see (almost) exclusively the high-priority traffic. +# +# Please see qos_mc_aware.sh for an explanation of why we use mausezahn and +# counters instead of just running iperf3. +# +# +---------------------------+ +-----------------------------+ +# | H1 | | H2 | +# | $h1.111 + | | + $h2.222 | +# | 192.0.2.33/28 | | | | 192.0.2.65/28 | +# | e-qos-map 0:1 | | | | e-qos-map 0:2 | +# | | | | | | +# | $h1 + | | + $h2 | +# +-----------------|---------+ +---------|-------------------+ +# | | +# +-----------------|-------------------------------------|-------------------+ +# | $swp1 + + $swp2 | +# | >1Gbps | | >1Gbps | +# | +---------------|-----------+ +----------|----------------+ | +# | | $swp1.111 + | | + $swp2.222 | | +# | | BR111 | SW | BR222 | | +# | | $swp3.111 + | | + $swp3.222 | | +# | +---------------|-----------+ +----------|----------------+ | +# | \_____________________________________/ | +# | | | +# | + $swp3 | +# | | 1Gbps bottleneck | +# | | ETS: (up n->tc n for n in 0..7) | +# | | strict priority | +# +------------------------------------|--------------------------------------+ +# | +# +--------------------|--------------------+ +# | + $h3 H3 | +# | / \ | +# | / \ | +# | $h3.111 + + $h3.222 | +# | 192.0.2.34/28 192.0.2.66/28 | +# +-----------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_ets_strict +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +h1_create() +{ + adf_simple_if_init $h1 + + mtu_set $h1 10000 + defer mtu_restore $h1 + + vlan_create $h1 111 v$h1 192.0.2.33/28 + defer vlan_destroy $h1 111 + ip link set dev $h1.111 type vlan egress-qos-map 0:1 +} + +h2_create() +{ + adf_simple_if_init $h2 + + mtu_set $h2 10000 + defer mtu_restore $h2 + + vlan_create $h2 222 v$h2 192.0.2.65/28 + defer vlan_destroy $h2 222 + ip link set dev $h2.222 type vlan egress-qos-map 0:2 +} + +h3_create() +{ + adf_simple_if_init $h3 + + mtu_set $h3 10000 + defer mtu_restore $h3 + + vlan_create $h3 111 v$h3 192.0.2.34/28 + defer vlan_destroy $h3 111 + + vlan_create $h3 222 v$h3 192.0.2.66/28 + defer vlan_destroy $h3 222 +} + +switch_create() +{ + ip link set dev $swp1 up + defer ip link set dev $swp1 down + + mtu_set $swp1 10000 + defer mtu_restore $swp1 + + ip link set dev $swp2 up + defer ip link set dev $swp2 down + + mtu_set $swp2 10000 + defer mtu_restore $swp2 + + # prio n -> TC n, strict scheduling + lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7 + defer lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0 + + lldptool -T -i $swp3 -V ETS-CFG tsa=$( + )"0:strict,"$( + )"1:strict,"$( + )"2:strict,"$( + )"3:strict,"$( + )"4:strict,"$( + )"5:strict,"$( + )"6:strict,"$( + )"7:strict" + sleep 1 + + ip link set dev $swp3 up + defer ip link set dev $swp3 down + + mtu_set $swp3 10000 + defer mtu_restore $swp3 + + tc qdisc replace dev $swp3 root handle 101: tbf rate 1gbit \ + burst 128K limit 1G + defer tc qdisc del dev $swp3 root handle 101: + + vlan_create $swp1 111 + defer vlan_destroy $swp1 111 + + vlan_create $swp2 222 + defer vlan_destroy $swp2 222 + + vlan_create $swp3 111 + defer vlan_destroy $swp3 111 + + vlan_create $swp3 222 + defer vlan_destroy $swp3 222 + + ip link add name br111 type bridge vlan_filtering 0 + defer ip link del dev br111 + ip link set dev br111 addrgenmode none + + ip link set dev br111 up + defer ip link set dev br111 down + + ip link set dev $swp1.111 master br111 + defer ip link set dev $swp1.111 nomaster + + ip link set dev $swp3.111 master br111 + defer ip link set dev $swp3.111 nomaster + + ip link add name br222 type bridge vlan_filtering 0 + defer ip link del dev br222 + ip link set dev br222 addrgenmode none + + ip link set dev br222 up + defer ip link set dev br222 down + + ip link set dev $swp2.222 master br222 + defer ip link set dev $swp2.222 nomaster + + ip link set dev $swp3.222 master br222 + defer ip link set dev $swp3.222 nomaster + + # Make sure that ingress quotas are smaller than egress so that there is + # room for both streams of traffic to be admitted to shared buffer. + devlink_pool_size_thtype_save 0 + devlink_pool_size_thtype_set 0 dynamic 10000000 + defer devlink_pool_size_thtype_restore 0 + + devlink_pool_size_thtype_save 4 + devlink_pool_size_thtype_set 4 dynamic 10000000 + defer devlink_pool_size_thtype_restore 4 + + devlink_port_pool_th_save $swp1 0 + devlink_port_pool_th_set $swp1 0 6 + defer devlink_port_pool_th_restore $swp1 0 + + devlink_tc_bind_pool_th_save $swp1 1 ingress + devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6 + defer devlink_tc_bind_pool_th_restore $swp1 1 ingress + + devlink_port_pool_th_save $swp2 0 + devlink_port_pool_th_set $swp2 0 6 + defer devlink_port_pool_th_restore $swp2 0 + + devlink_tc_bind_pool_th_save $swp2 2 ingress + devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6 + defer devlink_tc_bind_pool_th_restore $swp2 2 ingress + + devlink_tc_bind_pool_th_save $swp3 1 egress + devlink_tc_bind_pool_th_set $swp3 1 egress 4 7 + defer devlink_tc_bind_pool_th_restore $swp3 1 egress + + devlink_tc_bind_pool_th_save $swp3 2 egress + devlink_tc_bind_pool_th_set $swp3 2 egress 4 7 + defer devlink_tc_bind_pool_th_restore $swp3 2 egress + + devlink_port_pool_th_save $swp3 4 + devlink_port_pool_th_set $swp3 4 7 + defer devlink_port_pool_th_restore $swp3 4 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h3mac=$(mac_get $h3) + + adf_vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 " from H1" + ping_test $h2 192.0.2.66 " from H2" +} + +rel() +{ + local old=$1; shift + local new=$1; shift + + bc <<< " + scale=2 + ret = 100 * $new / $old + if (ret > 0) { ret } else { 0 } + " +} + +__run_hi_measure_rate() +{ + local what=$1; shift + local -a uc_rate + + start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac + defer stop_traffic $! + + uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_2 "$what")) + check_err $? "Could not get high enough $what ingress rate" + + echo ${uc_rate[@]} +} + +run_hi_measure_rate() +{ + in_defer_scope __run_hi_measure_rate "$@" +} + +test_ets_strict() +{ + RET=0 + + # Run high-prio traffic on its own. + local -a rate_2 + rate_2=($(run_hi_measure_rate "prio 2")) + local rate_2_in=${rate_2[0]} + local rate_2_eg=${rate_2[1]} + + # Start low-prio stream. + start_traffic $h1.111 192.0.2.33 192.0.2.34 $h3mac + defer stop_traffic $! + + local -a rate_1 + rate_1=($(measure_rate $swp1 $h3 rx_octets_prio_1 "prio 1")) + check_err $? "Could not get high enough prio-1 ingress rate" + local rate_1_in=${rate_1[0]} + local rate_1_eg=${rate_1[1]} + + # High-prio and low-prio on their own should have about the same + # throughput. + local rel21=$(rel $rate_1_eg $rate_2_eg) + check_err $(bc <<< "$rel21 < 95") + check_err $(bc <<< "$rel21 > 105") + + # Start the high-prio stream--now both streams run. + rate_3=($(run_hi_measure_rate "prio 2+1")) + local rate_3_in=${rate_3[0]} + local rate_3_eg=${rate_3[1]} + + # High-prio should have about the same throughput whether or not + # low-prio is in the system. + local rel32=$(rel $rate_2_eg $rate_3_eg) + check_err $(bc <<< "$rel32 < 95") + + log_test "strict priority" + echo "Ingress to switch:" + echo " p1 in rate $(humanize $rate_1_in)" + echo " p2 in rate $(humanize $rate_2_in)" + echo " p2 in rate w/ p1 $(humanize $rate_3_in)" + echo "Egress from switch:" + echo " p1 eg rate $(humanize $rate_1_eg)" + echo " p2 eg rate $(humanize $rate_2_eg) ($rel21% of p1)" + echo " p2 eg rate w/ p1 $(humanize $rate_3_eg) ($rel32% of p2)" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh new file mode 100755 index 000000000000..88162b4027c0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh @@ -0,0 +1,379 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + test_defaults + test_dcb_ets + test_mtu + test_pfc + test_int_buf + test_tc_priomap + test_tc_mtu + test_tc_sizes + test_tc_int_buf +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +swp=$NETIF_NO_CABLE + +cleanup() +{ + pre_cleanup +} + +get_prio_pg() +{ + # Produces a string of numbers "<B0> <B1> ... <B7> ", where BX is number + # of buffer that priority X is mapped to. + dcb -j buffer show dev $swp | + jq -r '[.prio_buffer | .[] | tostring + " "] | add' +} + +get_prio_pfc() +{ + # Produces a string of numbers "<P0> <P1> ... <P7> ", where PX denotes + # whether priority X has PFC enabled (the value is 1) or disabled (0). + dcb -j pfc show dev $swp | + jq -r '[.prio_pfc | .[] | if . then "1 " else "0 " end] | add' +} + +get_prio_tc() +{ + # Produces a string of numbers "<T0> <T1> ... <T7> ", where TC is number + # of TC that priority X is mapped to. + dcb -j ets show dev $swp | + jq -r '[.prio_tc | .[] | tostring + " "] | add' +} + +get_buf_size() +{ + local idx=$1; shift + + dcb -j buffer show dev $swp | jq ".buffer_size[$idx]" +} + +get_tot_size() +{ + dcb -j buffer show dev $swp | jq '.total_size' +} + +check_prio_pg() +{ + local expect=$1; shift + + local current=$(get_prio_pg) + test "$current" = "$expect" + check_err $? "prio2buffer is '$current', expected '$expect'" +} + +check_prio_pfc() +{ + local expect=$1; shift + + local current=$(get_prio_pfc) + test "$current" = "$expect" + check_err $? "prio PFC is '$current', expected '$expect'" +} + +check_prio_tc() +{ + local expect=$1; shift + + local current=$(get_prio_tc) + test "$current" = "$expect" + check_err $? "prio_tc is '$current', expected '$expect'" +} + +__check_buf_size() +{ + local idx=$1; shift + local expr=$1; shift + local what=$1; shift + + local current=$(get_buf_size $idx) + ((current $expr)) + check_err $? "${what}buffer $idx size is '$current', expected '$expr'" + echo $current +} + +check_buf_size() +{ + __check_buf_size "$@" > /dev/null +} + +test_defaults() +{ + RET=0 + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + check_prio_pfc "0 0 0 0 0 0 0 0 " + + log_test "Default headroom configuration" +} + +test_dcb_ets() +{ + RET=0 + + dcb ets set dev $swp prio-tc 0:0 1:2 2:4 3:6 4:1 5:3 6:5 7:7 + + check_prio_pg "0 2 4 6 1 3 5 7 " + check_prio_tc "0 2 4 6 1 3 5 7 " + check_prio_pfc "0 0 0 0 0 0 0 0 " + + dcb ets set dev $swp prio-tc all:0 + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + + dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6 2>/dev/null + check_fail $? "prio2buffer accepted in DCB mode" + + log_test "Configuring headroom through ETS" +} + +test_mtu() +{ + local what=$1; shift + local buf0size_2 + local buf0size + + RET=0 + buf0size=$(__check_buf_size 0 "> 0") + + mtu_set $swp 3000 + buf0size_2=$(__check_buf_size 0 "> $buf0size" "MTU 3000: ") + mtu_restore $swp + + mtu_set $swp 6000 + check_buf_size 0 "> $buf0size_2" "MTU 6000: " + mtu_restore $swp + + check_buf_size 0 "== $buf0size" + + log_test "${what}MTU impacts buffer size" +} + +test_tc_mtu() +{ + # In TC mode, MTU still impacts the threshold below which a buffer is + # not permitted to go. + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + test_mtu "TC: " + tc qdisc delete dev $swp root +} + +test_pfc() +{ + RET=0 + + dcb ets set dev $swp prio-tc all:0 5:1 6:2 7:3 + + local buf0size=$(get_buf_size 0) + local buf1size=$(get_buf_size 1) + local buf2size=$(get_buf_size 2) + local buf3size=$(get_buf_size 3) + check_buf_size 0 "> 0" + check_buf_size 1 "> 0" + check_buf_size 2 "> 0" + check_buf_size 3 "> 0" + check_buf_size 4 "== 0" + check_buf_size 5 "== 0" + check_buf_size 6 "== 0" + check_buf_size 7 "== 0" + + log_test "Buffer size sans PFC" + + RET=0 + + dcb pfc set dev $swp prio-pfc all:off 5:on 6:on 7:on delay 0 + + check_prio_pg "0 0 0 0 0 1 2 3 " + check_prio_pfc "0 0 0 0 0 1 1 1 " + check_buf_size 0 "== $buf0size" + check_buf_size 1 "> $buf1size" + check_buf_size 2 "> $buf2size" + check_buf_size 3 "> $buf3size" + + local buf1size=$(get_buf_size 1) + check_buf_size 2 "== $buf1size" + check_buf_size 3 "== $buf1size" + + log_test "PFC: Cable length 0" + + RET=0 + + dcb pfc set dev $swp delay 1000 + + check_buf_size 0 "== $buf0size" + check_buf_size 1 "> $buf1size" + check_buf_size 2 "> $buf1size" + check_buf_size 3 "> $buf1size" + + log_test "PFC: Cable length 1000" + + RET=0 + + dcb pfc set dev $swp prio-pfc all:off delay 0 + dcb ets set dev $swp prio-tc all:0 + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + check_buf_size 0 "> 0" + check_buf_size 1 "== 0" + check_buf_size 2 "== 0" + check_buf_size 3 "== 0" + check_buf_size 4 "== 0" + check_buf_size 5 "== 0" + check_buf_size 6 "== 0" + check_buf_size 7 "== 0" + + log_test "PFC: Restore defaults" +} + +test_tc_priomap() +{ + RET=0 + + dcb ets set dev $swp prio-tc 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + check_prio_pg "0 1 2 3 4 5 6 7 " + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + check_prio_pg "0 0 0 0 0 0 0 0 " + + dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6 + check_prio_pg "1 3 5 7 0 2 4 6 " + + tc qdisc delete dev $swp root + check_prio_pg "0 1 2 3 4 5 6 7 " + + # Clean up. + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + dcb buffer set dev $swp prio-buffer all:0 + tc qdisc delete dev $swp root + dcb ets set dev $swp prio-tc all:0 + + log_test "TC: priomap" +} + +test_tc_sizes() +{ + local cell_size=$(devlink_cell_size_get) + local size=$((cell_size * 1000)) + + RET=0 + + dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null + check_fail $? "buffer_size should fail before qdisc is added" + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + + dcb buffer set dev $swp buffer-size all:0 0:$size + check_err $? "buffer_size should pass after qdisc is added" + check_buf_size 0 "== $size" "set size: " + + mtu_set $swp 6000 + check_buf_size 0 "== $size" "set MTU: " + mtu_restore $swp + + dcb buffer set dev $swp buffer-size all:0 + + # After replacing the qdisc for the same kind, buffer_size still has to + # work. + tc qdisc replace dev $swp root handle 1: bfifo limit 1M + + dcb buffer set dev $swp buffer-size all:0 0:$size + check_buf_size 0 "== $size" "post replace, set size: " + + dcb buffer set dev $swp buffer-size all:0 + + # Likewise after replacing for a different kind. + tc qdisc replace dev $swp root handle 2: prio bands 8 + + dcb buffer set dev $swp buffer-size all:0 0:$size + check_buf_size 0 "== $size" "post replace different kind, set size: " + + tc qdisc delete dev $swp root + + dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null + check_fail $? "buffer_size should fail after qdisc is deleted" + + log_test "TC: buffer size" +} + +test_int_buf() +{ + local what=$1; shift + + RET=0 + + local buf0size=$(get_buf_size 0) + local tot_size=$(get_tot_size) + + # Size of internal buffer and buffer 9. + local dsize=$((tot_size - buf0size)) + + tc qdisc add dev $swp clsact + tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp + + local buf0size_2=$(get_buf_size 0) + local tot_size_2=$(get_tot_size) + local dsize_2=$((tot_size_2 - buf0size_2)) + + # Egress SPAN should have added to the "invisible" buffer configuration. + ((dsize_2 > dsize)) + check_err $? "Invisible buffers account for '$dsize_2', expected '> $dsize'" + + mtu_set $swp 3000 + + local buf0size_3=$(get_buf_size 0) + local tot_size_3=$(get_tot_size) + local dsize_3=$((tot_size_3 - buf0size_3)) + + # MTU change might change buffer 0, which will show at total, but the + # hidden buffers should stay the same size. + ((dsize_3 == dsize_2)) + check_err $? "MTU change: Invisible buffers account for '$dsize_3', expected '== $dsize_2'" + + mtu_restore $swp + tc qdisc del dev $swp clsact + + # After SPAN removal, hidden buffers should be back to the original sizes. + local buf0size_4=$(get_buf_size 0) + local tot_size_4=$(get_tot_size) + local dsize_4=$((tot_size_4 - buf0size_4)) + ((dsize_4 == dsize)) + check_err $? "SPAN removed: Invisible buffers account for '$dsize_4', expected '== $dsize'" + + log_test "${what}internal buffer size" +} + +test_tc_int_buf() +{ + local cell_size=$(devlink_cell_size_get) + local size=$((cell_size * 1000)) + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + test_int_buf "TC: " + + dcb buffer set dev $swp buffer-size all:0 0:$size + test_int_buf "TC+buffsize: " + + dcb buffer set dev $swp buffer-size all:0 + tc qdisc delete dev $swp root +} + +bail_on_lldpad "configure DCB" "configure Qdiscs" + +trap cleanup EXIT +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh new file mode 100644 index 000000000000..5ad092b9bf10 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: GPL-2.0 + +check_rate() +{ + local rate=$1; shift + local min=$1; shift + local what=$1; shift + + if ((rate > min)); then + return 0 + fi + + echo "$what $(humanize $ir) < $(humanize $min)" > /dev/stderr + return 1 +} + +measure_rate() +{ + local sw_in=$1; shift # Where the traffic ingresses the switch + local host_in=$1; shift # Where it ingresses another host + local counter=$1; shift # Counter to use for measurement + local what=$1; shift + + local interval=10 + local i + local ret=0 + + # Dips in performance might cause momentary ingress rate to drop below + # 1Gbps. That wouldn't saturate egress and MC would thus get through, + # seemingly winning bandwidth on account of UC. Demand at least 2Gbps + # average ingress rate to somewhat mitigate this. + local min_ingress=2147483648 + + for i in {5..0}; do + local t0=$(ethtool_stats_get $host_in $counter) + local u0=$(ethtool_stats_get $sw_in $counter) + sleep $interval + local t1=$(ethtool_stats_get $host_in $counter) + local u1=$(ethtool_stats_get $sw_in $counter) + + local ir=$(rate $u0 $u1 $interval) + local er=$(rate $t0 $t1 $interval) + + if check_rate $ir $min_ingress "$what ingress rate"; then + break + fi + + # Fail the test if we can't get the throughput. + if ((i == 0)); then + ret=1 + fi + done + + echo $ir $er + return $ret +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh new file mode 100755 index 000000000000..a4a25637fe2a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh @@ -0,0 +1,243 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test sends many small packets (size is less than cell size) through the +# switch. A shaper is used in $swp2, so the traffic is limited there. Packets +# are queued till they will be sent. +# +# The idea is to verify that the switch can handle at least 85% of maximum +# supported descrpitors by hardware. Then, we verify that the driver configures +# firmware to allow infinite size of egress descriptor pool, and does not use a +# lower limitation. Increase the size of the relevant pools such that the pool's +# size does not limit the traffic. + +# +-----------------------+ +# | H1 | +# | + $h1.111 | +# | | 192.0.2.33/28 | +# | | | +# | + $h1 | +# +---|-------------------+ +# | +# +---|-----------------------------+ +# | + $swp1 | +# | | iPOOL1 | +# | | | +# | +-|------------------------+ | +# | | + $swp1.111 | | +# | | | | +# | | BR1 | | +# | | | | +# | | + $swp2.111 | | +# | +-|------------------------+ | +# | | | +# | + $swp2 | +# | | ePOOL6 | +# | | 1mbit | +# +---+-----------------------------+ +# | +# +---|-------------------+ +# | + $h2 H2 | +# | | | +# | + $h2.111 | +# | 192.0.2.34/28 | +# +-----------------------+ +# + +ALL_TESTS=" + ping_ipv4 + max_descriptors +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source mlxsw_lib.sh + +MAX_POOL_SIZE=$(devlink_pool_size_get) +SHAPER_RATE=1mbit + +# The current TBF qdisc interface does not allow us to configure the shaper to +# flat zero. The ASIC shaper is guaranteed to work with a granularity of +# 200Mbps. On Spectrum-2, writing a value close to zero instead of zero works +# well, but the performance on Spectrum-1 is unpredictable. Thus, do not run the +# test on Spectrum-1. +mlxsw_only_on_spectrum 2+ || exit + +h1_create() +{ + adf_simple_if_init $h1 + + vlan_create $h1 111 v$h1 192.0.2.33/28 + defer vlan_destroy $h1 111 + ip link set dev $h1.111 type vlan egress-qos-map 0:1 +} + +h2_create() +{ + adf_simple_if_init $h2 + + vlan_create $h2 111 v$h2 192.0.2.34/28 + defer vlan_destroy $h2 111 +} + +switch_create() +{ + # pools + # ----- + # devlink_pool_size_thtype_restore needs to be done first so that we can + # reset the various limits to values that are only valid for the + # original static / dynamic setting. + + devlink_pool_size_thtype_save 1 + devlink_pool_size_thtype_set 1 dynamic $MAX_POOL_SIZE + defer_prio devlink_pool_size_thtype_restore 1 + + devlink_pool_size_thtype_save 6 + devlink_pool_size_thtype_set 6 static $MAX_POOL_SIZE + defer_prio devlink_pool_size_thtype_restore 6 + + # $swp1 + # ----- + + ip link set dev $swp1 up + defer ip link set dev $swp1 down + + vlan_create $swp1 111 + defer vlan_destroy $swp1 111 + ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_save $swp1 1 + devlink_port_pool_th_set $swp1 1 16 + defer devlink_tc_bind_pool_th_restore $swp1 1 ingress + + devlink_tc_bind_pool_th_save $swp1 1 ingress + devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16 + defer devlink_port_pool_th_restore $swp1 1 + + tc qdisc replace dev $swp1 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + defer tc qdisc del dev $swp1 root + + dcb buffer set dev $swp1 prio-buffer all:0 1:1 + defer dcb buffer set dev $swp1 prio-buffer all:0 + + # $swp2 + # ----- + + ip link set dev $swp2 up + defer ip link set dev $swp2 down + + vlan_create $swp2 111 + defer vlan_destroy $swp2 111 + ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_save $swp2 6 + devlink_port_pool_th_set $swp2 6 $MAX_POOL_SIZE + defer devlink_tc_bind_pool_th_restore $swp2 1 egress + + devlink_tc_bind_pool_th_save $swp2 1 egress + devlink_tc_bind_pool_th_set $swp2 1 egress 6 $MAX_POOL_SIZE + defer devlink_port_pool_th_restore $swp2 6 + + tc qdisc replace dev $swp2 root handle 1: tbf rate $SHAPER_RATE \ + burst 128K limit 500M + defer tc qdisc del dev $swp2 root + + tc qdisc replace dev $swp2 parent 1:1 handle 11: \ + ets bands 8 strict 8 priomap 7 6 + defer tc qdisc del dev $swp2 parent 1:1 handle 11: + + # bridge + # ------ + + ip link add name br1 type bridge vlan_filtering 0 + defer ip link del dev br1 + + ip link set dev $swp1.111 master br1 + defer ip link set dev $swp1.111 nomaster + + ip link set dev br1 up + defer ip link set dev br1 down + + ip link set dev $swp2.111 master br1 + defer ip link set dev $swp2.111 nomaster +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + adf_vrf_prepare + + h1_create + h2_create + switch_create +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 " h1->h2" +} + +percentage_used() +{ + local num_packets=$1; shift + local max_packets=$1; shift + + bc <<< " + scale=2 + 100 * $num_packets / $max_packets + " +} + +max_descriptors() +{ + local cell_size=$(devlink_cell_size_get) + local exp_perc_used=85 + local max_descriptors + local pktsize=30 + + RET=0 + + max_descriptors=$(mlxsw_max_descriptors_get) || exit 1 + + local d0=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1) + + log_info "Send many small packets, packet size = $pktsize bytes" + start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac + defer stop_traffic $! + + # Sleep to wait for congestion. + sleep 5 + + local d1=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1) + ((d1 == d0)) + check_err $? "Drops seen on egress port: $d0 -> $d1 ($((d1 - d0)))" + + # Check how many packets the switch can handle, the limitation is + # maximum descriptors. + local pkts_bytes=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1) + local pkts_num=$((pkts_bytes / cell_size)) + local perc_used=$(percentage_used $pkts_num $max_descriptors) + + check_err $(bc <<< "$perc_used < $exp_perc_used") \ + "Expected > $exp_perc_used% of descriptors, handle $perc_used%" + + log_test "Maximum descriptors usage. The percentage used is $perc_used%" +} + +trap cleanup EXIT +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh new file mode 100755 index 000000000000..d8f8ae8533cd --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -0,0 +1,330 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# A test for switch behavior under MC overload. An issue in Spectrum chips +# causes throughput of UC traffic to drop severely when a switch is under heavy +# MC load. This issue can be overcome by putting the switch to MC-aware mode. +# This test verifies that UC performance stays intact even as the switch is +# under MC flood, and therefore that the MC-aware mode is enabled and correctly +# configured. +# +# Because mlxsw throttles CPU port, the traffic can't actually reach userspace +# at full speed. That makes it impossible to use iperf3 to simply measure the +# throughput, because many packets (that reach $h3) don't get to the kernel at +# all even in UDP mode (the situation is even worse in TCP mode, where one can't +# hope to see more than a couple Mbps). +# +# So instead we send traffic with mausezahn and use RX ethtool counters at $h3. +# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore +# each gets a different priority and we can use per-prio ethtool counters to +# measure the throughput. In order to avoid prioritizing unicast traffic, prio +# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and +# thus TC 0). +# +# Mausezahn can't actually saturate the links unless it's using large frames. +# Thus we set MTU to 10K on all involved interfaces. Then both unicast and +# multicast traffic uses 8K frames. +# +# +---------------------------+ +----------------------------------+ +# | H1 | | H2 | +# | | | unicast --> + $h2.111 | +# | multicast | | traffic | 192.0.2.129/28 | +# | traffic | | | e-qos-map 0:1 | +# | $h1 + <----- | | | | +# | 192.0.2.65/28 | | | + $h2 | +# +---------------|-----------+ +--------------|-------------------+ +# | | +# +---------------|---------------------------------------|-------------------+ +# | $swp1 + + $swp2 | +# | >1Gbps | | >1Gbps | +# | +-------------|------+ +----------|----------------+ | +# | | $swp1.1 + | | + $swp2.111 | | +# | | BR1 | SW | BR111 | | +# | | $swp3.1 + | | + $swp3.111 | | +# | +-------------|------+ +----------|----------------+ | +# | \_______________________________________/ | +# | | | +# | + $swp3 | +# | | 1Gbps bottleneck | +# | | prio qdisc: {0..7} -> 7 | +# +------------------------------------|--------------------------------------+ +# | +# +--|-----------------+ +# | + $h3 H3 | +# | | 192.0.2.66/28 | +# | | | +# | + $h3.111 | +# | 192.0.2.130/28 | +# +--------------------+ + +ALL_TESTS=" + ping_ipv4 + test_mc_aware + test_uc_aware +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +h1_create() +{ + adf_simple_if_init $h1 192.0.2.65/28 + + mtu_set $h1 10000 + defer mtu_restore $h1 +} + +h2_create() +{ + adf_simple_if_init $h2 + + mtu_set $h2 10000 + defer mtu_restore $h2 + + vlan_create $h2 111 v$h2 192.0.2.129/28 + defer vlan_destroy $h2 111 + ip link set dev $h2.111 type vlan egress-qos-map 0:1 +} + +h3_create() +{ + adf_simple_if_init $h3 192.0.2.66/28 + + mtu_set $h3 10000 + defer mtu_restore $h3 + + vlan_create $h3 111 v$h3 192.0.2.130/28 + defer vlan_destroy $h3 111 +} + +switch_create() +{ + ip link set dev $swp1 up + defer ip link set dev $swp1 down + + mtu_set $swp1 10000 + defer mtu_restore $swp1 + + ip link set dev $swp2 up + defer ip link set dev $swp2 down + + mtu_set $swp2 10000 + defer mtu_restore $swp2 + + ip link set dev $swp3 up + defer ip link set dev $swp3 down + + mtu_set $swp3 10000 + defer mtu_restore $swp3 + + vlan_create $swp2 111 + defer vlan_destroy $swp2 111 + + vlan_create $swp3 111 + defer vlan_destroy $swp3 111 + + tc qdisc replace dev $swp3 root handle 3: tbf rate 1gbit \ + burst 128K limit 1G + defer tc qdisc del dev $swp3 root handle 3: + + tc qdisc replace dev $swp3 parent 3:3 handle 33: \ + prio bands 8 priomap 7 7 7 7 7 7 7 7 + defer tc qdisc del dev $swp3 parent 3:3 handle 33: + + ip link add name br1 type bridge vlan_filtering 0 + defer ip link del dev br1 + ip link set dev br1 addrgenmode none + ip link set dev br1 up + + ip link set dev $swp1 master br1 + defer ip link set dev $swp1 nomaster + + ip link set dev $swp3 master br1 + defer ip link set dev $swp3 nomaster + + ip link add name br111 type bridge vlan_filtering 0 + defer ip link del dev br111 + ip link set dev br111 addrgenmode none + ip link set dev br111 up + + ip link set dev $swp2.111 master br111 + defer ip link set dev $swp2.111 nomaster + + ip link set dev $swp3.111 master br111 + defer ip link set dev $swp3.111 nomaster + + # Make sure that ingress quotas are smaller than egress so that there is + # room for both streams of traffic to be admitted to shared buffer. + devlink_port_pool_th_save $swp1 0 + devlink_port_pool_th_set $swp1 0 5 + defer devlink_port_pool_th_restore $swp1 0 + + devlink_tc_bind_pool_th_save $swp1 0 ingress + devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5 + defer devlink_tc_bind_pool_th_restore $swp1 0 ingress + + devlink_port_pool_th_save $swp2 0 + devlink_port_pool_th_set $swp2 0 5 + defer devlink_port_pool_th_restore $swp2 0 + + devlink_tc_bind_pool_th_save $swp2 1 ingress + devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5 + defer devlink_tc_bind_pool_th_restore $swp2 1 ingress + + devlink_port_pool_th_save $swp3 4 + devlink_port_pool_th_set $swp3 4 12 + defer devlink_port_pool_th_restore $swp3 4 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h3mac=$(mac_get $h3) + + adf_vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +ping_ipv4() +{ + ping_test $h2 192.0.2.130 +} + +__run_uc_measure_rate() +{ + local what=$1; shift + local -a uc_rate + + start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac + defer stop_traffic $! + + uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "$what")) + check_err $? "Could not get high enough $what ingress rate" + + echo ${uc_rate[@]} +} + +run_uc_measure_rate() +{ + in_defer_scope __run_uc_measure_rate "$@" +} + +test_mc_aware() +{ + RET=0 + + local -a uc_rate=($(run_uc_measure_rate "UC-only")) + local ucth1=${uc_rate[1]} + + start_traffic $h1 192.0.2.65 bc bc + defer stop_traffic $! + + local d0=$(date +%s) + local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) + local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0) + + local -a uc_rate_2=($(run_uc_measure_rate "UC+MC")) + local ucth2=${uc_rate_2[1]} + + local d1=$(date +%s) + local t1=$(ethtool_stats_get $h3 rx_octets_prio_0) + local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0) + + local deg=$(bc <<< " + scale=2 + ret = 100 * ($ucth1 - $ucth2) / $ucth1 + if (ret > 0) { ret } else { 0 } + ") + + # Minimum shaper of 200Mbps on MC TCs should cause about 20% of + # degradation on 1Gbps link. + check_err $(bc <<< "$deg < 15") "Minimum shaper not in effect" + check_err $(bc <<< "$deg > 25") "MC traffic degrades UC performance too much" + + local interval=$((d1 - d0)) + local mc_ir=$(rate $u0 $u1 $interval) + local mc_er=$(rate $t0 $t1 $interval) + + log_test "UC performance under MC overload" + + echo "UC-only throughput $(humanize $ucth1)" + echo "UC+MC throughput $(humanize $ucth2)" + echo "Degradation $deg %" + echo + echo "Full report:" + echo " UC only:" + echo " ingress UC throughput $(humanize ${uc_rate[0]})" + echo " egress UC throughput $(humanize ${uc_rate[1]})" + echo " UC+MC:" + echo " ingress UC throughput $(humanize ${uc_rate_2[0]})" + echo " egress UC throughput $(humanize ${uc_rate_2[1]})" + echo " ingress MC throughput $(humanize $mc_ir)" + echo " egress MC throughput $(humanize $mc_er)" + echo +} + +test_uc_aware() +{ + RET=0 + + start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac + defer stop_traffic $! + + local d0=$(date +%s) + local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1) + sleep 1 + + local attempts=50 + local passes=0 + local i + + for ((i = 0; i < attempts; ++i)); do + if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 1; then + ((passes++)) + fi + + sleep 0.1 + done + + local d1=$(date +%s) + local t1=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1) + + local interval=$((d1 - d0)) + local uc_ir=$(rate $u0 $u1 $interval) + local uc_er=$(rate $t0 $t1 $interval) + + ((attempts == passes)) + check_err $? + + log_test "MC performance under UC overload" + echo " ingress UC throughput $(humanize ${uc_ir})" + echo " egress UC throughput $(humanize ${uc_er})" + echo " sent $attempts BC ARPs, got $passes responses" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh new file mode 100755 index 000000000000..0f0f4f05807c --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh @@ -0,0 +1,417 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority +# of 1. This stream is consistently prioritized as priority 1, is put to PG +# buffer 1, and scheduled at TC 1. +# +# - the stream first ingresses through $swp1, where it is forwarded to $swp3 +# +# - then it ingresses through $swp4. Here it is put to a lossless buffer and put +# to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is +# shaped, and thus the PFC pool eventually fills, therefore the headroom +# fills, and $swp3 is paused. +# +# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at +# a pool ("overflow pool"). The overflow pool needs to be large enough to +# contain the whole burst. +# +# - eventually the PFC pool gets some traffic out, headroom therefore gets some +# traffic to the pool, and $swp3 is unpaused again. This way the traffic is +# gradually forwarded from the overflow pool, through the PFC pool, out of +# $swp2, and eventually to $h2. +# +# - if PFC works, all lossless flow packets that ingress through $swp1 should +# also be seen ingressing $h2. If it doesn't, there will be drops due to +# discrepancy between the speeds of $swp1 and $h2. +# +# - it should all play out relatively quickly, so that SLL and HLL will not +# cause drops. +# +# +-----------------------+ +# | H1 | +# | + $h1.111 | +# | | 192.0.2.33/28 | +# | | | +# | + $h1 | +# +---|-------------------+ +--------------------+ +# | | | +# +---|----------------------|--------------------|---------------------------+ +# | + $swp1 $swp3 + + $swp4 | +# | | iPOOL1 iPOOL0 | | iPOOL2 | +# | | ePOOL4 ePOOL5 | | ePOOL4 | +# | | PFC:enabled=1 | | PFC:enabled=1 | +# | +-|----------------------|-+ +-|------------------------+ | +# | | + $swp1.111 $swp3.111 + | | + $swp4.111 | | +# | | | | | | +# | | BR1 | | BR2 | | +# | | | | | | +# | | | | + $swp2.111 | | +# | +--------------------------+ +---------|----------------+ | +# | | | +# | iPOOL0: 500KB dynamic | | +# | iPOOL1: 10MB static | | +# | iPOOL2: 1MB static + $swp2 | +# | ePOOL4: 500KB dynamic | iPOOL0 | +# | ePOOL5: 10MB static | ePOOL6 | +# | ePOOL6: "infinite" static | 200Mbps shaper | +# +-------------------------------------------------------|-------------------+ +# | +# +---|-------------------+ +# | + $h2 H2 | +# | | | +# | + $h2.111 | +# | 192.0.2.34/28 | +# +-----------------------+ +# +# iPOOL0+ePOOL4 is a helper pool for control traffic etc. +# iPOOL1+ePOOL5 are overflow pools. +# iPOOL2+ePOOL6 are PFC pools. + +ALL_TESTS=" + ping_ipv4 + test_qos_pfc +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +_1KB=1000 +_100KB=$((100 * _1KB)) +_500KB=$((500 * _1KB)) +_1MB=$((1000 * _1KB)) +_10MB=$((10 * _1MB)) + +h1_create() +{ + simple_if_init $h1 + mtu_set $h1 10000 + + vlan_create $h1 111 v$h1 192.0.2.33/28 +} + +h1_destroy() +{ + vlan_destroy $h1 111 + + mtu_restore $h1 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + mtu_set $h2 10000 + + vlan_create $h2 111 v$h2 192.0.2.34/28 +} + +h2_destroy() +{ + vlan_destroy $h2 111 + + mtu_restore $h2 + simple_if_fini $h2 +} + +switch_create() +{ + local lanes_swp4 + local pg1_size + + # pools + # ----- + + devlink_pool_size_thtype_save 0 + devlink_pool_size_thtype_save 4 + devlink_pool_size_thtype_save 1 + devlink_pool_size_thtype_save 5 + devlink_pool_size_thtype_save 2 + devlink_pool_size_thtype_save 6 + + devlink_port_pool_th_save $swp1 1 + devlink_port_pool_th_save $swp2 6 + devlink_port_pool_th_save $swp3 5 + devlink_port_pool_th_save $swp4 2 + + devlink_tc_bind_pool_th_save $swp1 1 ingress + devlink_tc_bind_pool_th_save $swp2 1 egress + devlink_tc_bind_pool_th_save $swp3 1 egress + devlink_tc_bind_pool_th_save $swp4 1 ingress + + # Control traffic pools. Just reduce the size. Keep them dynamic so that + # we don't need to change all the uninteresting quotas. + devlink_pool_size_thtype_set 0 dynamic $_500KB + devlink_pool_size_thtype_set 4 dynamic $_500KB + + # Overflow pools. + devlink_pool_size_thtype_set 1 static $_10MB + devlink_pool_size_thtype_set 5 static $_10MB + + # PFC pools. As per the writ, the size of egress PFC pool should be + # infinice, but actually it just needs to be large enough to not matter + # in practice, so reuse the 10MB limit. + devlink_pool_size_thtype_set 2 static $_1MB + devlink_pool_size_thtype_set 6 static $_10MB + + # $swp1 + # ----- + + ip link set dev $swp1 up + mtu_set $swp1 10000 + vlan_create $swp1 111 + ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp1 1 $_10MB + devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB + + # Configure qdisc so that we can configure PG and therefore pool + # assignment. + tc qdisc replace dev $swp1 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + dcb buffer set dev $swp1 prio-buffer all:0 1:1 + + # $swp2 + # ----- + + ip link set dev $swp2 up + mtu_set $swp2 10000 + vlan_create $swp2 111 + ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp2 6 $_10MB + devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB + + # prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped. + tc qdisc replace dev $swp2 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + tc qdisc replace dev $swp2 parent 1:7 handle 17: \ + tbf rate 200Mbit burst 131072 limit 1M + + # $swp3 + # ----- + + ip link set dev $swp3 up + mtu_set $swp3 10000 + vlan_create $swp3 111 + ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp3 5 $_10MB + devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB + + # prio 0->TC0 (band 7), 1->TC1 (band 6) + tc qdisc replace dev $swp3 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + + # Need to enable PFC so that PAUSE takes effect. Therefore need to put + # the lossless prio into a buffer of its own. Don't bother with buffer + # sizes though, there is not going to be any pressure in the "backward" + # direction. + dcb buffer set dev $swp3 prio-buffer all:0 1:1 + dcb pfc set dev $swp3 prio-pfc all:off 1:on + + # $swp4 + # ----- + + ip link set dev $swp4 up + mtu_set $swp4 10000 + vlan_create $swp4 111 + ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp4 2 $_1MB + devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB + + # Configure qdisc so that we can hand-tune headroom. + tc qdisc replace dev $swp4 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + dcb buffer set dev $swp4 prio-buffer all:0 1:1 + dcb pfc set dev $swp4 prio-pfc all:off 1:on + # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which + # is (-2*MTU) about 80K of delay provision. + pg1_size=$_100KB + + setup_wait_dev_with_timeout $swp4 + + lanes_swp4=$(ethtool $swp4 | grep 'Lanes:') + lanes_swp4=${lanes_swp4#*"Lanes: "} + + # 8-lane ports use two buffers among which the configured buffer + # is split, so double the size to get twice (20K + 80K). + if [[ $lanes_swp4 -eq 8 ]]; then + pg1_size=$((pg1_size * 2)) + fi + + dcb buffer set dev $swp4 buffer-size all:0 1:$pg1_size + + # bridges + # ------- + + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev $swp1.111 master br1 + ip link set dev $swp3.111 master br1 + ip link set dev br1 up + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev $swp2.111 master br2 + ip link set dev $swp4.111 master br2 + ip link set dev br2 up +} + +switch_destroy() +{ + # Do this first so that we can reset the limits to values that are only + # valid for the original static / dynamic setting. + devlink_pool_size_thtype_restore 6 + devlink_pool_size_thtype_restore 5 + devlink_pool_size_thtype_restore 4 + devlink_pool_size_thtype_restore 2 + devlink_pool_size_thtype_restore 1 + devlink_pool_size_thtype_restore 0 + + # bridges + # ------- + + ip link set dev br2 down + ip link set dev $swp4.111 nomaster + ip link set dev $swp2.111 nomaster + ip link del dev br2 + + ip link set dev br1 down + ip link set dev $swp3.111 nomaster + ip link set dev $swp1.111 nomaster + ip link del dev br1 + + # $swp4 + # ----- + + dcb buffer set dev $swp4 buffer-size all:0 + dcb pfc set dev $swp4 prio-pfc all:off + dcb buffer set dev $swp4 prio-buffer all:0 + tc qdisc del dev $swp4 root + + devlink_tc_bind_pool_th_restore $swp4 1 ingress + devlink_port_pool_th_restore $swp4 2 + + vlan_destroy $swp4 111 + mtu_restore $swp4 + ip link set dev $swp4 down + + # $swp3 + # ----- + + dcb pfc set dev $swp3 prio-pfc all:off + dcb buffer set dev $swp3 prio-buffer all:0 + tc qdisc del dev $swp3 root + + devlink_tc_bind_pool_th_restore $swp3 1 egress + devlink_port_pool_th_restore $swp3 5 + + vlan_destroy $swp3 111 + mtu_restore $swp3 + ip link set dev $swp3 down + + # $swp2 + # ----- + + tc qdisc del dev $swp2 parent 1:7 + tc qdisc del dev $swp2 root + + devlink_tc_bind_pool_th_restore $swp2 1 egress + devlink_port_pool_th_restore $swp2 6 + + vlan_destroy $swp2 111 + mtu_restore $swp2 + ip link set dev $swp2 down + + # $swp1 + # ----- + + dcb buffer set dev $swp1 prio-buffer all:0 + tc qdisc del dev $swp1 root + + devlink_tc_bind_pool_th_restore $swp1 1 ingress + devlink_port_pool_th_restore $swp1 1 + + vlan_destroy $swp1 111 + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + swp4=${NETIFS[p6]} + + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 +} + +test_qos_pfc() +{ + RET=0 + + # 10M pool, each packet is 8K of payload + headers + local pkts=$((_10MB / 8050)) + local size=$((pkts * 8050)) + local in0=$(ethtool_stats_get $swp1 rx_octets_prio_1) + local out0=$(ethtool_stats_get $swp2 tx_octets_prio_1) + + $MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \ + -a own -b $h2mac -c $pkts -t udp -q + sleep 2 + + local in1=$(ethtool_stats_get $swp1 rx_octets_prio_1) + local out1=$(ethtool_stats_get $swp2 tx_octets_prio_1) + + local din=$((in1 - in0)) + local dout=$((out1 - out0)) + + local pct_in=$((din * 100 / size)) + + ((pct_in > 95 && pct_in < 105)) + check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%" + + ((dout == din)) + check_err $? "$((din - dout)) bytes out of $din ingressed got lost" + + log_test "PFC" +} + +bail_on_lldpad "configure DCB" "configure Qdiscs" + +trap cleanup EXIT +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh new file mode 100755 index 000000000000..4a11bf1d514a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh @@ -0,0 +1,184 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + bridge_rif_add + bridge_rif_nomaster + bridge_rif_remaster + bridge_rif_nomaster_addr + bridge_rif_nomaster_port + bridge_rif_remaster_port +" + +REQUIRE_TEAMD="yes" +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + team_create lag1 lacp + ip link set dev lag1 addrgenmode none + ip link set dev lag1 address $(mac_get $swp1) + + team_create lag2 lacp + ip link set dev lag2 addrgenmode none + ip link set dev lag2 address $(mac_get $swp2) + + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none + ip link set dev br1 address $(mac_get lag1) + ip link set dev br1 up + + ip link set dev lag1 master br1 + + ip link set dev $swp1 master lag1 + ip link set dev $swp1 up + + ip link set dev $swp2 master lag2 + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 nomaster + ip link set dev $swp2 down + + ip link set dev $swp1 nomaster + ip link set dev $swp1 down + + ip link del dev lag2 + ip link set dev lag1 nomaster + ip link del dev lag1 + + ip link del dev br1 +} + +bridge_rif_add() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + __addr_add_del br1 add 192.0.2.2/28 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIF for bridge on address addition" +} + +bridge_rif_nomaster() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev lag1 nomaster + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 - 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Drop RIF for bridge on LAG deslavement" +} + +bridge_rif_remaster() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev lag1 master br1 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIF for bridge on LAG reenslavement" +} + +bridge_rif_nomaster_addr() +{ + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + + # Adding an address while the LAG is enslaved shouldn't generate a RIF. + __addr_add_del lag1 add 192.0.2.65/28 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + # Removing the LAG from the bridge should drop RIF for the bridge (as + # tested in bridge_rif_lag_nomaster), but since the LAG now has an + # address, it should gain a RIF. + ip link set dev lag1 nomaster + sleep 1 + local rifs_occ_t2=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0)) + + ((expected_rifs == rifs_occ_t2)) + check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used" + + log_test "Add RIF for LAG on deslavement from bridge" + + __addr_add_del lag1 del 192.0.2.65/28 + ip link set dev lag1 master br1 + sleep 1 +} + +bridge_rif_nomaster_port() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev $swp1 nomaster + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 - 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Drop RIF for bridge on deslavement of port from LAG" +} + +bridge_rif_remaster_port() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev $swp1 down + ip link set dev $swp1 master lag1 + ip link set dev $swp1 up + setup_wait_dev $swp1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIF for bridge on reenslavement of port to LAG" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh new file mode 100644 index 000000000000..a43a9926e690 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: GPL-2.0 + +RIF_COUNTER_NUM_NETIFS=2 + +rif_counter_addr4() +{ + local i=$1; shift + local p=$1; shift + + printf 192.0.%d.%d $((i / 64)) $(((4 * i % 256) + p)) +} + +rif_counter_addr4pfx() +{ + rif_counter_addr4 $@ + printf /30 +} + +rif_counter_h1_create() +{ + simple_if_init $h1 +} + +rif_counter_h1_destroy() +{ + simple_if_fini $h1 +} + +rif_counter_h2_create() +{ + simple_if_init $h2 +} + +rif_counter_h2_destroy() +{ + simple_if_fini $h2 +} + +rif_counter_setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + vrf_prepare + + rif_counter_h1_create + rif_counter_h2_create +} + +rif_counter_cleanup() +{ + local count=$1; shift + + pre_cleanup + + for ((i = 1; i <= count; i++)); do + vlan_destroy $h2 $i + done + + rif_counter_h2_destroy + rif_counter_h1_destroy + + vrf_cleanup + + if [[ -v RIF_COUNTER_BATCH_FILE ]]; then + rm -f $RIF_COUNTER_BATCH_FILE + fi +} + + +rif_counter_test() +{ + local count=$1; shift + local should_fail=$1; shift + + RIF_COUNTER_BATCH_FILE="$(mktemp)" + + for ((i = 1; i <= count; i++)); do + vlan_create $h2 $i v$h2 $(rif_counter_addr4pfx $i 2) + done + for ((i = 1; i <= count; i++)); do + cat >> $RIF_COUNTER_BATCH_FILE <<-EOF + stats set dev $h2.$i l3_stats on + EOF + done + + ip -b $RIF_COUNTER_BATCH_FILE + check_err_fail $should_fail $? "RIF counter enablement" +} + +rif_counter_traffic_test() +{ + local count=$1; shift + local i; + + for ((i = count; i > 0; i /= 2)); do + $MZ $h1 -Q $i -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \ + -A $(rif_counter_addr4 $i 1) \ + -B $(rif_counter_addr4 $i 2) \ + -q -t udp sp=54321,dp=12345 + done + for ((i = count; i > 0; i /= 2)); do + busywait "$TC_HIT_TIMEOUT" until_counter_is "== 1" \ + hw_stats_get l3_stats $h2.$i rx packets > /dev/null + check_err $? "Traffic not seen at RIF $h2.$i" + done +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh new file mode 100755 index 000000000000..b8bbe94f4736 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh @@ -0,0 +1,137 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + lag_rif_add + lag_rif_nomaster + lag_rif_remaster + lag_rif_nomaster_addr +" + +REQUIRE_TEAMD="yes" +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + team_create lag1 lacp + ip link set dev lag1 addrgenmode none + ip link set dev lag1 address $(mac_get $swp1) + + team_create lag2 lacp + ip link set dev lag2 addrgenmode none + ip link set dev lag2 address $(mac_get $swp2) + + ip link set dev $swp1 master lag1 + ip link set dev $swp1 up + + ip link set dev $swp2 master lag2 + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 nomaster + ip link set dev $swp2 down + + ip link set dev $swp1 nomaster + ip link set dev $swp1 down + + ip link del dev lag2 + ip link del dev lag1 +} + +lag_rif_add() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + __addr_add_del lag1 add 192.0.2.2/28 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIF for LAG on address addition" +} + +lag_rif_nomaster() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev $swp1 nomaster + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 - 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Drop RIF for LAG on port deslavement" +} + +lag_rif_remaster() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev $swp1 down + ip link set dev $swp1 master lag1 + ip link set dev $swp1 up + setup_wait_dev $swp1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIF for LAG on port reenslavement" +} + +lag_rif_nomaster_addr() +{ + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + + # Adding an address while the port is LAG'd shouldn't generate a RIF. + __addr_add_del $swp1 add 192.0.2.65/28 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + # Removing the port from LAG should drop RIF for the LAG (as tested in + # lag_rif_nomaster), but since the port now has an address, it should + # gain a RIF. + ip link set dev $swp1 nomaster + sleep 1 + local rifs_occ_t2=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0)) + + ((expected_rifs == rifs_occ_t2)) + check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used" + + __addr_add_del $swp1 del 192.0.2.65/28 + log_test "Add RIF for port on deslavement from LAG" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh new file mode 100755 index 000000000000..d1a9d379eaf3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh @@ -0,0 +1,147 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + lag_rif_add + lag_rif_nomaster + lag_rif_remaster + lag_rif_nomaster_addr +" + +REQUIRE_TEAMD="yes" +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + team_create lag1 lacp + ip link set dev lag1 addrgenmode none + ip link set dev lag1 address $(mac_get $swp1) + + team_create lag2 lacp + ip link set dev lag2 addrgenmode none + ip link set dev lag2 address $(mac_get $swp2) + + ip link set dev $swp1 master lag1 + ip link set dev $swp1 up + + ip link set dev $swp2 master lag2 + ip link set dev $swp2 up + + vlan_create lag1 100 + ip link set dev lag1.100 addrgenmode none + + vlan_create lag1 200 + ip link set dev lag1.200 addrgenmode none +} + +cleanup() +{ + pre_cleanup + + ip link del dev lag1.200 + ip link del dev lag1.100 + + ip link set dev $swp2 nomaster + ip link set dev $swp2 down + + ip link set dev $swp1 nomaster + ip link set dev $swp1 down + + ip link del dev lag2 + ip link del dev lag1 +} + +lag_rif_add() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + __addr_add_del lag1.100 add 192.0.2.2/28 + __addr_add_del lag1.200 add 192.0.2.18/28 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 2)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIFs for LAG VLANs on address addition" +} + +lag_rif_nomaster() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev $swp1 nomaster + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 - 2)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Drop RIFs for LAG VLANs on port deslavement" +} + +lag_rif_remaster() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev $swp1 down + ip link set dev $swp1 master lag1 + ip link set dev $swp1 up + setup_wait_dev $swp1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 2)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIFs for LAG VLANs on port reenslavement" +} + +lag_rif_nomaster_addr() +{ + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + + # Adding an address while the port is LAG'd shouldn't generate a RIF. + __addr_add_del $swp1 add 192.0.2.65/28 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + # Removing the port from LAG should drop two RIFs for the LAG VLANs (as + # tested in lag_rif_nomaster), but since the port now has an address, it + # should gain a RIF. + ip link set dev $swp1 nomaster + sleep 1 + local rifs_occ_t2=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 - 1)) + + ((expected_rifs == rifs_occ_t2)) + check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used" + + __addr_add_del $swp1 del 192.0.2.65/28 + log_test "Add RIF for port on deslavement from LAG" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh new file mode 100644 index 000000000000..71e7681f15f6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for RIF MAC profiles resource. The test adds VLAN netdevices according to +# the maximum number of RIF MAC profiles, sets each of them with a random +# MAC address, and checks that eventually the number of occupied RIF MAC +# profiles equals the maximum number of RIF MAC profiles. + + +RIF_MAC_PROFILE_NUM_NETIFS=2 + +rif_mac_profiles_create() +{ + local count=$1; shift + local should_fail=$1; shift + local batch_file="$(mktemp)" + + for ((i = 1; i <= count; i++)); do + vlan=$(( i*10 )) + m=$(( i*11 )) + + cat >> $batch_file <<-EOF + link add link $h1 name $h1.$vlan \ + address 00:$m:$m:$m:$m:$m type vlan id $vlan + address add 192.0.$m.1/24 dev $h1.$vlan + EOF + done + + ip -b $batch_file &> /dev/null + check_err_fail $should_fail $? "RIF creation" + + rm -f $batch_file +} + +rif_mac_profile_test() +{ + local count=$1; shift + local should_fail=$1; shift + + rif_mac_profiles_create $count $should_fail + + occ=$(devlink -j resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]') + + [[ $occ -eq $count ]] + check_err_fail $should_fail $? "Attempt to use $count profiles (actual result $occ)" +} + +rif_mac_profile_setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + # Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses + # being generated and RIFs being created. + sysctl_set net.ipv6.conf.$h1.disable_ipv6 1 + sysctl_set net.ipv6.conf.$h2.disable_ipv6 1 + + ip link set $h1 up + ip link set $h2 up +} + +rif_mac_profile_cleanup() +{ + pre_cleanup + + ip link set $h2 down + ip link set $h1 down + + sysctl_restore net.ipv6.conf.$h2.disable_ipv6 + sysctl_restore net.ipv6.conf.$h1.disable_ipv6 +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh new file mode 100755 index 000000000000..c18340cee55d --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh @@ -0,0 +1,213 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + mac_profile_test +" +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 + ip route add 198.51.100.0/24 vrf v$h1 nexthop via 192.0.2.2 + + tc qdisc add dev $h1 ingress +} + +h1_destroy() +{ + tc qdisc del dev $h1 ingress + + ip route del 198.51.100.0/24 vrf v$h1 + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 + ip route add 192.0.2.0/24 vrf v$h2 nexthop via 198.51.100.2 + + tc qdisc add dev $h2 ingress +} + +h2_destroy() +{ + tc qdisc del dev $h2 ingress + + ip route del 192.0.2.0/24 vrf v$h2 + simple_if_fini $h2 198.51.100.1/24 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + tc qdisc add dev $rp1 clsact + tc qdisc add dev $rp2 clsact + ip address add 192.0.2.2/24 dev $rp1 + ip address add 198.51.100.2/24 dev $rp2 +} + +router_destroy() +{ + ip address del 198.51.100.2/24 dev $rp2 + ip address del 192.0.2.2/24 dev $rp1 + tc qdisc del dev $rp2 clsact + tc qdisc del dev $rp1 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +h1_to_h2() +{ + local test_name=$@; shift + local smac=$(mac_get $rp2) + + RET=0 + + # Replace neighbour to avoid first packet being forwarded in software + ip neigh replace dev $rp2 198.51.100.1 lladdr $(mac_get $h2) + + # Add a filter to ensure that packets are forwarded in hardware. Cannot + # match on source MAC because it is not set in eACL after routing + tc filter add dev $rp2 egress proto ip pref 1 handle 101 \ + flower skip_sw ip_proto udp src_port 12345 dst_port 54321 \ + action pass + + # Add a filter to ensure that packets are received with the correct + # source MAC + tc filter add dev $h2 ingress proto ip pref 1 handle 101 \ + flower skip_sw src_mac $smac ip_proto udp src_port 12345 \ + dst_port 54321 action pass + + $MZ $h1 -a own -b $(mac_get $rp1) -t udp "sp=12345,dp=54321" \ + -A 192.0.2.1 -B 198.51.100.1 -c 10 -p 100 -d 1msec -q + + tc_check_packets "dev $rp2 egress" 101 10 + check_err $? "packets not forwarded in hardware" + + tc_check_packets "dev $h2 ingress" 101 10 + check_err $? "packets not forwarded with correct source mac" + + log_test "h1->h2: $test_name" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower + ip neigh del dev $rp2 198.51.100.1 lladdr $(mac_get $h2) +} + +h2_to_h1() +{ + local test_name=$@; shift + local rp1_mac=$(mac_get $rp1) + + RET=0 + + ip neigh replace dev $rp1 192.0.2.1 lladdr $(mac_get $h1) + + tc filter add dev $rp1 egress proto ip pref 1 handle 101 \ + flower skip_sw ip_proto udp src_port 54321 dst_port 12345 \ + action pass + + tc filter add dev $h1 ingress proto ip pref 1 handle 101 \ + flower skip_sw src_mac $rp1_mac ip_proto udp src_port 54321 \ + dst_port 12345 action pass + + $MZ $h2 -a own -b $(mac_get $rp2) -t udp "sp=54321,dp=12345" \ + -A 198.51.100.1 -B 192.0.2.1 -c 10 -p 100 -d 1msec -q + + tc_check_packets "dev $rp1 egress" 101 10 + check_err $? "packets not forwarded in hardware" + + tc_check_packets "dev $h1 ingress" 101 10 + check_err $? "packets not forwarded with correct source mac" + + log_test "h2->h1: $test_name" + + tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $rp1 egress protocol ip pref 1 handle 101 flower + ip neigh del dev $rp1 192.0.2.1 lladdr $(mac_get $h1) +} + +smac_test() +{ + local test_name=$@; shift + + # Test that packets forwarded to $h2 via $rp2 are forwarded with the + # current source MAC of $rp2 + h1_to_h2 $test_name + + # Test that packets forwarded to $h1 via $rp1 are forwarded with the + # current source MAC of $rp1. This MAC is never changed during the test, + # but given the shared nature of MAC profile, the point is to see that + # changes to the MAC of $rp2 do not affect that of $rp1 + h2_to_h1 $test_name +} + +mac_profile_test() +{ + local rp2_mac=$(mac_get $rp2) + + # Test behavior when the RIF backing $rp2 is transitioned to use + # a new MAC profile + ip link set dev $rp2 addr 00:11:22:33:44:55 + smac_test "new mac profile" + + # Test behavior when the MAC profile used by the RIF is edited + ip link set dev $rp2 address 00:22:22:22:22:22 + smac_test "edit mac profile" + + # Restore original MAC + ip link set dev $rp2 addr $rp2_mac +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +mac_profiles=$(devlink_resource_size_get rif_mac_profiles) +if [[ $mac_profiles -ne 1 ]]; then + tests_run +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh new file mode 100755 index 000000000000..026a126f584d --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh @@ -0,0 +1,147 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + rif_mac_profile_edit_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + # Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses + # being generated and RIFs being created + sysctl_set net.ipv6.conf.$h1.disable_ipv6 1 + sysctl_set net.ipv6.conf.$h2.disable_ipv6 1 + + ip link set $h1 up + ip link set $h2 up +} + +cleanup() +{ + pre_cleanup + + ip link set $h2 down + ip link set $h1 down + + sysctl_restore net.ipv6.conf.$h2.disable_ipv6 + sysctl_restore net.ipv6.conf.$h1.disable_ipv6 + + # Reload in order to clean all the RIFs and RIF MAC profiles created + devlink_reload +} + +create_max_rif_mac_profiles() +{ + local count=$1; shift + local batch_file="$(mktemp)" + + for ((i = 1; i <= count; i++)); do + vlan=$(( i*10 )) + m=$(( i*11 )) + + cat >> $batch_file <<-EOF + link add link $h1 name $h1.$vlan \ + address 00:$m:$m:$m:$m:$m type vlan id $vlan + address add 192.0.$m.1/24 dev $h1.$vlan + EOF + done + + ip -b $batch_file &> /dev/null + rm -f $batch_file +} + +rif_mac_profile_replacement_test() +{ + local h1_10_mac=$(mac_get $h1.10) + + RET=0 + + ip link set $h1.10 address 00:12:34:56:78:99 + check_err $? + + log_test "RIF MAC profile replacement" + + ip link set $h1.10 address $h1_10_mac +} + +rif_mac_profile_consolidation_test() +{ + local count=$1; shift + local h1_20_mac + + RET=0 + + if [[ $count -eq 1 ]]; then + return + fi + + h1_20_mac=$(mac_get $h1.20) + + # Set the MAC of $h1.20 to that of $h1.10 and confirm that they are + # using the same MAC profile. + ip link set $h1.20 address 00:11:11:11:11:11 + check_err $? + + occ=$(devlink -j resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]') + + [[ $occ -eq $((count - 1)) ]] + check_err $? "MAC profile occupancy did not decrease" + + log_test "RIF MAC profile consolidation" + + ip link set $h1.20 address $h1_20_mac +} + +rif_mac_profile_shared_replacement_test() +{ + local count=$1; shift + local i=$((count + 1)) + local vlan=$(( i*10 )) + local m=11 + + RET=0 + + # Create a VLAN netdevice that has the same MAC as the first one. + ip link add link $h1 name $h1.$vlan address 00:$m:$m:$m:$m:$m \ + type vlan id $vlan + ip address add 192.0.$m.1/24 dev $h1.$vlan + + # MAC replacement should fail because all the MAC profiles are in use + # and the profile is shared between multiple RIFs + m=$(( i*11 )) + ip link set $h1.$vlan address 00:$m:$m:$m:$m:$m &> /dev/null + check_fail $? + + log_test "RIF MAC profile shared replacement" + + ip link del dev $h1.$vlan +} + +rif_mac_profile_edit_test() +{ + local count=$(devlink_resource_size_get rif_mac_profiles) + + create_max_rif_mac_profiles $count + + rif_mac_profile_replacement_test + rif_mac_profile_consolidation_test $count + rif_mac_profile_shared_replacement_test $count +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh new file mode 100755 index 000000000000..6ce317cfaf9b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test enslavement to LAG with a clean slate. +# See $lib_dir/router_bridge_lag.sh for further details. + +ALL_TESTS=" + config_devlink_reload + config_enslave_h1 + config_enslave_h2 + config_enslave_h3 + config_enslave_h4 + config_enslave_swp1 + config_enslave_swp2 + config_enslave_swp3 + config_enslave_swp4 + config_wait + ping_ipv4 + ping_ipv6 +" + +config_devlink_reload() +{ + log_info "Devlink reload" + devlink_reload +} + +config_enslave_h1() +{ + config_enslave $h1 lag1 +} + +config_enslave_h2() +{ + config_enslave $h2 lag4 +} + +config_enslave_h3() +{ + config_enslave $h3 lag4 +} + +config_enslave_h4() +{ + config_enslave $h4 lag1 +} + +lib_dir=$(dirname $0)/../../../net/forwarding +EXTRA_SOURCE="source $lib_dir/devlink_lib.sh" +source $lib_dir/router_bridge_lag.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh new file mode 100644 index 000000000000..683759d29199 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh @@ -0,0 +1,142 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ROUTER_NUM_NETIFS=4 +: ${TIMEOUT:=20000} # ms + +router_h1_create() +{ + simple_if_init $h1 192.0.1.1/24 +} + +router_h1_destroy() +{ + simple_if_fini $h1 192.0.1.1/24 +} + +router_h2_create() +{ + simple_if_init $h2 192.0.2.1/24 + tc qdisc add dev $h2 handle ffff: ingress +} + +router_h2_destroy() +{ + tc qdisc del dev $h2 handle ffff: ingress + simple_if_fini $h2 192.0.2.1/24 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + ip address add 192.0.1.2/24 dev $rp1 + ip address add 192.0.2.2/24 dev $rp2 +} + +router_destroy() +{ + ip address del 192.0.2.2/24 dev $rp2 + ip address del 192.0.1.2/24 dev $rp1 + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +router_setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + rp1mac=$(mac_get $rp1) + + vrf_prepare + + router_h1_create + router_h2_create + + router_create +} + +wait_for_routes() +{ + local t0=$1; shift + local route_count=$1; shift + + local t1=$(ip route | grep 'offload' | grep -v 'offload_failed' | wc -l) + local delta=$((t1 - t0)) + echo $delta + [[ $delta -ge $route_count ]] +} + +router_routes_create() +{ + local route_count=$1 + local count=0 + + ROUTE_FILE="$(mktemp)" + + for i in {0..255} + do + for j in {0..255} + do + for k in {0..255} + do + if [[ $count -eq $route_count ]]; then + break 3 + fi + + echo route add 193.${i}.${j}.${k}/32 dev $rp2 \ + >> $ROUTE_FILE + ((count++)) + done + done + done + + ip -b $ROUTE_FILE &> /dev/null +} + +router_routes_destroy() +{ + if [[ -v ROUTE_FILE ]]; then + rm -f $ROUTE_FILE + fi +} + +router_test() +{ + local route_count=$1 + local should_fail=$2 + local delta + + RET=0 + + local t0=$(ip route | grep -o 'offload' | wc -l) + router_routes_create $route_count + delta=$(busywait "$TIMEOUT" wait_for_routes $t0 $route_count) + + check_err_fail $should_fail $? "Offload routes: Expected $route_count, got $delta." + if [[ $RET -ne 0 ]] || [[ $should_fail -eq 1 ]]; then + return + fi + + router_routes_destroy +} + +router_cleanup() +{ + pre_cleanup + + router_routes_destroy + router_destroy + + router_h2_destroy + router_h1_destroy + + vrf_cleanup +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh new file mode 100755 index 000000000000..45a569618424 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -0,0 +1,935 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test various interface configuration scenarios. Observe that configurations +# deemed valid by mlxsw succeed, invalid configurations fail and that no traces +# are produced. To prevent the test from passing in case traces are produced, +# the user can set the 'kernel.panic_on_warn' and 'kernel.panic_on_oops' +# sysctls in its environment. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + rif_vrf_set_addr_test + rif_non_inherit_bridge_addr_test + vlan_interface_deletion_test + bridge_deletion_test + bridge_vlan_flags_test + vlan_1_test + duplicate_vlans_test + vlan_rif_refcount_test + subport_rif_refcount_test + subport_rif_lag_join_test + vlan_dev_deletion_test + lag_unlink_slaves_test + lag_dev_deletion_test + vlan_interface_uppers_test + bridge_extern_learn_test + neigh_offload_test + nexthop_offload_test + nexthop_obj_invalid_test + nexthop_obj_offload_test + nexthop_obj_group_offload_test + nexthop_obj_bucket_offload_test + nexthop_obj_blackhole_offload_test + nexthop_obj_route_offload_test + bridge_locked_port_test + devlink_reload_test +" +NUM_NETIFS=2 +: ${TIMEOUT:=20000} # ms +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +rif_vrf_set_addr_test() +{ + # Test that it is possible to set an IP address on a VRF upper despite + # its random MAC address. + RET=0 + + ip link add name vrf-test type vrf table 10 + ip link set dev $swp1 master vrf-test + + ip -4 address add 192.0.2.1/24 dev vrf-test + check_err $? "failed to set IPv4 address on VRF" + ip -6 address add 2001:db8:1::1/64 dev vrf-test + check_err $? "failed to set IPv6 address on VRF" + + log_test "RIF - setting IP address on VRF" + + ip link del dev vrf-test +} + +rif_non_inherit_bridge_addr_test() +{ + local swp2_mac=$(mac_get $swp2) + + RET=0 + + # Create first RIF + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + # Create a FID RIF + ip link add name br1 up type bridge vlan_filtering 0 + ip link set dev br1 addr $swp2_mac + ip link set dev $swp2 master br1 + ip addr add dev br1 192.0.2.17/28 + check_err $? + + # Prepare a device with a low MAC address + ip link add name d up type dummy + ip link set dev d addr 00:11:22:33:44:55 + + # Attach the device to br1. Since the bridge address was set, it should + # work. + ip link set dev d master br1 &>/dev/null + check_err $? "Could not attach a device with low MAC to a bridge with RIF" + + # Port MAC address change should be allowed for a bridge with set MAC. + ip link set dev $swp2 addr 00:11:22:33:44:55 + check_err $? "Changing swp2's MAC address not permitted" + + log_test "RIF - attach port with bad MAC to bridge with set MAC" + + ip link set dev $swp2 addr $swp2_mac + ip link del dev d + ip link del dev br1 + ip addr del dev $swp1 192.0.2.1/28 +} + +vlan_interface_deletion_test() +{ + # Test that when a VLAN interface is deleted, its associated router + # interface (RIF) is correctly deleted and not leaked. See commit + # c360867ec46a ("mlxsw: spectrum: Delete RIF when VLAN device is + # removed") for more details + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + ip link add link br0 name br0.10 type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev br0.10 + ip link del dev br0.10 + + # If we leaked the previous RIF, then this should produce a trace + ip link add link br0 name br0.20 type vlan id 20 + ip -6 address add 2001:db8:1::1/64 dev br0.20 + ip link del dev br0.20 + + log_test "vlan interface deletion" + + ip link del dev br0 +} + +bridge_deletion_test() +{ + # Test that when a bridge with VLAN interfaces is deleted, we correctly + # delete the associated RIFs. See commit 602b74eda813 ("mlxsw: + # spectrum_switchdev: Do not leak RIFs when removing bridge") for more + # details + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + ip -6 address add 2001:db8::1/64 dev br0 + + ip link add link br0 name br0.10 type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev br0.10 + + ip link add link br0 name br0.20 type vlan id 20 + ip -6 address add 2001:db8:2::1/64 dev br0.20 + + ip link del dev br0 + + # If we leaked previous RIFs, then this should produce a trace + ip -6 address add 2001:db8:1::1/64 dev $swp1 + ip -6 address del 2001:db8:1::1/64 dev $swp1 + + log_test "bridge deletion" +} + +bridge_vlan_flags_test() +{ + # Test that when bridge VLAN flags are toggled, we do not take + # unnecessary references on related structs. See commit 9e25826ffc94 + # ("mlxsw: spectrum_switchdev: Fix port_vlan refcounting") for more + # details + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + bridge vlan add vid 10 dev $swp1 pvid untagged + bridge vlan add vid 10 dev $swp1 untagged + bridge vlan add vid 10 dev $swp1 pvid + bridge vlan add vid 10 dev $swp1 + ip link del dev br0 + + # If we did not handle references correctly, then this should produce a + # trace + devlink_reload + + log_test "bridge vlan flags" +} + +vlan_1_test() +{ + # Test that VLAN 1 can be configured over mlxsw ports. In the past it + # was used internally for untagged traffic. See commit 47bf9df2e820 + # ("mlxsw: spectrum: Forbid creation of VLAN 1 over port/LAG") for more + # details + RET=0 + + ip link add link $swp1 name $swp1.1 type vlan id 1 + check_err $? "did not manage to create vlan 1 when should" + + log_test "vlan 1" + + ip link del dev $swp1.1 +} + +duplicate_vlans_test() +{ + # Test that on a given port a VLAN is only used once. Either as VLAN + # in a VLAN-aware bridge or as a VLAN device + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + bridge vlan add vid 10 dev $swp1 + + ip link add link $swp1 name $swp1.10 type vlan id 10 &> /dev/null + check_fail $? "managed to create vlan device when should not" + + bridge vlan del vid 10 dev $swp1 + ip link add link $swp1 name $swp1.10 type vlan id 10 + check_err $? "did not manage to create vlan device when should" + bridge vlan add vid 10 dev $swp1 &> /dev/null + check_fail $? "managed to add bridge vlan when should not" + + log_test "duplicate vlans" + + ip link del dev $swp1.10 + ip link del dev br0 +} + +vlan_rif_refcount_test() +{ + # Test that RIFs representing VLAN interfaces are not affected from + # ports member in the VLAN. We use the offload indication on routes + # configured on the RIF to understand if it was created / destroyed + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + ip link set dev $swp1 up + ip link set dev br0 up + + ip link add link br0 name br0.10 up type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev br0.10 + + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 + check_err $? "vlan rif was not created before adding port to vlan" + + bridge vlan add vid 10 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 + check_err $? "vlan rif was destroyed after adding port to vlan" + + bridge vlan del vid 10 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 + check_err $? "vlan rif was destroyed after removing port from vlan" + + ip link set dev $swp1 nomaster + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 + check_err $? "vlan rif was not destroyed after unlinking port from bridge" + + log_test "vlan rif refcount" + + ip link del dev br0.10 + ip link set dev $swp1 down + ip link del dev br0 +} + +subport_rif_refcount_test() +{ + # Test that RIFs representing upper devices of physical ports are + # reference counted correctly and destroyed when should. We use the + # offload indication on routes configured on the RIF to understand if + # it was created / destroyed + RET=0 + + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link set dev $swp1 master bond1 + ip link set dev $swp2 master bond1 + + ip link set dev bond1 up + ip link add link bond1 name bond1.10 up type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev bond1 + ip -6 address add 2001:db8:2::1/64 dev bond1.10 + + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 + check_err $? "subport rif was not created on lag device" + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 + check_err $? "subport rif was not created on vlan device" + + ip link set dev $swp1 nomaster + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 + check_err $? "subport rif of lag device was destroyed when should not" + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 + check_err $? "subport rif of vlan device was destroyed when should not" + + ip link set dev $swp2 nomaster + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 + check_err $? "subport rif of lag device was not destroyed when should" + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 + check_err $? "subport rif of vlan device was not destroyed when should" + + log_test "subport rif refcount" + + ip link del dev bond1.10 + ip link del dev bond1 +} + +subport_rif_lag_join_test() +{ + # Test that the reference count of a RIF configured for a LAG is + # incremented / decremented when ports join / leave the LAG. We use the + # offload indication on routes configured on the RIF to understand if + # it was created / destroyed + RET=0 + + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link set dev $swp1 master bond1 + ip link set dev $swp2 master bond1 + + ip link set dev bond1 up + ip -6 address add 2001:db8:1::1/64 dev bond1 + + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 + check_err $? "subport rif was not created on lag device" + + ip link set dev $swp1 nomaster + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 + check_err $? "subport rif of lag device was destroyed after removing one port" + + ip link set dev $swp1 master bond1 + ip link set dev $swp2 nomaster + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 + check_err $? "subport rif of lag device was destroyed after re-adding a port and removing another" + + ip link set dev $swp1 nomaster + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 + check_err $? "subport rif of lag device was not destroyed when should" + + log_test "subport rif lag join" + + ip link del dev bond1 +} + +vlan_dev_deletion_test() +{ + # Test that VLAN devices are correctly deleted / unlinked when enslaved + # to bridge + RET=0 + + ip link add name br10 type bridge + ip link add name br20 type bridge + ip link add name br30 type bridge + ip link add link $swp1 name $swp1.10 type vlan id 10 + ip link add link $swp1 name $swp1.20 type vlan id 20 + ip link add link $swp1 name $swp1.30 type vlan id 30 + ip link set dev $swp1.10 master br10 + ip link set dev $swp1.20 master br20 + ip link set dev $swp1.30 master br30 + + # If we did not handle the situation correctly, then these operations + # might produce a trace + ip link set dev $swp1.30 nomaster + ip link del dev $swp1.20 + # Deletion via ioctl uses different code paths from netlink + vconfig rem $swp1.10 &> /dev/null + + log_test "vlan device deletion" + + ip link del dev $swp1.30 + ip link del dev br30 + ip link del dev br20 + ip link del dev br10 +} + +lag_create() +{ + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link set dev $swp1 master bond1 + ip link set dev $swp2 master bond1 + + ip link add link bond1 name bond1.10 type vlan id 10 + ip link add link bond1 name bond1.20 type vlan id 20 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev bond1 master br0 + + ip link add name br10 type bridge + ip link set dev bond1.10 master br10 + + ip link add name br20 type bridge + ip link set dev bond1.20 master br20 +} + +lag_unlink_slaves_test() +{ + # Test that ports are correctly unlinked from their LAG master, when + # the LAG and its VLAN uppers are enslaved to bridges + RET=0 + + lag_create + + ip link set dev $swp1 nomaster + check_err $? "lag slave $swp1 was not unlinked from master" + ip link set dev $swp2 nomaster + check_err $? "lag slave $swp2 was not unlinked from master" + + # Try to configure corresponding VLANs as router interfaces + ip -6 address add 2001:db8:1::1/64 dev $swp1 + check_err $? "failed to configure ip address on $swp1" + + ip link add link $swp1 name $swp1.10 type vlan id 10 + ip -6 address add 2001:db8:10::1/64 dev $swp1.10 + check_err $? "failed to configure ip address on $swp1.10" + + ip link add link $swp1 name $swp1.20 type vlan id 20 + ip -6 address add 2001:db8:20::1/64 dev $swp1.20 + check_err $? "failed to configure ip address on $swp1.20" + + log_test "lag slaves unlinking" + + ip link del dev $swp1.20 + ip link del dev $swp1.10 + ip address flush dev $swp1 + + ip link del dev br20 + ip link del dev br10 + ip link del dev br0 + ip link del dev bond1 +} + +lag_dev_deletion_test() +{ + # Test that LAG device is correctly deleted, when the LAG and its VLAN + # uppers are enslaved to bridges + RET=0 + + lag_create + + ip link del dev bond1 + + log_test "lag device deletion" + + ip link del dev br20 + ip link del dev br10 + ip link del dev br0 +} + +vlan_interface_uppers_test() +{ + # Test that uppers of a VLAN interface are correctly sanitized + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + ip link add link br0 name br0.10 type vlan id 10 + + ip -6 address add 2001:db8:1::1/64 dev br0.10 + ip link add link br0.10 name macvlan0 type macvlan mode private + check_err $? "did not manage to create a macvlan when should" + + ip link del dev macvlan0 + + ip link add name vrf-test type vrf table 10 + ip link set dev br0.10 master vrf-test + check_err $? "did not manage to enslave vlan interface to vrf" + ip link del dev vrf-test + + ip link add name br-test type bridge + ip link set dev br0.10 master br-test &> /dev/null + check_fail $? "managed to enslave vlan interface to bridge when should not" + ip link del dev br-test + + log_test "vlan interface uppers" + + ip link del dev br0 +} + +bridge_extern_learn_test() +{ + # Test that externally learned entries added from user space are + # marked as offloaded + RET=0 + + ip link add name br0 type bridge + ip link set dev $swp1 master br0 + + bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn + + busywait "$TIMEOUT" wait_for_offload \ + bridge fdb show brport $swp1 de:ad:be:ef:13:37 + check_err $? "fdb entry not marked as offloaded when should" + + log_test "externally learned fdb entry" + + ip link del dev br0 +} + +neigh_offload_test() +{ + # Test that IPv4 and IPv6 neighbour entries are marked as offloaded + RET=0 + + ip -4 address add 192.0.2.1/24 dev $swp1 + ip -6 address add 2001:db8:1::1/64 dev $swp1 + + ip -4 neigh add 192.0.2.2 lladdr de:ad:be:ef:13:37 nud perm dev $swp1 + ip -6 neigh add 2001:db8:1::2 lladdr de:ad:be:ef:13:37 nud perm \ + dev $swp1 + + busywait "$TIMEOUT" wait_for_offload \ + ip -4 neigh show dev $swp1 192.0.2.2 + check_err $? "ipv4 neigh entry not marked as offloaded when should" + busywait "$TIMEOUT" wait_for_offload \ + ip -6 neigh show dev $swp1 2001:db8:1::2 + check_err $? "ipv6 neigh entry not marked as offloaded when should" + + log_test "neighbour offload indication" + + ip -6 neigh del 2001:db8:1::2 dev $swp1 + ip -4 neigh del 192.0.2.2 dev $swp1 + ip -6 address del 2001:db8:1::1/64 dev $swp1 + ip -4 address del 192.0.2.1/24 dev $swp1 +} + +nexthop_offload_test() +{ + # Test that IPv4 and IPv6 nexthops are marked as offloaded + RET=0 + + sysctl_set net.ipv6.conf.$swp2.keep_addr_on_down 1 + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 192.0.2.2/24 2001:db8:1::2/64 + setup_wait + + ip -4 route add 198.51.100.0/24 vrf v$swp1 \ + nexthop via 192.0.2.2 dev $swp1 + ip -6 route add 2001:db8:2::/64 vrf v$swp1 \ + nexthop via 2001:db8:1::2 dev $swp1 + + busywait "$TIMEOUT" wait_for_offload \ + ip -4 route show 198.51.100.0/24 vrf v$swp1 + check_err $? "ipv4 nexthop not marked as offloaded when should" + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route show 2001:db8:2::/64 vrf v$swp1 + check_err $? "ipv6 nexthop not marked as offloaded when should" + + ip link set dev $swp2 down + sleep 1 + + busywait "$TIMEOUT" not wait_for_offload \ + ip -4 route show 198.51.100.0/24 vrf v$swp1 + check_err $? "ipv4 nexthop marked as offloaded when should not" + busywait "$TIMEOUT" not wait_for_offload \ + ip -6 route show 2001:db8:2::/64 vrf v$swp1 + check_err $? "ipv6 nexthop marked as offloaded when should not" + + ip link set dev $swp2 up + setup_wait + + busywait "$TIMEOUT" wait_for_offload \ + ip -4 route show 198.51.100.0/24 vrf v$swp1 + check_err $? "ipv4 nexthop not marked as offloaded after neigh add" + busywait "$TIMEOUT" wait_for_offload \ + ip -6 route show 2001:db8:2::/64 vrf v$swp1 + check_err $? "ipv6 nexthop not marked as offloaded after neigh add" + + log_test "nexthop offload indication" + + ip -6 route del 2001:db8:2::/64 vrf v$swp1 + ip -4 route del 198.51.100.0/24 vrf v$swp1 + + simple_if_fini $swp2 192.0.2.2/24 2001:db8:1::2/64 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 + sysctl_restore net.ipv6.conf.$swp2.keep_addr_on_down +} + +nexthop_obj_invalid_test() +{ + # Test that invalid nexthop object configurations are rejected + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 192.0.2.2/24 2001:db8:1::2/64 + setup_wait + + ip nexthop add id 1 via 192.0.2.3 fdb + check_fail $? "managed to configure an FDB nexthop when should not" + + ip nexthop add id 1 encap mpls 200/300 via 192.0.2.3 dev $swp1 + check_fail $? "managed to configure a nexthop with MPLS encap when should not" + + ip nexthop add id 1 dev $swp1 + ip nexthop add id 2 dev $swp1 + ip nexthop add id 3 via 192.0.2.3 dev $swp1 + ip nexthop add id 10 group 1/2 + check_fail $? "managed to configure a nexthop group with device-only nexthops when should not" + + ip nexthop add id 10 group 3 type resilient buckets 7 + check_fail $? "managed to configure a too small resilient nexthop group when should not" + + ip nexthop add id 10 group 3 type resilient buckets 129 + check_fail $? "managed to configure a resilient nexthop group with invalid number of buckets when should not" + + ip nexthop add id 10 group 1/2 type resilient buckets 32 + check_fail $? "managed to configure a resilient nexthop group with device-only nexthops when should not" + + ip nexthop add id 10 group 3 type resilient buckets 32 + check_err $? "failed to configure a valid resilient nexthop group" + ip nexthop replace id 3 dev $swp1 + check_fail $? "managed to populate a nexthop bucket with a device-only nexthop when should not" + + log_test "nexthop objects - invalid configurations" + + ip nexthop del id 10 + ip nexthop del id 3 + ip nexthop del id 2 + ip nexthop del id 1 + + simple_if_fini $swp2 192.0.2.2/24 2001:db8:1::2/64 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + +nexthop_obj_offload_test() +{ + # Test offload indication of nexthop objects + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 + setup_wait + + ip nexthop add id 1 via 192.0.2.2 dev $swp1 + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ + dev $swp1 + + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop not marked as offloaded when should" + + ip neigh replace 192.0.2.2 nud failed dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop marked as offloaded after setting neigh to failed state" + + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ + dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop not marked as offloaded after neigh replace" + + ip nexthop replace id 1 via 192.0.2.3 dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop marked as offloaded after replacing to use an invalid address" + + ip nexthop replace id 1 via 192.0.2.2 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "nexthop not marked as offloaded after replacing to use a valid address" + + log_test "nexthop objects offload indication" + + ip neigh del 192.0.2.2 dev $swp1 + ip nexthop del id 1 + + simple_if_fini $swp2 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + +nexthop_obj_group_offload_test() +{ + # Test offload indication of nexthop group objects + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 + setup_wait + + ip nexthop add id 1 via 192.0.2.2 dev $swp1 + ip nexthop add id 2 via 2001:db8:1::2 dev $swp1 + ip nexthop add id 10 group 1/2 + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ + dev $swp1 + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \ + dev $swp1 + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \ + dev $swp1 + + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "IPv4 nexthop not marked as offloaded when should" + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 2 + check_err $? "IPv6 nexthop not marked as offloaded when should" + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 10 + check_err $? "nexthop group not marked as offloaded when should" + + # Invalidate nexthop id 1 + ip neigh replace 192.0.2.2 nud failed dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip nexthop show id 10 + check_fail $? "nexthop group not marked as offloaded with one valid nexthop" + + # Invalidate nexthop id 2 + ip neigh replace 2001:db8:1::2 nud failed dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip nexthop show id 10 + check_err $? "nexthop group marked as offloaded when should not" + + # Revalidate nexthop id 1 + ip nexthop replace id 1 via 192.0.2.3 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 10 + check_err $? "nexthop group not marked as offloaded after revalidating nexthop" + + log_test "nexthop group objects offload indication" + + ip neigh del 2001:db8:1::2 dev $swp1 + ip neigh del 192.0.2.3 dev $swp1 + ip neigh del 192.0.2.2 dev $swp1 + ip nexthop del id 10 + ip nexthop del id 2 + ip nexthop del id 1 + + simple_if_fini $swp2 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + +nexthop_obj_bucket_offload_test() +{ + # Test offload indication of nexthop buckets + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 + setup_wait + + ip nexthop add id 1 via 192.0.2.2 dev $swp1 + ip nexthop add id 2 via 2001:db8:1::2 dev $swp1 + ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0 + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ + dev $swp1 + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \ + dev $swp1 + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \ + dev $swp1 + + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 1 + check_err $? "IPv4 nexthop buckets not marked as offloaded when should" + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 2 + check_err $? "IPv6 nexthop buckets not marked as offloaded when should" + + # Invalidate nexthop id 1 + ip neigh replace 192.0.2.2 nud failed dev $swp1 + busywait "$TIMEOUT" wait_for_trap \ + ip nexthop bucket show nhid 1 + check_err $? "IPv4 nexthop buckets not marked with trap when should" + + # Invalidate nexthop id 2 + ip neigh replace 2001:db8:1::2 nud failed dev $swp1 + busywait "$TIMEOUT" wait_for_trap \ + ip nexthop bucket show nhid 2 + check_err $? "IPv6 nexthop buckets not marked with trap when should" + + # Revalidate nexthop id 1 by changing its configuration + ip nexthop replace id 1 via 192.0.2.3 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 1 + check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop" + + # Revalidate nexthop id 2 by changing its neighbour + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \ + dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 2 + check_err $? "nexthop bucket not marked as offloaded after revalidating neighbour" + + log_test "nexthop bucket offload indication" + + ip neigh del 2001:db8:1::2 dev $swp1 + ip neigh del 192.0.2.3 dev $swp1 + ip neigh del 192.0.2.2 dev $swp1 + ip nexthop del id 10 + ip nexthop del id 2 + ip nexthop del id 1 + + simple_if_fini $swp2 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + +nexthop_obj_blackhole_offload_test() +{ + # Test offload indication of blackhole nexthop objects + RET=0 + + ip nexthop add id 1 blackhole + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 1 + check_err $? "Blackhole nexthop not marked as offloaded when should" + + ip nexthop add id 10 group 1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop show id 10 + check_err $? "Nexthop group not marked as offloaded when should" + + log_test "blackhole nexthop objects offload indication" + + ip nexthop del id 10 + ip nexthop del id 1 +} + +nexthop_obj_route_offload_test() +{ + # Test offload indication of routes using nexthop objects + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 + setup_wait + + ip nexthop add id 1 via 192.0.2.2 dev $swp1 + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \ + dev $swp1 + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \ + dev $swp1 + + ip route replace 198.51.100.0/24 nhid 1 + busywait "$TIMEOUT" wait_for_offload \ + ip route show 198.51.100.0/24 + check_err $? "route not marked as offloaded when using valid nexthop" + + ip nexthop replace id 1 via 192.0.2.3 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip route show 198.51.100.0/24 + check_err $? "route not marked as offloaded after replacing valid nexthop with a valid one" + + ip nexthop replace id 1 via 192.0.2.4 dev $swp1 + busywait "$TIMEOUT" not wait_for_offload \ + ip route show 198.51.100.0/24 + check_err $? "route marked as offloaded after replacing valid nexthop with an invalid one" + + ip nexthop replace id 1 via 192.0.2.2 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip route show 198.51.100.0/24 + check_err $? "route not marked as offloaded after replacing invalid nexthop with a valid one" + + log_test "routes using nexthop objects offload indication" + + ip route del 198.51.100.0/24 + ip neigh del 192.0.2.3 dev $swp1 + ip neigh del 192.0.2.2 dev $swp1 + ip nexthop del id 1 + + simple_if_fini $swp2 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + +bridge_locked_port_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 0 + + ip link add link $swp1 name $swp1.10 type vlan id 10 + ip link set dev $swp1.10 master br1 + + bridge link set dev $swp1.10 locked on + check_fail $? "managed to set locked flag on a VLAN upper" + + ip link set dev $swp1.10 nomaster + ip link set dev $swp1 master br1 + + bridge link set dev $swp1 locked on + check_fail $? "managed to set locked flag on a bridge port that has a VLAN upper" + + ip link del dev $swp1.10 + bridge link set dev $swp1 locked on + + ip link add link $swp1 name $swp1.10 type vlan id 10 + check_fail $? "managed to configure a VLAN upper on a locked port" + + log_test "bridge locked port" + + ip link del dev $swp1.10 &> /dev/null + ip link del dev br1 +} + +devlink_reload_test() +{ + # Test that after executing all the above configuration tests, a + # devlink reload can be performed without errors + RET=0 + + devlink_reload + + log_test "devlink reload - last test" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh new file mode 100755 index 000000000000..4aaceb6b2b60 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A driver for the ETS selftest that implements testing in offloaded datapath. +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/sch_ets_core.sh +source $lib_dir/devlink_lib.sh + +ALL_TESTS=" + ping_ipv4 + priomap_mode + ets_test_strict + ets_test_mixed + ets_test_dwrr +" + +PARENT="parent 3:3" + +switch_create() +{ + # Create a bottleneck so that the DWRR process can kick in. + tc qdisc replace dev $swp2 root handle 3: tbf rate 1gbit \ + burst 128K limit 1G + defer tc qdisc del dev $swp2 root handle 3: + + ets_switch_create + + # Set the ingress quota high and use the three egress TCs to limit the + # amount of traffic that is admitted to the shared buffers. This makes + # sure that there is always enough traffic of all types to select from + # for the DWRR process. + devlink_port_pool_th_save $swp1 0 + devlink_port_pool_th_set $swp1 0 12 + defer devlink_port_pool_th_restore $swp1 0 + + devlink_tc_bind_pool_th_save $swp1 0 ingress + devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12 + defer devlink_tc_bind_pool_th_restore $swp1 0 ingress + + devlink_port_pool_th_save $swp2 4 + devlink_port_pool_th_set $swp2 4 12 + defer devlink_port_pool_th_restore $swp2 4 + + devlink_tc_bind_pool_th_save $swp2 7 egress + devlink_tc_bind_pool_th_set $swp2 7 egress 4 5 + defer devlink_tc_bind_pool_th_restore $swp2 7 egress + + devlink_tc_bind_pool_th_save $swp2 6 egress + devlink_tc_bind_pool_th_set $swp2 6 egress 4 5 + defer devlink_tc_bind_pool_th_restore $swp2 6 egress + + devlink_tc_bind_pool_th_save $swp2 5 egress + devlink_tc_bind_pool_th_set $swp2 5 egress 4 5 + defer devlink_tc_bind_pool_th_restore $swp2 5 egress + + # Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet + # priorities at $swp1 based on their 802.1p headers. ingress-qos-map is + # not offloaded by mlxsw as of this writing, but the mapping used is + # 1:1, which is the mapping currently hard-coded by the driver. +} + +# Callback from sch_ets_tests.sh +collect_stats() +{ + local -a streams=("$@") + local stream + + # Wait for qdisc counter update so that we don't get it mid-way through. + busywait_for_counter 1000 +1 \ + qdisc_parent_stats_get $swp2 10:$((${streams[0]} + 1)) .bytes \ + > /dev/null + + for stream in ${streams[@]}; do + qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes + done +} + +bail_on_lldpad "configure DCB" "configure Qdiscs" +ets_run diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh new file mode 100755 index 000000000000..071a33d10c20 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh @@ -0,0 +1,290 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test qdisc offload indication + + +ALL_TESTS=" + test_root + test_port_tbf + test_etsprio + test_etsprio_port_tbf +" +NUM_NETIFS=1 +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/lib.sh + +check_not_offloaded() +{ + local handle=$1; shift + local h + local offloaded + + h=$(qdisc_stats_get $h1 "$handle" .handle) + [[ $h == '"'$handle'"' ]] + check_err $? "Qdisc with handle $handle does not exist" + + offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded) + [[ $offloaded == true ]] + check_fail $? "Qdisc with handle $handle offloaded, but should not be" +} + +check_all_offloaded() +{ + local handle=$1; shift + + if [[ ! -z $handle ]]; then + local offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded) + [[ $offloaded == true ]] + check_err $? "Qdisc with handle $handle not offloaded" + fi + + local unoffloaded=$(tc q sh dev $h1 invisible | + grep -v offloaded | + sed s/root/parent\ root/ | + cut -d' ' -f 5) + [[ -z $unoffloaded ]] + check_err $? "Qdiscs with following parents not offloaded: $unoffloaded" + + pre_cleanup +} + +with_ets() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle \ + ets bands 8 priomap 7 6 5 4 3 2 1 0 + "$@" + tc qdisc del dev $h1 $locus +} + +with_prio() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle \ + prio bands 8 priomap 7 6 5 4 3 2 1 0 + "$@" + tc qdisc del dev $h1 $locus +} + +with_red() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle \ + red limit 1000000 min 200000 max 300000 probability 0.5 avpkt 1500 + "$@" + tc qdisc del dev $h1 $locus +} + +with_tbf() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle \ + tbf rate 400Mbit burst 128K limit 1M + "$@" + tc qdisc del dev $h1 $locus +} + +with_pfifo() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle pfifo limit 100K + "$@" + tc qdisc del dev $h1 $locus +} + +with_bfifo() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle bfifo limit 100K + "$@" + tc qdisc del dev $h1 $locus +} + +with_drr() +{ + local handle=$1; shift + local locus=$1; shift + + tc qdisc add dev $h1 $locus handle $handle drr + "$@" + tc qdisc del dev $h1 $locus +} + +with_qdiscs() +{ + local handle=$1; shift + local parent=$1; shift + local kind=$1; shift + local next_handle=$((handle * 2)) + local locus; + + if [[ $kind == "--" ]]; then + local cmd=$1; shift + $cmd $(printf %x: $parent) "$@" + else + if ((parent == 0)); then + locus=root + else + locus=$(printf "parent %x:1" $parent) + fi + + with_$kind $(printf %x: $handle) "$locus" \ + with_qdiscs $next_handle $handle "$@" + fi +} + +get_name() +{ + local parent=$1; shift + local name=$(echo "" "${@^^}" | tr ' ' -) + + if ((parent != 0)); then + kind=$(qdisc_stats_get $h1 $parent: .kind) + kind=${kind%\"} + kind=${kind#\"} + name="-${kind^^}$name" + fi + + echo root$name +} + +do_test_offloaded() +{ + local handle=$1; shift + local parent=$1; shift + + RET=0 + with_qdiscs $handle $parent "$@" -- check_all_offloaded + log_test $(get_name $parent "$@")" offloaded" +} + +do_test_nooffload() +{ + local handle=$1; shift + local parent=$1; shift + + local name=$(echo "${@^^}" | tr ' ' -) + local kind + + RET=0 + with_qdiscs $handle $parent "$@" -- check_not_offloaded + log_test $(get_name $parent "$@")" not offloaded" +} + +do_test_combinations() +{ + local handle=$1; shift + local parent=$1; shift + + local cont + local leaf + local fifo + + for cont in "" ets prio; do + for leaf in "" red tbf "red tbf" "tbf red"; do + for fifo in "" pfifo bfifo; do + if [[ -z "$cont$leaf$fifo" ]]; then + continue + fi + do_test_offloaded $handle $parent \ + $cont $leaf $fifo + done + done + done + + for cont in ets prio; do + for leaf in red tbf; do + do_test_nooffload $handle $parent $cont red tbf $leaf + do_test_nooffload $handle $parent $cont tbf red $leaf + done + for leaf in "red red" "tbf tbf"; do + do_test_nooffload $handle $parent $cont $leaf + done + done + + do_test_nooffload $handle $parent drr +} + +test_root() +{ + do_test_combinations 1 0 +} + +test_port_tbf() +{ + with_tbf 1: root \ + do_test_combinations 8 1 +} + +do_test_etsprio() +{ + local parent=$1; shift + local tbfpfx=$1; shift + local cont + + for cont in ets prio; do + RET=0 + with_$cont 8: "$parent" \ + with_red 11: "parent 8:1" \ + with_red 12: "parent 8:2" \ + with_tbf 13: "parent 8:3" \ + with_tbf 14: "parent 8:4" \ + check_all_offloaded + log_test "root$tbfpfx-ETS-{RED,TBF} offloaded" + + RET=0 + with_$cont 8: "$parent" \ + with_red 81: "parent 8:1" \ + with_tbf 811: "parent 81:1" \ + with_tbf 84: "parent 8:4" \ + with_red 841: "parent 84:1" \ + check_all_offloaded + log_test "root$tbfpfx-ETS-{RED-TBF,TBF-RED} offloaded" + + RET=0 + with_$cont 8: "$parent" \ + with_red 81: "parent 8:1" \ + with_tbf 811: "parent 81:1" \ + with_bfifo 8111: "parent 811:1" \ + with_tbf 82: "parent 8:2" \ + with_red 821: "parent 82:1" \ + with_bfifo 8211: "parent 821:1" \ + check_all_offloaded + log_test "root$tbfpfx-ETS-{RED-TBF-bFIFO,TBF-RED-bFIFO} offloaded" + done +} + +test_etsprio() +{ + do_test_etsprio root "" +} + +test_etsprio_port_tbf() +{ + with_tbf 1: root \ + do_test_etsprio "parent 1:1" "-TBF" +} + +cleanup() +{ + tc qdisc del dev $h1 root &>/dev/null +} + +trap cleanup EXIT +h1=${NETIFS[p1]} +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh new file mode 100644 index 000000000000..47d2ffcf366e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -0,0 +1,761 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This test sends a >1Gbps stream of traffic from H1, to the switch, which +# forwards it to a 1Gbps port. This 1Gbps stream is then looped back to the +# switch and forwarded to the port under test $swp3, which is also 1Gbps. +# +# This way, $swp3 should be 100% filled with traffic without any of it spilling +# to the backlog. Any extra packets sent should almost 1:1 go to backlog. That +# is what H2 is used for--it sends the extra traffic to create backlog. +# +# A RED Qdisc is installed on $swp3. The configuration is such that the minimum +# and maximum size are 1 byte apart, so there is a very clear border under which +# no marking or dropping takes place, and above which everything is marked or +# dropped. +# +# The test uses the buffer build-up behavior to test the installed RED. +# +# In order to test WRED, $swp3 actually contains RED under PRIO, with two +# different configurations. Traffic is prioritized using 802.1p and relies on +# the implicit mlxsw configuration, where packet priority is taken 1:1 from the +# 802.1p marking. +# +# +--------------------------+ +--------------------------+ +# | H1 | | H2 | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | | | | | +# | | $h1.11 + | | | $h2.11 + | +# | | 192.0.2.17/28 | | | | 192.0.2.18/28 | | +# | | | | | | | | +# | \______ ______/ | | \______ ______/ | +# | \ / | | \ / | +# | + $h1 | | + $h2 | +# +-------------|------------+ +-------------|------------+ +# | >1Gbps | +# +-------------|------------------------------------------------|------------+ +# | SW + $swp1 + $swp2 | +# | _______/ \___________ ___________/ \_______ | +# | / \ / \ | +# | +-|-----------------+ | +-|-----------------+ | | +# | | + $swp1.10 | | | + $swp2.10 | | | +# | | | | .-------------+ $swp5.10 | | | +# | | BR1_10 | | | | | | | +# | | | | | | BR2_10 | | | +# | | + $swp2.10 | | | | | | | +# | +-|-----------------+ | | | + $swp3.10 | | | +# | | | | +-|-----------------+ | | +# | | +-----------------|-+ | | +-----------------|-+ | +# | | | $swp1.11 + | | | | $swp2.11 + | | +# | | | | | .-----------------+ $swp5.11 | | +# | | | BR1_11 | | | | | | | +# | | | | | | | | BR2_11 | | +# | | | $swp2.11 + | | | | | | | +# | | +-----------------|-+ | | | | $swp3.11 + | | +# | | | | | | +-----------------|-+ | +# | \_______ ___________/ | | \___________ _______/ | +# | \ / \ / \ / | +# | + $swp4 + $swp5 + $swp3 | +# +-------------|----------------------|-------------------------|------------+ +# | | | 1Gbps +# \________1Gbps_________/ | +# +----------------------------|------------+ +# | H3 + $h3 | +# | _____________________/ \_______ | +# | / \ | +# | | | | +# | + $h3.10 $h3.11 + | +# | 192.0.2.3/28 192.0.2.19/28 | +# +-----------------------------------------+ + +NUM_NETIFS=8 +CHECK_TC="yes" +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source mlxsw_lib.sh + +stop_traffic_sleep() +{ + local pid=$1; shift + + # Issuing a kill still leaves a bunch of packets lingering in the + # buffers. This traffic then arrives at the point where a follow-up test + # is already running, and can confuse the test. Therefore sleep after + # stopping traffic to flush any leftover packets. + stop_traffic "$pid" + sleep 1 +} + +ipaddr() +{ + local host=$1; shift + local vlan=$1; shift + + echo 192.0.2.$((16 * (vlan - 10) + host)) +} + +host_create() +{ + local dev=$1; shift + local host=$1; shift + + adf_simple_if_init $dev + + mtu_set $dev 10000 + defer mtu_restore $dev + + vlan_create $dev 10 v$dev $(ipaddr $host 10)/28 + defer vlan_destroy $dev 10 + ip link set dev $dev.10 type vlan egress 0:0 + + vlan_create $dev 11 v$dev $(ipaddr $host 11)/28 + defer vlan_destroy $dev 11 + ip link set dev $dev.11 type vlan egress 0:1 +} + +h1_create() +{ + host_create $h1 1 +} + +h2_create() +{ + host_create $h2 2 + + tc qdisc add dev $h2 clsact + defer tc qdisc del dev $h2 clsact + + # Some of the tests in this suite use multicast traffic. As this traffic + # enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus + # e.g. traffic ingressing through $swp2 is flooded to $swp3 (the + # intended destination) and $swp5 (which is intended as ingress for + # another stream of traffic). + # + # This is generally not a problem, but if the $swp5 throughput is lower + # than $swp2 throughput, there will be a build-up at $swp5. That may + # cause packets to fail to queue up at $swp3 due to shared buffer + # quotas, and the test to spuriously fail. + # + # Prevent this by adding a shaper which limits the traffic in $h2 to + # 1Gbps. + + tc qdisc replace dev $h2 root handle 10: tbf rate 200mbit \ + burst 128K limit 1G + defer tc qdisc del dev $h2 root handle 10: +} + +h3_create() +{ + host_create $h3 3 +} + +switch_create() +{ + local intf + local vlan + + ip link add dev br1_10 type bridge + defer ip link del dev br1_10 + + ip link add dev br1_11 type bridge + defer ip link del dev br1_11 + + ip link add dev br2_10 type bridge + defer ip link del dev br2_10 + + ip link add dev br2_11 type bridge + defer ip link del dev br2_11 + + for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do + ip link set dev $intf up + defer ip link set dev $intf down + + mtu_set $intf 10000 + defer mtu_restore $intf + done + + for intf in $swp1 $swp4; do + for vlan in 10 11; do + vlan_create $intf $vlan + defer vlan_destroy $intf $vlan + + ip link set dev $intf.$vlan master br1_$vlan + defer ip link set dev $intf.$vlan nomaster + + ip link set dev $intf.$vlan up + defer ip link set dev $intf.$vlan up + done + done + + for intf in $swp2 $swp3 $swp5; do + for vlan in 10 11; do + vlan_create $intf $vlan + defer vlan_destroy $intf $vlan + + ip link set dev $intf.$vlan master br2_$vlan + defer ip link set dev $intf.$vlan nomaster + + ip link set dev $intf.$vlan up + defer ip link set dev $intf.$vlan up + done + done + + ip link set dev $swp4.10 type vlan egress 0:0 + ip link set dev $swp4.11 type vlan egress 0:1 + for intf in $swp1 $swp2 $swp5; do + for vlan in 10 11; do + ip link set dev $intf.$vlan type vlan ingress 0:0 1:1 + done + done + + for intf in $swp3 $swp4; do + tc qdisc replace dev $intf root handle 1: tbf rate 200mbit \ + burst 128K limit 1G + defer tc qdisc del dev $intf root handle 1: + done + + ip link set dev br1_10 up + defer ip link set dev br1_10 down + + ip link set dev br1_11 up + defer ip link set dev br1_11 down + + ip link set dev br2_10 up + defer ip link set dev br2_10 down + + ip link set dev br2_11 up + defer ip link set dev br2_11 down + + local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1) + devlink_port_pool_th_save $swp3 8 + devlink_port_pool_th_set $swp3 8 $size + defer devlink_port_pool_th_restore $swp3 8 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + swp4=${NETIFS[p7]} + swp5=${NETIFS[p8]} + + h3_mac=$(mac_get $h3) + + adf_vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +ping_ipv4() +{ + ping_test $h1.10 $(ipaddr 3 10) " from host 1, vlan 10" + ping_test $h1.11 $(ipaddr 3 11) " from host 1, vlan 11" + ping_test $h2.10 $(ipaddr 3 10) " from host 2, vlan 10" + ping_test $h2.11 $(ipaddr 3 11) " from host 2, vlan 11" +} + +get_tc() +{ + local vlan=$1; shift + + echo $((vlan - 10)) +} + +get_qdisc_handle() +{ + local vlan=$1; shift + + local tc=$(get_tc $vlan) + local band=$((8 - tc)) + + # Handle is 107: for TC1, 108: for TC0. + echo "10$band:" +} + +get_qdisc_backlog() +{ + local vlan=$1; shift + + qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .backlog +} + +get_mc_transmit_queue() +{ + local vlan=$1; shift + + local tc=$(($(get_tc $vlan) + 8)) + ethtool_stats_get $swp3 tc_transmit_queue_tc_$tc +} + +get_nmarked() +{ + local vlan=$1; shift + + ethtool_stats_get $swp3 ecn_marked +} + +get_qdisc_nmarked() +{ + local vlan=$1; shift + + busywait_for_counter 1100 +1 \ + qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .marked +} + +get_qdisc_npackets() +{ + local vlan=$1; shift + + busywait_for_counter 1100 +1 \ + qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets +} + +send_packets() +{ + local vlan=$1; shift + local proto=$1; shift + local pkts=$1; shift + + $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \ + -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \ + -t $proto -q -c $pkts "$@" +} + +# This sends traffic in an attempt to build a backlog of $size. Returns 0 on +# success. After 10 failed attempts it bails out and returns 1. It dumps the +# backlog size to stdout. +build_backlog() +{ + local vlan=$1; shift + local size=$1; shift + local proto=$1; shift + + local tc=$((vlan - 10)) + local band=$((8 - tc)) + local cur=-1 + local i=0 + + while :; do + sleep 1 + local cur=$(busywait 1100 until_counter_is "> $cur" \ + get_qdisc_backlog $vlan) + local diff=$((size - cur)) + local pkts=$(((diff + 7999) / 8000)) + + if ((cur >= size)); then + echo $cur + return 0 + elif ((i++ > 10)); then + echo $cur + return 1 + fi + + send_packets $vlan $proto $pkts "$@" + done +} + +check_marking() +{ + local get_nmarked=$1; shift + local vlan=$1; shift + local cond=$1; shift + + local npackets_0=$(get_qdisc_npackets $vlan) + local nmarked_0=$($get_nmarked $vlan) + sleep 5 + local npackets_1=$(get_qdisc_npackets $vlan) + local nmarked_1=$($get_nmarked $vlan) + + local nmarked_d=$((nmarked_1 - nmarked_0)) + local npackets_d=$((npackets_1 - npackets_0)) + local pct=$((100 * nmarked_d / npackets_d)) + + echo $pct + ((pct $cond)) +} + +ecn_test_common() +{ + local name=$1; shift + local get_nmarked=$1; shift + local vlan=$1; shift + local limit=$1; shift + local backlog + local pct + + # Build the below-the-limit backlog using UDP. We could use TCP just + # fine, but this way we get a proof that UDP is accepted when queue + # length is below the limit. The main stream is using TCP, and if the + # limit is misconfigured, we would see this traffic being ECN marked. + RET=0 + backlog=$(build_backlog $vlan $((2 * limit / 3)) udp) + check_err $? "Could not build the requested backlog" + pct=$(check_marking "$get_nmarked" $vlan "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "TC $((vlan - 10)): $name backlog < limit" + + # Now push TCP, because non-TCP traffic would be early-dropped after the + # backlog crosses the limit, and we want to make sure that the backlog + # is above the limit. + RET=0 + backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_marking "$get_nmarked" $vlan ">= 95") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95." + log_test "TC $((vlan - 10)): $name backlog > limit" +} + +__do_ecn_test() +{ + local get_nmarked=$1; shift + local vlan=$1; shift + local limit=$1; shift + local name=${1-ECN}; shift + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + defer stop_traffic_sleep $! + sleep 1 + + ecn_test_common "$name" "$get_nmarked" $vlan $limit + + # Up there we saw that UDP gets accepted when backlog is below the + # limit. Now that it is above, it should all get dropped, and backlog + # building should fail. + RET=0 + build_backlog $vlan $((2 * limit)) udp >/dev/null + check_fail $? "UDP traffic went into backlog instead of being early-dropped" + log_test "TC $((vlan - 10)): $name backlog > limit: UDP early-dropped" +} + +do_ecn_test() +{ + local vlan=$1; shift + local limit=$1; shift + + in_defer_scope \ + __do_ecn_test get_nmarked "$vlan" "$limit" +} + +do_ecn_test_perband() +{ + local vlan=$1; shift + local limit=$1; shift + + mlxsw_only_on_spectrum 3+ || return + in_defer_scope \ + __do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN" +} + +__do_ecn_nodrop_test() +{ + local vlan=$1; shift + local limit=$1; shift + local name="ECN nodrop" + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + defer stop_traffic_sleep $! + sleep 1 + + ecn_test_common "$name" get_nmarked $vlan $limit + + # Up there we saw that UDP gets accepted when backlog is below the + # limit. Now that it is above, in nodrop mode, make sure it goes to + # backlog as well. + RET=0 + build_backlog $vlan $((2 * limit)) udp >/dev/null + check_err $? "UDP traffic was early-dropped instead of getting into backlog" + log_test "TC $((vlan - 10)): $name backlog > limit: UDP not dropped" +} + +do_ecn_nodrop_test() +{ + in_defer_scope \ + __do_ecn_nodrop_test "$@" +} + +__do_red_test() +{ + local vlan=$1; shift + local limit=$1; shift + local backlog + local pct + + # Use ECN-capable TCP to verify there's no marking even though the queue + # is above limit. + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + defer stop_traffic_sleep $! + + # Pushing below the queue limit should work. + RET=0 + backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_marking get_nmarked $vlan "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "TC $((vlan - 10)): RED backlog < limit" + + # Pushing above should not. + RET=0 + backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01) + check_fail $? "Traffic went into backlog instead of being early-dropped" + pct=$(check_marking get_nmarked $vlan "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + backlog=$(get_qdisc_backlog $vlan) + local diff=$((limit - backlog)) + pct=$((100 * diff / limit)) + ((-15 <= pct && pct <= 15)) + check_err $? "backlog $backlog / $limit expected <= 15% distance" + log_test "TC $((vlan - 10)): RED backlog > limit" +} + +do_red_test() +{ + in_defer_scope \ + __do_red_test "$@" +} + +__do_mc_backlog_test() +{ + local vlan=$1; shift + local limit=$1; shift + local backlog + local pct + + RET=0 + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc + defer stop_traffic_sleep $! + + start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc + defer stop_traffic_sleep $! + + qbl=$(busywait 5000 until_counter_is ">= 500000" \ + get_qdisc_backlog $vlan) + check_err $? "Could not build MC backlog" + + # Verify that we actually see the backlog on BUM TC. Do a busywait as + # well, performance blips might cause false fail. + local ebl + ebl=$(busywait 5000 until_counter_is ">= 500000" \ + get_mc_transmit_queue $vlan) + check_err $? "MC backlog reported by qdisc not visible in ethtool" + + log_test "TC $((vlan - 10)): Qdisc reports MC backlog" +} + +do_mc_backlog_test() +{ + in_defer_scope \ + __do_mc_backlog_test "$@" +} + +__do_mark_test() +{ + local vlan=$1; shift + local limit=$1; shift + local subtest=$1; shift + local fetch_counter=$1; shift + local should_fail=$1; shift + local base + + mlxsw_only_on_spectrum 2+ || return + + RET=0 + + start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ + $h3_mac tos=0x01 + defer stop_traffic_sleep $! + + # Create a bit of a backlog and observe no mirroring due to marks. + qevent_rule_install_$subtest + + build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01 >/dev/null + + base=$($fetch_counter) + count=$(busywait 1100 until_counter_is ">= $((base + 1))" \ + $fetch_counter) + check_fail $? "Spurious packets ($base -> $count) observed without buffer pressure" + + # Above limit, everything should be mirrored, we should see lots of + # packets. + build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01 >/dev/null + busywait_for_counter 1100 +2500 \ + $fetch_counter > /dev/null + check_err_fail "$should_fail" $? "ECN-marked packets $subtest'd" + + # When the rule is uninstalled, there should be no mirroring. + qevent_rule_uninstall_$subtest + busywait_for_counter 1100 +10 \ + $fetch_counter > /dev/null + check_fail $? "Spurious packets observed after uninstall" + + if ((should_fail)); then + log_test "TC $((vlan - 10)): marked packets not $subtest'd" + else + log_test "TC $((vlan - 10)): marked packets $subtest'd" + fi +} + +do_mark_test() +{ + in_defer_scope \ + __do_mark_test "$@" +} + +__do_drop_test() +{ + local vlan=$1; shift + local limit=$1; shift + local trigger=$1; shift + local subtest=$1; shift + local fetch_counter=$1; shift + local base + local now + + mlxsw_only_on_spectrum 2+ || return + + RET=0 + + start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) $h3_mac + defer stop_traffic_sleep $! + + # Create a bit of a backlog and observe no mirroring due to drops. + qevent_rule_install_$subtest + base=$($fetch_counter) + + build_backlog $vlan $((2 * limit / 3)) udp >/dev/null + + busywait 1100 until_counter_is ">= $((base + 1))" $fetch_counter >/dev/null + check_fail $? "Spurious packets observed without buffer pressure" + + # Push to the queue until it's at the limit. The configured limit is + # rounded by the qdisc and then by the driver, so this is the best we + # can do to get to the real limit of the system. + build_backlog $vlan $((3 * limit / 2)) udp >/dev/null + + base=$($fetch_counter) + send_packets $vlan udp 100 + + now=$(busywait 1100 until_counter_is ">= $((base + 95))" $fetch_counter) + check_err $? "${trigger}ped packets not observed: 100 expected, $((now - base)) seen" + + # When no extra traffic is injected, there should be no mirroring. + busywait 1100 until_counter_is ">= $((base + 110))" \ + $fetch_counter >/dev/null + check_fail $? "Spurious packets observed" + + # When the rule is uninstalled, there should be no mirroring. + qevent_rule_uninstall_$subtest + send_packets $vlan udp 100 + now=$(busywait 1100 until_counter_is ">= $((base + 110))" \ + $fetch_counter) + check_fail $? "$((now - base)) spurious packets observed after uninstall" + + log_test "TC $((vlan - 10)): ${trigger}ped packets $subtest'd" +} + +do_drop_test() +{ + in_defer_scope \ + __do_drop_test "$@" +} + +qevent_rule_install_mirror() +{ + tc filter add block 10 pref 1234 handle 102 matchall skip_sw \ + action mirred egress mirror dev $swp2 hw_stats disabled +} + +qevent_rule_uninstall_mirror() +{ + tc filter del block 10 pref 1234 handle 102 matchall +} + +qevent_counter_fetch_mirror() +{ + tc_rule_handle_stats_get "dev $h2 ingress" 101 +} + +do_drop_mirror_test() +{ + local vlan=$1; shift + local limit=$1; shift + local qevent_name=$1; shift + + tc filter add dev $h2 ingress pref 1 handle 101 prot ip \ + flower skip_sw ip_proto udp \ + action drop + + do_drop_test "$vlan" "$limit" "$qevent_name" mirror \ + qevent_counter_fetch_mirror + + tc filter del dev $h2 ingress pref 1 handle 101 flower +} + +do_mark_mirror_test() +{ + local vlan=$1; shift + local limit=$1; shift + + tc filter add dev $h2 ingress pref 1 handle 101 prot ip \ + flower skip_sw ip_proto tcp \ + action drop + + do_mark_test "$vlan" "$limit" mirror \ + qevent_counter_fetch_mirror \ + $(: should_fail=)0 + + tc filter del dev $h2 ingress pref 1 handle 101 flower +} + +qevent_rule_install_trap() +{ + tc filter add block 10 pref 1234 handle 102 matchall skip_sw \ + action trap hw_stats disabled +} + +qevent_rule_uninstall_trap() +{ + tc filter del block 10 pref 1234 handle 102 matchall +} + +qevent_counter_fetch_trap() +{ + local trap_name=$1; shift + + devlink_trap_rx_packets_get "$trap_name" +} + +do_drop_trap_test() +{ + local vlan=$1; shift + local limit=$1; shift + local trap_name=$1; shift + + do_drop_test "$vlan" "$limit" "$trap_name" trap \ + "qevent_counter_fetch_trap $trap_name" +} + +qevent_rule_install_trap_fwd() +{ + tc filter add block 10 pref 1234 handle 102 matchall skip_sw \ + action trap_fwd hw_stats disabled +} + +qevent_rule_uninstall_trap_fwd() +{ + tc filter del block 10 pref 1234 handle 102 matchall +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh new file mode 100755 index 000000000000..8902a115d9cd --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -0,0 +1,168 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + ecn_test + ecn_test_perband + ecn_nodrop_test + red_test + mc_backlog_test + red_mirror_test + red_trap_test + ecn_mirror_test +" +: ${QDISC:=ets} +source sch_red_core.sh + +# do_ecn_test first build 2/3 of the requested backlog and expects no marking, +# and then builds 3/2 of it and does expect marking. The values of $BACKLOG1 and +# $BACKLOG2 are far enough not to overlap, so that we can assume that if we do +# see (do not see) marking, it is actually due to the configuration of that one +# TC, and not due to configuration of the other TC leaking over. +BACKLOG1=400000 +BACKLOG2=1000000 + +install_root_qdisc() +{ + tc qdisc add dev $swp3 parent 1: handle 10: $QDISC \ + bands 8 priomap 7 6 5 4 3 2 1 0 +} + +install_qdisc_tc0() +{ + local -a args=("$@") + + tc qdisc add dev $swp3 parent 10:8 handle 108: red \ + limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \ + probability 1.0 avpkt 8000 burst 51 "${args[@]}" +} + +install_qdisc_tc1() +{ + local -a args=("$@") + + tc qdisc add dev $swp3 parent 10:7 handle 107: red \ + limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \ + probability 1.0 avpkt 8000 burst 126 "${args[@]}" +} + +install_qdisc() +{ + install_root_qdisc + install_qdisc_tc0 "$@" + install_qdisc_tc1 "$@" + sleep 1 +} + +uninstall_qdisc_tc0() +{ + tc qdisc del dev $swp3 parent 10:8 +} + +uninstall_qdisc_tc1() +{ + tc qdisc del dev $swp3 parent 10:7 +} + +uninstall_root_qdisc() +{ + tc qdisc del dev $swp3 parent 1: +} + +uninstall_qdisc() +{ + uninstall_qdisc_tc0 + uninstall_qdisc_tc1 + uninstall_root_qdisc +} + +ecn_test() +{ + install_qdisc ecn + defer uninstall_qdisc + + do_ecn_test 10 $BACKLOG1 + do_ecn_test 11 $BACKLOG2 +} + +ecn_test_perband() +{ + install_qdisc ecn + defer uninstall_qdisc + + do_ecn_test_perband 10 $BACKLOG1 + do_ecn_test_perband 11 $BACKLOG2 +} + +ecn_nodrop_test() +{ + install_qdisc ecn nodrop + defer uninstall_qdisc + + do_ecn_nodrop_test 10 $BACKLOG1 + do_ecn_nodrop_test 11 $BACKLOG2 +} + +red_test() +{ + install_qdisc + defer uninstall_qdisc + + # Make sure that we get the non-zero value if there is any. + local cur=$(busywait 1100 until_counter_is "> 0" \ + qdisc_stats_get $swp3 10: .backlog) + (( cur == 0 )) + check_err $? "backlog of $cur observed on non-busy qdisc" + log_test "$QDISC backlog properly cleaned" + + do_red_test 10 $BACKLOG1 + do_red_test 11 $BACKLOG2 +} + +mc_backlog_test() +{ + install_qdisc + defer uninstall_qdisc + + # Note that the backlog numbers here do not correspond to RED + # configuration, but are arbitrary. + do_mc_backlog_test 10 $BACKLOG1 + do_mc_backlog_test 11 $BACKLOG2 +} + +red_mirror_test() +{ + install_qdisc qevent early_drop block 10 + defer uninstall_qdisc + + do_drop_mirror_test 10 $BACKLOG1 early_drop + do_drop_mirror_test 11 $BACKLOG2 early_drop +} + +red_trap_test() +{ + install_qdisc qevent early_drop block 10 + defer uninstall_qdisc + + do_drop_trap_test 10 $BACKLOG1 early_drop + do_drop_trap_test 11 $BACKLOG2 early_drop +} + +ecn_mirror_test() +{ + install_qdisc ecn qevent mark block 10 + defer uninstall_qdisc + + do_mark_mirror_test 10 $BACKLOG1 + do_mark_mirror_test 11 $BACKLOG2 +} + +bail_on_lldpad "configure DCB" "configure Qdiscs" + +trap cleanup EXIT +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh new file mode 100755 index 000000000000..76820a0e9a1b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +QDISC=prio +source sch_red_ets.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh new file mode 100755 index 000000000000..e9043771787b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + ecn_test + ecn_test_perband + ecn_nodrop_test + red_test + mc_backlog_test + red_mirror_test +" +source sch_red_core.sh + +BACKLOG=300000 + +install_qdisc() +{ + local -a args=("$@") + + tc qdisc add dev $swp3 parent 1: handle 108: red \ + limit 1000000 min $BACKLOG max $((BACKLOG + 1)) \ + probability 1.0 avpkt 8000 burst 38 "${args[@]}" + sleep 1 +} + +uninstall_qdisc() +{ + tc qdisc del dev $swp3 parent 1: +} + +ecn_test() +{ + install_qdisc ecn + defer uninstall_qdisc + + do_ecn_test 10 $BACKLOG +} + +ecn_test_perband() +{ + install_qdisc ecn + defer uninstall_qdisc + + do_ecn_test_perband 10 $BACKLOG +} + +ecn_nodrop_test() +{ + install_qdisc ecn nodrop + defer uninstall_qdisc + + do_ecn_nodrop_test 10 $BACKLOG +} + +red_test() +{ + install_qdisc + defer uninstall_qdisc + + do_red_test 10 $BACKLOG +} + +mc_backlog_test() +{ + install_qdisc + defer uninstall_qdisc + + # Note that the backlog value here does not correspond to RED + # configuration, but is arbitrary. + do_mc_backlog_test 10 $BACKLOG +} + +red_mirror_test() +{ + install_qdisc qevent early_drop block 10 + defer uninstall_qdisc + + do_drop_mirror_test 10 $BACKLOG +} + +bail_on_lldpad "configure DCB" "configure Qdiscs" + +trap cleanup EXIT +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh new file mode 100755 index 000000000000..ecc3664376b3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +sch_tbf_pre_hook() +{ + bail_on_lldpad "configure DCB" "configure Qdiscs" +} + +lib_dir=$(dirname $0)/../../../net/forwarding +TCFLAGS=skip_sw +source $lib_dir/sch_tbf_ets.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh new file mode 100755 index 000000000000..2e0a4efb1703 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +sch_tbf_pre_hook() +{ + bail_on_lldpad "configure DCB" "configure Qdiscs" +} + +lib_dir=$(dirname $0)/../../../net/forwarding +TCFLAGS=skip_sw +source $lib_dir/sch_tbf_prio.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh new file mode 100755 index 000000000000..6679a338dfc4 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +sch_tbf_pre_hook() +{ + bail_on_lldpad "configure DCB" "configure Qdiscs" +} + +lib_dir=$(dirname $0)/../../../net/forwarding +TCFLAGS=skip_sw +source $lib_dir/sch_tbf_root.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh new file mode 100755 index 000000000000..c068e6c2a580 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh @@ -0,0 +1,243 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + port_pool_test + port_tc_ip_test + port_tc_arp_test +" + +NUM_NETIFS=2 +source ../../../net/forwarding/lib.sh +source ../../../net/forwarding/devlink_lib.sh +source mlxsw_lib.sh + +SB_POOL_ING=0 +SB_POOL_EGR_CPU=10 + +SB_ITC_CPU_IP=2 +SB_ITC_CPU_ARP=2 +SB_ITC=0 + +h1_create() +{ + simple_if_init $h1 192.0.1.1/24 + tc qdisc add dev $h1 clsact + + # Add egress filter on $h1 that will guarantee that the packet sent, + # will be the only packet being passed to the device. + tc filter add dev $h1 egress pref 2 handle 102 matchall action drop +} + +h1_destroy() +{ + tc filter del dev $h1 egress pref 2 handle 102 matchall action drop + tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.1.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.1.2/24 + tc qdisc add dev $h2 clsact + + # Add egress filter on $h2 that will guarantee that the packet sent, + # will be the only packet being passed to the device. + tc filter add dev $h2 egress pref 1 handle 101 matchall action drop +} + +h2_destroy() +{ + tc filter del dev $h2 egress pref 1 handle 101 matchall action drop + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.1.2/24 +} + +sb_occ_pool_check() +{ + local dl_port=$1; shift + local pool=$1; shift + local exp_max_occ=$1 + local max_occ + local err=0 + + max_occ=$(devlink sb -j occupancy show $dl_port \ + | jq -e ".[][][\"pool\"][\"$pool\"][\"max\"]") + + if [[ "$max_occ" -ne "$exp_max_occ" ]]; then + err=1 + fi + + echo $max_occ + return $err +} + +sb_occ_itc_check() +{ + local dl_port=$1; shift + local itc=$1; shift + local exp_max_occ=$1 + local max_occ + local err=0 + + max_occ=$(devlink sb -j occupancy show $dl_port \ + | jq -e ".[][][\"itc\"][\"$itc\"][\"max\"]") + + if [[ "$max_occ" -ne "$exp_max_occ" ]]; then + err=1 + fi + + echo $max_occ + return $err +} + +sb_occ_etc_check() +{ + local dl_port=$1; shift + local etc=$1; shift + local exp_max_occ=$1; shift + local max_occ + local err=0 + + max_occ=$(devlink sb -j occupancy show $dl_port \ + | jq -e ".[][][\"etc\"][\"$etc\"][\"max\"]") + + if [[ "$max_occ" -ne "$exp_max_occ" ]]; then + err=1 + fi + + echo $max_occ + return $err +} + +port_pool_test() +{ + local exp_max_occ=$(devlink_cell_size_get) + local max_occ + + tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \ + src_mac $h1mac dst_mac $h2mac \ + src_ip 192.0.1.1 dst_ip 192.0.1.2 \ + action pass + + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ + -t ip -q + + devlink sb occupancy snapshot $DEVLINK_DEV + + RET=0 + max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ) + check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h2) ingress pool" + + RET=0 + max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ) + check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress pool" + + tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \ + src_mac $h1mac dst_mac $h2mac \ + src_ip 192.0.1.1 dst_ip 192.0.1.2 \ + action pass +} + +port_tc_ip_test() +{ + local exp_max_occ=$(devlink_cell_size_get) + local max_occ + + tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \ + src_mac $h1mac dst_mac $h2mac \ + src_ip 192.0.1.1 dst_ip 192.0.1.2 \ + action pass + + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ + -t ip -q + + devlink sb occupancy snapshot $DEVLINK_DEV + + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h2) ingress TC - IP packet" + + RET=0 + max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ) + check_err $? "Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress TC - IP packet" + + tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \ + src_mac $h1mac dst_mac $h2mac \ + src_ip 192.0.1.1 dst_ip 192.0.1.2 \ + action pass +} + +port_tc_arp_test() +{ + local exp_max_occ=$(devlink_cell_size_get) + local max_occ + + tc filter add dev $h1 egress protocol arp pref 1 handle 101 flower \ + src_mac $h1mac action pass + + devlink sb occupancy clearmax $DEVLINK_DEV + + $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q + + devlink sb occupancy snapshot $DEVLINK_DEV + + RET=0 + max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) + check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "physical port's($h2) ingress TC - ARP packet" + + RET=0 + max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ) + check_err $? "Expected egress TC($SB_ITC_IP2ME) max occupancy to be $exp_max_occ, but got $max_occ" + log_test "CPU port's egress TC - ARP packet" + + tc filter del dev $h1 egress protocol arp pref 1 handle 101 flower \ + src_mac $h1mac action pass +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + dl_port1=$(devlink_port_by_netdev $h1) + dl_port2=$(devlink_port_by_netdev $h2) + + cpu_dl_port=$(devlink_cpu_port_get) + + vrf_prepare + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py new file mode 100755 index 000000000000..2223337eed0c --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +import json as j +import random + + +class SkipTest(Exception): + pass + + +class RandomValuePicker: + """ + Class for storing shared buffer configuration. Can handle 3 different + objects, pool, tcbind and portpool. Provide an interface to get random + values for a specific object type as the follow: + 1. Pool: + - random size + + 2. TcBind: + - random pool number + - random threshold + + 3. PortPool: + - random threshold + """ + def __init__(self, pools): + self._pools = [] + for pool in pools: + self._pools.append(pool) + + def _cell_size(self): + return self._pools[0]["cell_size"] + + def _get_static_size(self, th): + # For threshold of 16, this works out to be about 12MB on Spectrum-1, + # and about 17MB on Spectrum-2. + return th * 8000 * self._cell_size() + + def _get_size(self): + return self._get_static_size(16) + + def _get_thtype(self): + return "static" + + def _get_th(self, pool): + # Threshold value could be any integer between 3 to 16 + th = random.randint(3, 16) + if pool["thtype"] == "dynamic": + return th + else: + return self._get_static_size(th) + + def _get_pool(self, direction): + ing_pools = [] + egr_pools = [] + for pool in self._pools: + if pool["type"] == "ingress": + ing_pools.append(pool) + else: + egr_pools.append(pool) + if direction == "ingress": + arr = ing_pools + else: + arr = egr_pools + return arr[random.randint(0, len(arr) - 1)] + + def get_value(self, objid): + if isinstance(objid, Pool): + if objid["pool"] in [4, 8, 9, 10]: + # The threshold type of pools 4, 8, 9 and 10 cannot be changed + raise SkipTest() + else: + return (self._get_size(), self._get_thtype()) + if isinstance(objid, TcBind): + if objid["tc"] >= 8: + # Multicast TCs cannot be changed + raise SkipTest() + else: + pool = self._get_pool(objid["type"]) + th = self._get_th(pool) + pool_n = pool["pool"] + return (pool_n, th) + if isinstance(objid, PortPool): + pool_n = objid["pool"] + pool = self._pools[pool_n] + assert pool["pool"] == pool_n + th = self._get_th(pool) + return (th,) + + +class RecordValuePickerException(Exception): + pass + + +class RecordValuePicker: + """ + Class for storing shared buffer configuration. Can handle 2 different + objects, pool and tcbind. Provide an interface to get the stored values per + object type. + """ + def __init__(self, objlist): + self._recs = [] + for item in objlist: + self._recs.append({"objid": item, "value": item.var_tuple()}) + + def get_value(self, objid): + if isinstance(objid, Pool) and objid["pool"] in [4, 8, 9, 10]: + # The threshold type of pools 4, 8, 9 and 10 cannot be changed + raise SkipTest() + if isinstance(objid, TcBind) and objid["tc"] >= 8: + # Multicast TCs cannot be changed + raise SkipTest() + for rec in self._recs: + if rec["objid"].weak_eq(objid): + return rec["value"] + raise RecordValuePickerException() + + +def run_cmd(cmd, json=False): + out = subprocess.check_output(cmd, shell=True) + if json: + return j.loads(out) + return out + + +def run_json_cmd(cmd): + return run_cmd(cmd, json=True) + + +def log_test(test_name, err_msg=None): + if err_msg: + print("\t%s" % err_msg) + print("TEST: %-80s [FAIL]" % test_name) + else: + print("TEST: %-80s [ OK ]" % test_name) + + +class CommonItem(dict): + varitems = [] + + def var_tuple(self): + ret = [] + self.varitems.sort() + for key in self.varitems: + ret.append(self[key]) + return tuple(ret) + + def weak_eq(self, other): + for key in self: + if key in self.varitems: + continue + if self[key] != other[key]: + return False + return True + + +class CommonList(list): + def get_by(self, by_obj): + for item in self: + if item.weak_eq(by_obj): + return item + return None + + def del_by(self, by_obj): + for item in self: + if item.weak_eq(by_obj): + self.remove(item) + + +class Pool(CommonItem): + varitems = ["size", "thtype"] + + def dl_set(self, dlname, size, thtype): + run_cmd("devlink sb pool set {} sb {} pool {} size {} thtype {}".format(dlname, self["sb"], + self["pool"], + size, thtype)) + + +class PoolList(CommonList): + pass + + +def get_pools(dlname, direction=None): + d = run_json_cmd("devlink sb pool show -j") + pools = PoolList() + for pooldict in d["pool"][dlname]: + if not direction or direction == pooldict["type"]: + pools.append(Pool(pooldict)) + return pools + + +def do_check_pools(dlname, pools, vp): + for pool in pools: + pre_pools = get_pools(dlname) + try: + (size, thtype) = vp.get_value(pool) + except SkipTest: + continue + pool.dl_set(dlname, size, thtype) + post_pools = get_pools(dlname) + pool = post_pools.get_by(pool) + + err_msg = None + if pool["size"] != size: + err_msg = "Incorrect pool size (got {}, expected {})".format(pool["size"], size) + if pool["thtype"] != thtype: + err_msg = "Incorrect pool threshold type (got {}, expected {})".format(pool["thtype"], thtype) + + pre_pools.del_by(pool) + post_pools.del_by(pool) + if pre_pools != post_pools: + err_msg = "Other pool setup changed as well" + log_test("pool {} of sb {} set verification".format(pool["pool"], + pool["sb"]), err_msg) + + +def check_pools(dlname, pools): + # Save defaults + record_vp = RecordValuePicker(pools) + + # For each pool, set random size and static threshold type + do_check_pools(dlname, pools, RandomValuePicker(pools)) + + # Restore defaults + do_check_pools(dlname, pools, record_vp) + + +class TcBind(CommonItem): + varitems = ["pool", "threshold"] + + def __init__(self, port, d): + super(TcBind, self).__init__(d) + self["dlportname"] = port.name + + def dl_set(self, pool, th): + run_cmd("devlink sb tc bind set {} sb {} tc {} type {} pool {} th {}".format(self["dlportname"], + self["sb"], + self["tc"], + self["type"], + pool, th)) + + +class TcBindList(CommonList): + pass + + +def get_tcbinds(ports, verify_existence=False): + d = run_json_cmd("devlink sb tc bind show -j -n") + tcbinds = TcBindList() + for port in ports: + err_msg = None + if port.name not in d["tc_bind"] or len(d["tc_bind"][port.name]) == 0: + err_msg = "No tc bind for port" + else: + for tcbinddict in d["tc_bind"][port.name]: + tcbinds.append(TcBind(port, tcbinddict)) + if verify_existence: + log_test("tc bind existence for port {} verification".format(port.name), err_msg) + return tcbinds + + +def do_check_tcbind(ports, tcbinds, vp): + for tcbind in tcbinds: + pre_tcbinds = get_tcbinds(ports) + try: + (pool, th) = vp.get_value(tcbind) + except SkipTest: + continue + tcbind.dl_set(pool, th) + post_tcbinds = get_tcbinds(ports) + tcbind = post_tcbinds.get_by(tcbind) + + err_msg = None + if tcbind["pool"] != pool: + err_msg = "Incorrect pool (got {}, expected {})".format(tcbind["pool"], pool) + if tcbind["threshold"] != th: + err_msg = "Incorrect threshold (got {}, expected {})".format(tcbind["threshold"], th) + + pre_tcbinds.del_by(tcbind) + post_tcbinds.del_by(tcbind) + if pre_tcbinds != post_tcbinds: + err_msg = "Other tc bind setup changed as well" + log_test("tc bind {}-{} of sb {} set verification".format(tcbind["dlportname"], + tcbind["tc"], + tcbind["sb"]), err_msg) + + +def check_tcbind(dlname, ports, pools): + tcbinds = get_tcbinds(ports, verify_existence=True) + + # Save defaults + record_vp = RecordValuePicker(tcbinds) + + # Bind each port and unicast TC (TCs < 8) to a random pool and a random + # threshold + do_check_tcbind(ports, tcbinds, RandomValuePicker(pools)) + + # Restore defaults + do_check_tcbind(ports, tcbinds, record_vp) + + +class PortPool(CommonItem): + varitems = ["threshold"] + + def __init__(self, port, d): + super(PortPool, self).__init__(d) + self["dlportname"] = port.name + + def dl_set(self, th): + run_cmd("devlink sb port pool set {} sb {} pool {} th {}".format(self["dlportname"], + self["sb"], + self["pool"], th)) + + +class PortPoolList(CommonList): + pass + + +def get_portpools(ports, verify_existence=False): + d = run_json_cmd("devlink sb port pool -j -n") + portpools = PortPoolList() + for port in ports: + err_msg = None + if port.name not in d["port_pool"] or len(d["port_pool"][port.name]) == 0: + err_msg = "No port pool for port" + else: + for portpooldict in d["port_pool"][port.name]: + portpools.append(PortPool(port, portpooldict)) + if verify_existence: + log_test("port pool existence for port {} verification".format(port.name), err_msg) + return portpools + + +def do_check_portpool(ports, portpools, vp): + for portpool in portpools: + pre_portpools = get_portpools(ports) + (th,) = vp.get_value(portpool) + portpool.dl_set(th) + post_portpools = get_portpools(ports) + portpool = post_portpools.get_by(portpool) + + err_msg = None + if portpool["threshold"] != th: + err_msg = "Incorrect threshold (got {}, expected {})".format(portpool["threshold"], th) + + pre_portpools.del_by(portpool) + post_portpools.del_by(portpool) + if pre_portpools != post_portpools: + err_msg = "Other port pool setup changed as well" + log_test("port pool {}-{} of sb {} set verification".format(portpool["dlportname"], + portpool["pool"], + portpool["sb"]), err_msg) + + +def check_portpool(dlname, ports, pools): + portpools = get_portpools(ports, verify_existence=True) + + # Save defaults + record_vp = RecordValuePicker(portpools) + + # For each port pool, set a random threshold + do_check_portpool(ports, portpools, RandomValuePicker(pools)) + + # Restore defaults + do_check_portpool(ports, portpools, record_vp) + + +class Port: + def __init__(self, name): + self.name = name + + +class PortList(list): + pass + + +def get_ports(dlname): + d = run_json_cmd("devlink port show -j") + ports = PortList() + for name in d["port"]: + if name.find(dlname) == 0 and d["port"][name]["flavour"] == "physical": + ports.append(Port(name)) + return ports + + +def get_device(): + devices_info = run_json_cmd("devlink -j dev info")["info"] + for d in devices_info: + if "mlxsw_spectrum" in devices_info[d]["driver"]: + return d + return None + + +class UnavailableDevlinkNameException(Exception): + pass + + +def test_sb_configuration(): + # Use static seed + random.seed(0) + + dlname = get_device() + if not dlname: + raise UnavailableDevlinkNameException() + + ports = get_ports(dlname) + pools = get_pools(dlname) + + check_pools(dlname, pools) + check_tcbind(dlname, ports, pools) + check_portpool(dlname, ports, pools) + + +test_sb_configuration() diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh new file mode 100644 index 000000000000..f7c168decd1e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../mirror_gre_scale.sh + +mirror_gre_get_target() +{ + local should_fail=$1; shift + local target + + target=$(devlink_resource_size_get span_agents) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh new file mode 120000 index 000000000000..bd670d9dc4e5 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh @@ -0,0 +1 @@ +../spectrum/port_range_scale.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh new file mode 100644 index 000000000000..0b71dfbbb447 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../port_scale.sh + +port_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get physical_ports) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh new file mode 100755 index 000000000000..d7505b933aef --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh +source ../mlxsw_lib.sh + +mlxsw_only_on_spectrum 2+ || exit 1 + +current_test="" + +cleanup() +{ + pre_cleanup + if [ ! -z $current_test ]; then + ${current_test}_cleanup + fi + # Need to reload in order to avoid router abort. + devlink_reload +} + +trap cleanup EXIT + +ALL_TESTS=" + router + tc_flower + mirror_gre + tc_police + port + rif_mac_profile + rif_counter + port_range +" + +for current_test in ${TESTS:-$ALL_TESTS}; do + RET_FIN=0 + source ${current_test}_scale.sh + + num_netifs_var=${current_test^^}_NUM_NETIFS + num_netifs=${!num_netifs_var:-$NUM_NETIFS} + + for should_fail in 0 1; do + RET=0 + target=$(${current_test}_get_target "$should_fail") + if ((target == 0)); then + continue + fi + + ${current_test}_setup_prepare + setup_wait_n $num_netifs + # Update target in case occupancy of a certain resource changed + # following the test setup. + target=$(${current_test}_get_target "$should_fail") + ${current_test}_test "$target" "$should_fail" + if [[ "$should_fail" -eq 0 ]]; then + log_test "'$current_test' $target" + + if ((!RET)); then + tt=${current_test}_traffic_test + if [[ $(type -t $tt) == "function" ]]; then + $tt "$target" + log_test "'$current_test' $target traffic test" + fi + fi + else + log_test "'$current_test' overflow $target" + fi + ${current_test}_cleanup $target + devlink_reload + RET_FIN=$(( RET_FIN || RET )) + done +done +current_test="" + +exit "$RET_FIN" diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh new file mode 120000 index 000000000000..1f5752e8ffc0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh @@ -0,0 +1 @@ +../spectrum/rif_counter_scale.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh new file mode 100644 index 000000000000..303d7cbe3c45 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../rif_mac_profile_scale.sh + +rif_mac_profile_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get rif_mac_profiles) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh new file mode 100644 index 000000000000..1897e163e3ab --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../router_scale.sh + +router_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get kvd) + + if [[ $should_fail -eq 0 ]]; then + target=$((target * 85 / 100)) + else + target=$((target + 1)) + fi + + echo $target +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh new file mode 100755 index 000000000000..4994bea5daf8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -0,0 +1,1176 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking the A-TCAM and C-TCAM operation in Spectrum-2. +# It tries to exercise as many code paths in the eRP state machine as +# possible. + +lib_dir=$(dirname $0)/../../../../net/forwarding + +ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ + multiple_masks_test ctcam_edge_cases_test delta_simple_test \ + delta_two_masks_one_key_test delta_simple_rehash_test \ + bloom_simple_test bloom_complex_test bloom_delta_test \ + max_erp_entries_test max_group_size_test collision_test" +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 198.51.100.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 198.51.100.2/24 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24 +} + +tp_record() +{ + local tracepoint=$1 + local cmd=$2 + + perf record -q -e $tracepoint $cmd + return $? +} + +tp_record_all() +{ + local tracepoint=$1 + local seconds=$2 + + perf record -a -q -e $tracepoint sleep $seconds + return $? +} + +__tp_hit_count() +{ + local tracepoint=$1 + + local perf_output=`perf script -F trace:event,trace` + return `echo $perf_output | grep "$tracepoint:" | wc -l` +} + +tp_check_hits() +{ + local tracepoint=$1 + local count=$2 + + __tp_hit_count $tracepoint + if [[ "$?" -ne "$count" ]]; then + return 1 + fi + return 0 +} + +tp_check_hits_any() +{ + local tracepoint=$1 + + __tp_hit_count $tracepoint + if [[ "$?" -eq "0" ]]; then + return 1 + fi + return 0 +} + +single_mask_test() +{ + # When only a single mask is required, the device uses the master + # mask and not the eRP table. Verify that under this mode the right + # filter is matched + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Single filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 198.51.100.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 2 + check_err $? "Two filters - did not match highest priority" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Two filters - did not match lowest priority" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_err $? "Single filter - did not match after delete" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "single mask test ($tcflags)" +} + +identical_filters_test() +{ + # When two filters that only differ in their priority are used, + # one needs to be inserted into the C-TCAM. This test verifies + # that filters are correctly spilled to C-TCAM and that the right + # filter is matched + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match A-TCAM filter" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match C-TCAM filter after A-TCAM delete" + + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_err $? "Did not match C-TCAM filter after A-TCAM add" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Did not match A-TCAM filter after C-TCAM delete" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + + log_test "identical filters test ($tcflags)" +} + +two_masks_test() +{ + # When more than one mask is required, the eRP table is used. This + # test verifies that the eRP table is correctly allocated and used + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.0.0/8 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Two filters - did not match highest priority" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Single filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Two filters - did not match highest priority after add" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "two masks test ($tcflags)" +} + +multiple_masks_test() +{ + # The number of masks in a region is limited. Once the maximum + # number of masks has been reached filters that require new + # masks are spilled to the C-TCAM. This test verifies that + # spillage is performed correctly and that the right filter is + # matched + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + local index + + RET=0 + + NUM_MASKS=32 + NUM_ERPS=16 + BASE_INDEX=100 + + for i in $(eval echo {1..$NUM_MASKS}); do + index=$((BASE_INDEX - i)) + + if ((i > NUM_ERPS)); then + exp_hits=1 + err_msg="$i filters - C-TCAM spill did not happen when it was expected" + else + exp_hits=0 + err_msg="$i filters - C-TCAM spill happened when it should not" + fi + + tp_record "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" \ + "tc filter add dev $h2 ingress protocol ip pref $index \ + handle $index \ + flower $tcflags \ + dst_ip 192.0.2.2/${i} src_ip 192.0.2.1/${i} \ + action drop" + tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" \ + $exp_hits + check_err $? "$err_msg" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \ + -B 192.0.2.2 -t ip -q + + tc_check_packets "dev $h2 ingress" $index 1 + check_err $? "$i filters - did not match highest priority (add)" + done + + for i in $(eval echo {$NUM_MASKS..1}); do + index=$((BASE_INDEX - i)) + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \ + -B 192.0.2.2 -t ip -q + + tc_check_packets "dev $h2 ingress" $index 2 + check_err $? "$i filters - did not match highest priority (del)" + + tc filter del dev $h2 ingress protocol ip pref $index \ + handle $index flower + done + + log_test "multiple masks test ($tcflags)" +} + +ctcam_two_atcam_masks_test() +{ + RET=0 + + # First case: C-TCAM is disabled when there are two A-TCAM masks. + # We push a filter into the C-TCAM by using two identical filters + # as in identical_filters_test() + + # Filter goes into A-TCAM + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + # Filter goes into C-TCAM + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + # Filter goes into A-TCAM + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.0.0/16 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match A-TCAM filter" + + # Delete both A-TCAM and C-TCAM filters and make sure the remaining + # A-TCAM filter still works + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Did not match A-TCAM filter" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + + log_test "ctcam with two atcam masks test ($tcflags)" +} + +ctcam_one_atcam_mask_test() +{ + RET=0 + + # Second case: C-TCAM is disabled when there is one A-TCAM mask. + # The test is similar to identical_filters_test() + + # Filter goes into A-TCAM + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + # Filter goes into C-TCAM + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match C-TCAM filter" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match A-TCAM filter" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "ctcam with one atcam mask test ($tcflags)" +} + +ctcam_no_atcam_masks_test() +{ + RET=0 + + # Third case: C-TCAM is disabled when there are no A-TCAM masks + # This test exercises the code path that transitions the eRP table + # to its initial state after deleting the last C-TCAM mask + + # Filter goes into A-TCAM + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + # Filter goes into C-TCAM + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "ctcam with no atcam masks test ($tcflags)" +} + +ctcam_edge_cases_test() +{ + # When the C-TCAM is disabled after deleting the last C-TCAM + # mask, we want to make sure the eRP state machine is put in + # the correct state + + ctcam_two_atcam_masks_test + ctcam_one_atcam_mask_test + ctcam_no_atcam_masks_test +} + +delta_simple_test() +{ + # The first filter will create eRP, the second filter will fit into + # the first eRP with delta. Remove the first rule then and check that + # the eRP stays (referenced by the second filter). + + RET=0 + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \ + pref 1 handle 101 flower $tcflags dst_ip 192.0.0.0/24 \ + action drop" + tp_check_hits "objagg:objagg_obj_root_create" 1 + check_err $? "eRP was not created" + + tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \ + pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \ + action drop" + tp_check_hits "objagg:objagg_obj_root_create" 0 + check_err $? "eRP was incorrectly created" + tp_check_hits "objagg:objagg_obj_parent_assign" 1 + check_err $? "delta was not created" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \ + pref 1 handle 101 flower" + tp_check_hits "objagg:objagg_obj_root_destroy" 0 + check_err $? "eRP was incorrectly destroyed" + tp_check_hits "objagg:objagg_obj_parent_unassign" 0 + check_err $? "delta was incorrectly destroyed" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_err $? "Did not match on correct filter after the first was removed" + + tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \ + pref 2 handle 102 flower" + tp_check_hits "objagg:objagg_obj_parent_unassign" 1 + check_err $? "delta was not destroyed" + tp_check_hits "objagg:objagg_obj_root_destroy" 1 + check_err $? "eRP was not destroyed" + + log_test "delta simple test ($tcflags)" +} + +delta_two_masks_one_key_test() +{ + # If 2 keys are the same and only differ in mask in a way that + # they belong under the same ERP (second is delta of the first), + # there should be C-TCAM spill. + + RET=0 + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \ + pref 1 handle 101 flower $tcflags dst_ip 192.0.2.0/24 \ + action drop" + tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 0 + check_err $? "incorrect C-TCAM spill while inserting the first rule" + + tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \ + pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \ + action drop" + tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 1 + check_err $? "C-TCAM spill did not happen while inserting the second rule" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "delta two masks one key test ($tcflags)" +} + +delta_simple_rehash_test() +{ + RET=0 + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + devlink dev param set $DEVLINK_DEV \ + name acl_region_rehash_interval cmode runtime value 0 + check_err $? "Failed to set ACL region rehash interval" + + tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_fail $? "Rehash trace was hit even when rehash should be disabled" + + devlink dev param set $DEVLINK_DEV \ + name acl_region_rehash_interval cmode runtime value 3000 + check_err $? "Failed to set ACL region rehash interval" + + sleep 1 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.1.0/25 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.3.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 103 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tp_record_all mlxsw:* 3 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_err $? "Rehash trace was not hit" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate + check_err $? "Migrate trace was not hit" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end + check_err $? "Migrate end trace was not hit" + tp_record_all mlxsw:* 3 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_err $? "Rehash trace was not hit" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate + check_fail $? "Migrate trace was hit when no migration should happen" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end + check_fail $? "Migrate end trace was hit when no migration should happen" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter after rehash" + + tc_check_packets "dev $h2 ingress" 103 1 + check_fail $? "Matched a wrong filter after rehash" + + tc_check_packets "dev $h2 ingress" 102 2 + check_err $? "Did not match on correct filter after rehash" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + log_test "delta simple rehash test ($tcflags)" +} + +delta_simple_ipv6_rehash_test() +{ + RET=0 + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + devlink dev param set $DEVLINK_DEV \ + name acl_region_rehash_interval cmode runtime value 0 + check_err $? "Failed to set ACL region rehash interval" + + tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_fail $? "Rehash trace was hit even when rehash should be disabled" + + devlink dev param set $DEVLINK_DEV \ + name acl_region_rehash_interval cmode runtime value 3000 + check_err $? "Failed to set ACL region rehash interval" + + sleep 1 + + tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 101 flower \ + $tcflags dst_ip 2001:db8:1::0/121 action drop + tc filter add dev $h2 ingress protocol ipv6 pref 2 handle 102 flower \ + $tcflags dst_ip 2001:db8:2::2 action drop + tc filter add dev $h2 ingress protocol ipv6 pref 3 handle 103 flower \ + $tcflags dst_ip 2001:db8:3::0/120 action drop + + $MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \ + -A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 103 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tp_record_all mlxsw:* 3 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_err $? "Rehash trace was not hit" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate + check_err $? "Migrate trace was not hit" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end + check_err $? "Migrate end trace was not hit" + tp_record_all mlxsw:* 3 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_err $? "Rehash trace was not hit" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate + check_fail $? "Migrate trace was hit when no migration should happen" + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end + check_fail $? "Migrate end trace was hit when no migration should happen" + + $MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \ + -A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter after rehash" + + tc_check_packets "dev $h2 ingress" 103 1 + check_fail $? "Matched a wrong filter after rehash" + + tc_check_packets "dev $h2 ingress" 102 2 + check_err $? "Did not match on correct filter after rehash" + + tc filter del dev $h2 ingress protocol ipv6 pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ipv6 pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 101 flower + + log_test "delta simple IPv6 rehash test ($tcflags)" +} + +TEST_RULE_BASE=256 +declare -a test_rules_inserted + +test_rule_add() +{ + local iface=$1 + local tcflags=$2 + local index=$3 + + if ! [ ${test_rules_inserted[$index]} ] ; then + test_rules_inserted[$index]=false + fi + if ${test_rules_inserted[$index]} ; then + return + fi + + local number=$(( $index + $TEST_RULE_BASE )) + printf -v hexnumber '%x' $number + + batch="${batch}filter add dev $iface ingress protocol ipv6 pref 1 \ + handle $number flower $tcflags \ + src_ip 2001:db8:1::$hexnumber action drop\n" + test_rules_inserted[$index]=true +} + +test_rule_del() +{ + local iface=$1 + local index=$2 + + if ! [ ${test_rules_inserted[$index]} ] ; then + test_rules_inserted[$index]=false + fi + if ! ${test_rules_inserted[$index]} ; then + return + fi + + local number=$(( $index + $TEST_RULE_BASE )) + printf -v hexnumber '%x' $number + + batch="${batch}filter del dev $iface ingress protocol ipv6 pref 1 \ + handle $number flower\n" + test_rules_inserted[$index]=false +} + +test_rule_add_or_remove() +{ + local iface=$1 + local tcflags=$2 + local index=$3 + + if ! [ ${test_rules_inserted[$index]} ] ; then + test_rules_inserted[$index]=false + fi + if ${test_rules_inserted[$index]} ; then + test_rule_del $iface $index + else + test_rule_add $iface $tcflags $index + fi +} + +test_rule_add_or_remove_random_batch() +{ + local iface=$1 + local tcflags=$2 + local total_count=$3 + local skip=0 + local count=0 + local MAXSKIP=20 + local MAXCOUNT=20 + + for ((i=1;i<=total_count;i++)); do + if (( $skip == 0 )) && (($count == 0)); then + ((skip=$RANDOM % $MAXSKIP + 1)) + ((count=$RANDOM % $MAXCOUNT + 1)) + fi + if (( $skip != 0 )); then + ((skip-=1)) + else + ((count-=1)) + test_rule_add_or_remove $iface $tcflags $i + fi + done +} + +delta_massive_ipv6_rehash_test() +{ + RET=0 + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + devlink dev param set $DEVLINK_DEV \ + name acl_region_rehash_interval cmode runtime value 0 + check_err $? "Failed to set ACL region rehash interval" + + tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7 + tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash + check_fail $? "Rehash trace was hit even when rehash should be disabled" + + RANDOM=4432897 + declare batch="" + test_rule_add_or_remove_random_batch $h2 $tcflags 5000 + + echo -n -e $batch | tc -b - + + declare batch="" + test_rule_add_or_remove_random_batch $h2 $tcflags 5000 + + devlink dev param set $DEVLINK_DEV \ + name acl_region_rehash_interval cmode runtime value 3000 + check_err $? "Failed to set ACL region rehash interval" + + sleep 1 + + tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 101 flower \ + $tcflags dst_ip 2001:db8:1::0/121 action drop + tc filter add dev $h2 ingress protocol ipv6 pref 2 handle 102 flower \ + $tcflags dst_ip 2001:db8:2::2 action drop + tc filter add dev $h2 ingress protocol ipv6 pref 3 handle 103 flower \ + $tcflags dst_ip 2001:db8:3::0/120 action drop + + $MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \ + -A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 103 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + echo -n -e $batch | tc -b - + + devlink dev param set $DEVLINK_DEV \ + name acl_region_rehash_interval cmode runtime value 0 + check_err $? "Failed to set ACL region rehash interval" + + $MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \ + -A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter after rehash" + + tc_check_packets "dev $h2 ingress" 103 1 + check_fail $? "Matched a wrong filter after rehash" + + tc_check_packets "dev $h2 ingress" 102 2 + check_err $? "Did not match on correct filter after rehash" + + tc filter del dev $h2 ingress protocol ipv6 pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ipv6 pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 101 flower + + declare batch="" + for i in {1..5000}; do + test_rule_del $h2 $tcflags $i + done + echo -e $batch | tc -b - + + log_test "delta massive IPv6 rehash test ($tcflags)" +} + +bloom_simple_test() +{ + # Bloom filter requires that the eRP table is used. This test + # verifies that Bloom filter is not harming correctness of ACLs. + # First, make sure that eRP table is used and then set rule patterns + # which are distant enough and will result skipping a lookup after + # consulting the Bloom filter. Although some eRP lookups are skipped, + # the correct filter should be hit. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 5 handle 104 flower \ + $tcflags dst_ip 198.51.100.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.0.0/8 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Two filters - did not match highest priority" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 104 1 + check_err $? "Single filter - did not match" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Low prio filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 198.0.0.0/8 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Two filters - did not match highest priority after add" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 5 handle 104 flower + + log_test "bloom simple test ($tcflags)" +} + +bloom_complex_test() +{ + # Bloom filter index computation is affected from region ID, eRP + # ID and from the region key size. In order to exercise those parts + # of the Bloom filter code, use a series of regions, each with a + # different key size and send packet that should hit all of them. + local index + + RET=0 + NUM_CHAINS=4 + BASE_INDEX=100 + + # Create chain with up to 2 key blocks (ip_proto only) + tc chain add dev $h2 ingress chain 1 protocol ip flower \ + ip_proto tcp &> /dev/null + # Create chain with 2-4 key blocks (ip_proto, src MAC) + tc chain add dev $h2 ingress chain 2 protocol ip flower \ + ip_proto tcp \ + src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null + # Create chain with 4-8 key blocks (ip_proto, src & dst MAC, IPv4 dest) + tc chain add dev $h2 ingress chain 3 protocol ip flower \ + ip_proto tcp \ + dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \ + src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \ + dst_ip 0.0.0.0/32 &> /dev/null + # Default chain contains all fields and therefore is 8-12 key blocks + tc chain add dev $h2 ingress chain 4 + + # We need at least 2 rules in every region to have eRP table active + # so create a dummy rule per chain using a different pattern + for i in $(eval echo {0..$NUM_CHAINS}); do + index=$((BASE_INDEX - 1 - i)) + tc filter add dev $h2 ingress chain $i protocol ip \ + pref 2 handle $index flower \ + $tcflags ip_proto tcp action drop + done + + # Add rules to test Bloom filter, each in a different chain + index=$BASE_INDEX + tc filter add dev $h2 ingress protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags dst_ip 192.0.0.0/16 action goto chain 1 + tc filter add dev $h2 ingress chain 1 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags action goto chain 2 + tc filter add dev $h2 ingress chain 2 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags src_mac $h1mac action goto chain 3 + tc filter add dev $h2 ingress chain 3 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags dst_ip 192.0.0.0/8 action goto chain 4 + tc filter add dev $h2 ingress chain 4 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags src_ip 192.0.2.0/24 action drop + + # Send a packet that is supposed to hit all chains + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + for i in $(eval echo {0..$NUM_CHAINS}); do + index=$((BASE_INDEX + i + 1)) + tc_check_packets "dev $h2 ingress" $index 1 + check_err $? "Did not match chain $i" + done + + # Rules cleanup + for i in $(eval echo {$NUM_CHAINS..0}); do + index=$((BASE_INDEX - i - 1)) + tc filter del dev $h2 ingress chain $i \ + pref 2 handle $index flower + index=$((BASE_INDEX + i + 1)) + tc filter del dev $h2 ingress chain $i \ + pref 1 handle $index flower + done + + # Chains cleanup + for i in $(eval echo {$NUM_CHAINS..1}); do + tc chain del dev $h2 ingress chain $i + done + + log_test "bloom complex test ($tcflags)" +} + + +bloom_delta_test() +{ + # When multiple masks are used, the eRP table is activated. When + # masks are close enough (delta) the masks reside on the same + # eRP table. This test verifies that the eRP table is correctly + # allocated and used in delta condition and that Bloom filter is + # still functional with delta. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.1.0.0/16 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.1.2.1 -B 192.1.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Single filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.2.1.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.2.1.1 -B 192.2.1.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Delta filters - did not match second filter" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "bloom delta test ($tcflags)" +} + +max_erp_entries_test() +{ + # The number of eRP entries is limited. Once the maximum number of eRPs + # has been reached, filters cannot be added. This test verifies that + # when this limit is reached, inserstion fails without crashing. + + RET=0 + + local num_masks=32 + local num_regions=15 + local chain_failed + local mask_failed + local ret + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + for ((i=1; i < $num_regions; i++)); do + for ((j=$num_masks; j >= 0; j--)); do + tc filter add dev $h2 ingress chain $i protocol ip \ + pref $i handle $j flower $tcflags \ + dst_ip 192.1.0.0/$j &> /dev/null + ret=$? + + if [ $ret -ne 0 ]; then + chain_failed=$i + mask_failed=$j + break 2 + fi + done + done + + # We expect to exceed the maximum number of eRP entries, so that + # insertion eventually fails. Otherwise, the test should be adjusted to + # add more filters. + check_fail $ret "expected to exceed number of eRP entries" + + for ((; i >= 1; i--)); do + for ((j=0; j <= $num_masks; j++)); do + tc filter del dev $h2 ingress chain $i protocol ip \ + pref $i handle $j flower &> /dev/null + done + done + + log_test "max eRP entries test ($tcflags). " \ + "max chain $chain_failed, mask $mask_failed" +} + +max_group_size_test() +{ + # The number of ACLs in an ACL group is limited. Once the maximum + # number of ACLs has been reached, filters cannot be added. This test + # verifies that when this limit is reached, insertion fails without + # crashing. + + RET=0 + + local num_acls=32 + local max_size + local ret + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + for ((i=1; i < $num_acls; i++)); do + if [[ $(( i % 2 )) == 1 ]]; then + tc filter add dev $h2 ingress pref $i proto ipv4 \ + flower $tcflags dst_ip 198.51.100.1/32 \ + ip_proto tcp tcp_flags 0x01/0x01 \ + action drop &> /dev/null + else + tc filter add dev $h2 ingress pref $i proto ipv6 \ + flower $tcflags dst_ip 2001:db8:1::1/128 \ + action drop &> /dev/null + fi + + ret=$? + [[ $ret -ne 0 ]] && max_size=$((i - 1)) && break + done + + # We expect to exceed the maximum number of ACLs in a group, so that + # insertion eventually fails. Otherwise, the test should be adjusted to + # add more filters. + check_fail $ret "expected to exceed number of ACLs in a group" + + for ((; i >= 1; i--)); do + if [[ $(( i % 2 )) == 1 ]]; then + tc filter del dev $h2 ingress pref $i proto ipv4 \ + flower $tcflags dst_ip 198.51.100.1/32 \ + ip_proto tcp tcp_flags 0x01/0x01 \ + action drop &> /dev/null + else + tc filter del dev $h2 ingress pref $i proto ipv6 \ + flower $tcflags dst_ip 2001:db8:1::1/128 \ + action drop &> /dev/null + fi + done + + log_test "max ACL group size test ($tcflags). max size $max_size" +} + +collision_test() +{ + # Filters cannot share an eRP if in the common unmasked part (i.e., + # without the delta bits) they have the same values. If the driver does + # not prevent such configuration (by spilling into the C-TCAM), then + # multiple entries will be present in the device with the same key, + # leading to collisions and a reduced scale. + # + # Create such a scenario and make sure all the filters are successfully + # added. + + RET=0 + + local ret + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + # Add a single dst_ip/24 filter and multiple dst_ip/32 filters that all + # have the same values in the common unmasked part (dst_ip/24). + + tc filter add dev $h2 ingress pref 1 proto ipv4 handle 101 \ + flower $tcflags dst_ip 198.51.100.0/24 \ + action drop + + for i in {0..255}; do + tc filter add dev $h2 ingress pref 2 proto ipv4 \ + handle $((102 + i)) \ + flower $tcflags dst_ip 198.51.100.${i}/32 \ + action drop + ret=$? + [[ $ret -ne 0 ]] && break + done + + check_err $ret "failed to add all the filters" + + for i in {255..0}; do + tc filter del dev $h2 ingress pref 2 proto ipv4 \ + handle $((102 + i)) flower + done + + tc filter del dev $h2 ingress pref 1 proto ipv4 handle 101 flower + + log_test "collision test ($tcflags)" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +if ! tc_offload_check; then + check_err 1 "Could not test offloaded functionality" + log_test "mlxsw-specific tests for tc flower" + exit +else + tcflags="skip_sw" + tests_run +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh new file mode 100644 index 000000000000..4444bbace1a9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../tc_flower_scale.sh + +tc_flower_get_target() +{ + local should_fail=$1; shift + local max_cnts + + # The driver associates a counter with each tc filter, which means the + # number of supported filters is bounded by the number of available + # counters. + max_cnts=$(devlink_resource_size_get counters flow) + + # Remove already allocated counters. + ((max_cnts -= $(devlink_resource_occ_get counters flow))) + + # Each rule uses two counters, for packets and bytes. + ((max_cnts /= 2)) + + if ((! should_fail)); then + echo $max_cnts + else + echo $((max_cnts + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh new file mode 100644 index 000000000000..e79ac0dad1f4 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../tc_police_scale.sh + +tc_police_get_target() +{ + local should_fail=$1; shift + local target + + target=$(devlink_resource_size_get global_policers single_rate_policers) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh new file mode 100755 index 000000000000..fd23c80eba31 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh @@ -0,0 +1,339 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test VxLAN flooding. The device stores flood records in a singly linked list +# where each record stores up to four IPv6 addresses of remote VTEPs. The test +# verifies that packets are correctly flooded in various cases such as deletion +# of a record in the middle of the list. +# +# +-----------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 2001:db8:1::1/64 | +# +----|------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | + vxlan0 (vxlan) | | +# | | local 2001:db8:2::1 | | +# | | remote 2001:db8:2::{2..17} | | +# | | id 10 dstport 4789 | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:2::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | R2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../../net/forwarding + +ALL_TESTS="flooding_test" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +switch_create() +{ + # Make sure the bridge uses the MAC address of the local port and + # not that of the VxLAN's device + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 address $(mac_get $swp1) + + ip link add name vxlan0 type vxlan id 10 nolearning \ + udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \ + local 2001:db8:2::1 dstport 4789 + + ip address add 2001:db8:2::1/128 dev lo + + ip link set dev $swp1 master br0 + ip link set dev vxlan0 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev vxlan0 up +} + +switch_destroy() +{ + ip link set dev vxlan0 down + ip link set dev $swp1 down + ip link set dev br0 down + + ip link set dev vxlan0 nomaster + ip link set dev $swp1 nomaster + + ip address del 2001:db8:2::1/128 dev lo + + ip link del dev vxlan0 + + ip link del dev br0 +} + +router1_create() +{ + # This router is in the default VRF, where the VxLAN device is + # performing the L3 lookup + ip link set dev $rp1 up + ip address add 2001:db8:3::1/64 dev $rp1 + ip route add 2001:db8:2::0/64 via 2001:db8:3::2 +} + +router1_destroy() +{ + ip route del 2001:db8:2::0/64 via 2001:db8:3::2 + ip address del 2001:db8:3::1/64 dev $rp1 + ip link set dev $rp1 down +} + +router2_create() +{ + # This router is not in the default VRF, so use simple_if_init() + simple_if_init $rp2 2001:db8:3::2/64 +} + +router2_destroy() +{ + simple_if_fini $rp2 2001:db8:3::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + + h1_create + + switch_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +flooding_remotes_add() +{ + local num_remotes=$1 + local lsb + local i + + # Prevent unwanted packets from entering the bridge and interfering + # with the test. + tc qdisc add dev br0 clsact + tc filter add dev br0 egress protocol all pref 1 handle 1 \ + matchall skip_hw action drop + tc qdisc add dev $h1 clsact + tc filter add dev $h1 egress protocol all pref 1 handle 1 \ + flower skip_hw dst_mac de:ad:be:ef:13:37 action pass + tc filter add dev $h1 egress protocol all pref 2 handle 2 \ + matchall skip_hw action drop + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \ + dst 2001:db8:2::$lsb + done +} + +flooding_filters_add() +{ + local num_remotes=$1 + local lsb + local i + + tc qdisc add dev $rp2 clsact + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \ + flower ip_proto udp dst_ip 2001:db8:2::$lsb \ + dst_port 4789 skip_sw action drop + done +} + +flooding_filters_del() +{ + local num_remotes=$1 + local i + + for i in $(eval echo {1..$num_remotes}); do + tc filter del dev $rp2 ingress protocol ipv6 pref $i \ + handle $i flower + done + + tc qdisc del dev $rp2 clsact + + tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall + tc filter del dev $h1 egress protocol all pref 1 handle 1 flower + tc qdisc del dev $h1 clsact + tc filter del dev br0 egress protocol all pref 1 handle 1 matchall + tc qdisc del dev br0 clsact +} + +flooding_check_packets() +{ + local packets=("$@") + local num_remotes=${#packets[@]} + local i + + for i in $(eval echo {1..$num_remotes}); do + tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]} + check_err $? "remote $i - did not get expected number of packets" + done +} + +flooding_test() +{ + # Use 16 remote VTEPs that will be stored in 4 records. The array + # 'packets' will store how many packets are expected to be received + # by each remote VTEP at each stage of the test + declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1) + local num_remotes=16 + + RET=0 + + # Add FDB entries for remote VTEPs and corresponding tc filters on the + # ingress of the nexthop router. These filters will count how many + # packets were flooded to each remote VTEP + flooding_remotes_add $num_remotes + flooding_filters_add $num_remotes + + # Send one packet and make sure it is flooded to all the remote VTEPs + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 1 packet" + + # Delete the third record which corresponds to VTEPs with LSB 10..13 + # and check that packet is flooded correctly when we remove a record + # from the middle of the list + RET=0 + + packets=(2 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 2 packets" + + # Delete the first record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 2 3 3 3 3 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 3 packets" + + # Delete the last record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 2 4 4 4 4 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 4 packets" + + # Delete the last record, one entry at a time and make sure single + # entries are correctly removed + RET=0 + + packets=(2 2 2 2 4 5 5 5 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 5 packets" + + RET=0 + + packets=(2 2 2 2 4 5 6 6 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 6 packets" + + RET=0 + + packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 7 packets" + + RET=0 + + packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::9 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 8 packets" + + flooding_filters_del $num_remotes +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh new file mode 100644 index 000000000000..06a80f40daa4 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source "../../../../net/forwarding/devlink_lib.sh" +source ../mlxsw_lib.sh + +mlxsw_only_on_spectrum 1 || exit 1 + +# Needed for returning to default +declare -A KVD_DEFAULTS + +KVD_CHILDREN="linear hash_single hash_double" +KVDL_CHILDREN="singles chunks large_chunks" + +devlink_sp_resource_minimize() +{ + local size + local i + + for i in $KVD_CHILDREN; do + size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]') + devlink_resource_size_set "$size" kvd "$i" + done + + for i in $KVDL_CHILDREN; do + size=$(devlink_resource_get kvd linear "$i" | \ + jq '.["size_min"]') + devlink_resource_size_set "$size" kvd linear "$i" + done +} + +devlink_sp_size_kvd_to_default() +{ + local need_reload=0 + local i + + for i in $KVD_CHILDREN; do + local size=$(echo "${KVD_DEFAULTS[kvd_$i]}" | jq '.["size"]') + current_size=$(devlink_resource_size_get kvd "$i") + + if [ "$size" -ne "$current_size" ]; then + devlink_resource_size_set "$size" kvd "$i" + need_reload=1 + fi + done + + for i in $KVDL_CHILDREN; do + local size=$(echo "${KVD_DEFAULTS[kvd_linear_$i]}" | \ + jq '.["size"]') + current_size=$(devlink_resource_size_get kvd linear "$i") + + if [ "$size" -ne "$current_size" ]; then + devlink_resource_size_set "$size" kvd linear "$i" + need_reload=1 + fi + done + + if [ "$need_reload" -ne "0" ]; then + devlink_reload + fi +} + +devlink_sp_read_kvd_defaults() +{ + local key + local i + + KVD_DEFAULTS[kvd]=$(devlink_resource_get "kvd") + for i in $KVD_CHILDREN; do + key=kvd_$i + KVD_DEFAULTS[$key]=$(devlink_resource_get kvd "$i") + done + + for i in $KVDL_CHILDREN; do + key=kvd_linear_$i + KVD_DEFAULTS[$key]=$(devlink_resource_get kvd linear "$i") + done +} + +KVD_PROFILES="default scale ipv4_max" + +devlink_sp_resource_kvd_profile_set() +{ + local profile=$1 + + case "$profile" in + scale) + devlink_resource_size_set 64000 kvd linear + devlink_resource_size_set 15616 kvd linear singles + devlink_resource_size_set 32000 kvd linear chunks + devlink_resource_size_set 16384 kvd linear large_chunks + devlink_resource_size_set 128000 kvd hash_single + devlink_resource_size_set 48000 kvd hash_double + devlink_reload + ;; + ipv4_max) + devlink_resource_size_set 64000 kvd linear + devlink_resource_size_set 15616 kvd linear singles + devlink_resource_size_set 32000 kvd linear chunks + devlink_resource_size_set 16384 kvd linear large_chunks + devlink_resource_size_set 144000 kvd hash_single + devlink_resource_size_set 32768 kvd hash_double + devlink_reload + ;; + default) + devlink_resource_size_set 98304 kvd linear + devlink_resource_size_set 16384 kvd linear singles + devlink_resource_size_set 49152 kvd linear chunks + devlink_resource_size_set 32768 kvd linear large_chunks + devlink_resource_size_set 87040 kvd hash_single + devlink_resource_size_set 60416 kvd hash_double + devlink_reload + ;; + *) + check_err 1 "Unknown profile $profile" + esac +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh new file mode 100755 index 000000000000..6f2683cbc7d5 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../../net/forwarding + +NUM_NETIFS=1 +source $lib_dir/lib.sh +source devlink_lib_spectrum.sh + +setup_prepare() +{ + devlink_sp_read_kvd_defaults +} + +cleanup() +{ + pre_cleanup + devlink_sp_size_kvd_to_default +} + +trap cleanup EXIT + +setup_prepare + +profiles_test() +{ + local i + + log_info "Running profile tests" + + for i in $KVD_PROFILES; do + RET=0 + devlink_sp_resource_kvd_profile_set $i + log_test "'$i' profile" + done + + # Default is explicitly tested at end to ensure it's actually applied + RET=0 + devlink_sp_resource_kvd_profile_set "default" + log_test "'default' profile" +} + +resources_min_test() +{ + local size + local i + local j + + log_info "Running KVD-minimum tests" + + for i in $KVD_CHILDREN; do + RET=0 + size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]') + devlink_resource_size_set "$size" kvd "$i" + + # In case of linear, need to minimize sub-resources as well + if [[ "$i" == "linear" ]]; then + for j in $KVDL_CHILDREN; do + devlink_resource_size_set 0 kvd linear "$j" + done + fi + + devlink_reload + devlink_sp_size_kvd_to_default + log_test "'$i' minimize [$size]" + done +} + +resources_max_test() +{ + local min_size + local size + local i + local j + + log_info "Running KVD-maximum tests" + for i in $KVD_CHILDREN; do + RET=0 + devlink_sp_resource_minimize + + # Calculate the maximum possible size for the given partition + size=$(devlink_resource_size_get kvd) + for j in $KVD_CHILDREN; do + if [ "$i" != "$j" ]; then + min_size=$(devlink_resource_get kvd "$j" | \ + jq '.["size_min"]') + size=$((size - min_size)) + fi + done + + # Test almost maximum size + devlink_resource_size_set "$((size - 128))" kvd "$i" + devlink_reload + log_test "'$i' almost maximize [$((size - 128))]" + + # Test above maximum size + devlink resource set "$DEVLINK_DEV" \ + path "kvd/$i" size $((size + 128)) &> /dev/null + check_fail $? "Set kvd/$i to size $((size + 128)) should fail" + log_test "'$i' Overflow rejection [$((size + 128))]" + + # Test maximum size + if [ "$i" == "hash_single" ] || [ "$i" == "hash_double" ]; then + echo "SKIP: Observed problem with exact max $i" + continue + fi + + devlink_resource_size_set "$size" kvd "$i" + devlink_reload + log_test "'$i' maximize [$size]" + + devlink_sp_size_kvd_to_default + done +} + +profiles_test +resources_min_test +resources_max_test + +exit "$RET" diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh new file mode 100644 index 000000000000..f7c168decd1e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../mirror_gre_scale.sh + +mirror_gre_get_target() +{ + local should_fail=$1; shift + local target + + target=$(devlink_resource_size_get span_agents) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh new file mode 100644 index 000000000000..d0847e8ea270 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../port_range_scale.sh + +port_range_get_target() +{ + local should_fail=$1; shift + local target + + target=$(devlink_resource_size_get port_range_registers) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh new file mode 100644 index 000000000000..0b71dfbbb447 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../port_scale.sh + +port_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get physical_ports) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh new file mode 100755 index 000000000000..60753d46a2d4 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../../net/forwarding + +VXPORT=4789 + +ALL_TESTS=" + create_vxlan_on_top_of_8021ad_bridge +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +create_vxlan_on_top_of_8021ad_bridge() +{ + RET=0 + + ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ + vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + + ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \ + "$VXPORT" nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx100 up + + ip link set dev $swp1 master br0 + ip link set dev vx100 master br0 + + bridge vlan add vid 100 dev vx100 pvid untagged 2>/dev/null + check_fail $? "802.1ad bridge with VxLAN in Spectrum-1 not rejected" + + bridge vlan add vid 100 dev vx100 pvid untagged 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "802.1ad bridge with VxLAN in Spectrum-1 rejected without extack" + + log_test "create VxLAN on top of 802.1ad bridge" + + ip link del dev vx100 + ip link del dev br0 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh new file mode 100755 index 000000000000..7b98cdd0580d --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source devlink_lib_spectrum.sh + +current_test="" + +cleanup() +{ + pre_cleanup + if [ ! -z $current_test ]; then + ${current_test}_cleanup + fi + devlink_sp_size_kvd_to_default +} + +devlink_sp_read_kvd_defaults +trap cleanup EXIT + +ALL_TESTS=" + router + tc_flower + mirror_gre + tc_police + port + rif_mac_profile + rif_counter + port_range +" + +for current_test in ${TESTS:-$ALL_TESTS}; do + RET_FIN=0 + source ${current_test}_scale.sh + + num_netifs_var=${current_test^^}_NUM_NETIFS + num_netifs=${!num_netifs_var:-$NUM_NETIFS} + + for profile in $KVD_PROFILES; do + RET=0 + devlink_sp_resource_kvd_profile_set $profile + if [[ $RET -gt 0 ]]; then + log_test "'$current_test' [$profile] setting" + continue + fi + + for should_fail in 0 1; do + RET=0 + target=$(${current_test}_get_target "$should_fail") + if ((target == 0)); then + continue + fi + ${current_test}_setup_prepare + setup_wait_n $num_netifs + # Update target in case occupancy of a certain resource + # changed following the test setup. + target=$(${current_test}_get_target "$should_fail") + ${current_test}_test "$target" "$should_fail" + if [[ "$should_fail" -eq 0 ]]; then + log_test "'$current_test' [$profile] $target" + + if ((!RET)); then + tt=${current_test}_traffic_test + if [[ $(type -t $tt) == "function" ]] + then + $tt "$target" + log_test "'$current_test' [$profile] $target traffic test" + fi + fi + else + log_test "'$current_test' [$profile] overflow $target" + fi + ${current_test}_cleanup $target + RET_FIN=$(( RET_FIN || RET )) + done + done +done +current_test="" + +exit "$RET_FIN" diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh new file mode 100644 index 000000000000..d44536276e8a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../rif_counter_scale.sh + +rif_counter_get_target() +{ + local should_fail=$1; shift + local max_cnts + local max_rifs + local target + + max_rifs=$(devlink_resource_size_get rifs) + max_cnts=$(devlink_resource_size_get counters rif) + + # Remove already allocated RIFs. + ((max_rifs -= $(devlink_resource_occ_get rifs))) + + # 10 KVD slots per counter, ingress+egress counters per RIF + ((max_cnts /= 20)) + + # Pointless to run the overflow test if we don't have enough RIFs to + # host all the counters. + if ((max_cnts > max_rifs && should_fail)); then + echo 0 + return + fi + + target=$((max_rifs < max_cnts ? max_rifs : max_cnts)) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh new file mode 100644 index 000000000000..303d7cbe3c45 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../rif_mac_profile_scale.sh + +rif_mac_profile_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get rif_mac_profiles) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh new file mode 100644 index 000000000000..21c4697d5bab --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../router_scale.sh + +router_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get kvd hash_single) + + if [[ $should_fail -eq 0 ]]; then + target=$((target * 85 / 100)) + else + target=$((target + 1)) + fi + + echo $target +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh new file mode 100644 index 000000000000..f9bfd8937765 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../tc_flower_scale.sh + +tc_flower_get_target() +{ + local should_fail=$1; shift + + # 6144 (6x1024) is the theoretical maximum. + # One bank of 512 rules is taken by the 18-byte MC router rule. + # One rule is the ACL catch-all. + # 6144 - 512 - 1 = 5631 + local target=5631 + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh new file mode 100644 index 000000000000..e79ac0dad1f4 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../tc_police_scale.sh + +tc_police_get_target() +{ + local should_fail=$1; shift + local target + + target=$(devlink_resource_size_get global_policers single_rate_policers) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh new file mode 100755 index 000000000000..d8fd875ad527 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh @@ -0,0 +1,334 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test VxLAN flooding. The device stores flood records in a singly linked list +# where each record stores up to five IPv6 addresses of remote VTEPs. The test +# verifies that packets are correctly flooded in various cases such as deletion +# of a record in the middle of the list. +# +# +-----------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 2001:db8:1::1/64 | +# +----|------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | + vxlan0 (vxlan) | | +# | | local 2001:db8:2::1 | | +# | | remote 2001:db8:2::{2..21} | | +# | | id 10 dstport 4789 | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:2::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | R2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../../net/forwarding + +ALL_TESTS="flooding_test" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +switch_create() +{ + # Make sure the bridge uses the MAC address of the local port and + # not that of the VxLAN's device + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 address $(mac_get $swp1) + + ip link add name vxlan0 type vxlan id 10 nolearning \ + udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \ + local 2001:db8:2::1 dstport 4789 + + ip address add 2001:db8:2::1/128 dev lo + + ip link set dev $swp1 master br0 + ip link set dev vxlan0 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev vxlan0 up +} + +switch_destroy() +{ + ip link set dev vxlan0 down + ip link set dev $swp1 down + ip link set dev br0 down + + ip link set dev vxlan0 nomaster + ip link set dev $swp1 nomaster + + ip address del 2001:db8:2::1/128 dev lo + + ip link del dev vxlan0 + + ip link del dev br0 +} + +router1_create() +{ + # This router is in the default VRF, where the VxLAN device is + # performing the L3 lookup + ip link set dev $rp1 up + ip address add 2001:db8:3::1/64 dev $rp1 + ip route add 2001:db8:2::0/64 via 2001:db8:3::2 +} + +router1_destroy() +{ + ip route del 2001:db8:2::0/64 via 2001:db8:3::2 + ip address del 2001:db8:3::1/64 dev $rp1 + ip link set dev $rp1 down +} + +router2_create() +{ + # This router is not in the default VRF, so use simple_if_init() + simple_if_init $rp2 2001:db8:3::2/64 +} + +router2_destroy() +{ + simple_if_fini $rp2 2001:db8:3::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + + h1_create + + switch_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +flooding_remotes_add() +{ + local num_remotes=$1 + local lsb + local i + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \ + dst 2001:db8:2::$lsb + done +} + +flooding_filters_add() +{ + local num_remotes=$1 + local lsb + local i + + tc qdisc add dev $rp2 clsact + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \ + flower ip_proto udp dst_ip 2001:db8:2::$lsb \ + dst_port 4789 skip_sw action drop + done +} + +flooding_filters_del() +{ + local num_remotes=$1 + local i + + for i in $(eval echo {1..$num_remotes}); do + tc filter del dev $rp2 ingress protocol ipv6 pref $i \ + handle $i flower + done + + tc qdisc del dev $rp2 clsact +} + +flooding_check_packets() +{ + local packets=("$@") + local num_remotes=${#packets[@]} + local i + + for i in $(eval echo {1..$num_remotes}); do + tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]} + check_err $? "remote $i - did not get expected number of packets" + done +} + +flooding_test() +{ + # Use 20 remote VTEPs that will be stored in 4 records. The array + # 'packets' will store how many packets are expected to be received + # by each remote VTEP at each stage of the test + declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1) + local num_remotes=20 + + RET=0 + + # Add FDB entries for remote VTEPs and corresponding tc filters on the + # ingress of the nexthop router. These filters will count how many + # packets were flooded to each remote VTEP + flooding_remotes_add $num_remotes + flooding_filters_add $num_remotes + + # Send one packet and make sure it is flooded to all the remote VTEPs + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 1 packet" + + # Delete the third record which corresponds to VTEPs with LSB 12..16 + # and check that packet is flooded correctly when we remove a record + # from the middle of the list + RET=0 + + packets=(2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 2 2 2 2 2) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 2 packets" + + # Delete the first record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 2 2 3 3 3 3 3 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 3 packets" + + # Delete the last record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 2 2 4 4 4 4 4 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::18 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::19 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::20 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::21 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 4 packets" + + # Delete the last record, one entry at a time and make sure single + # entries are correctly removed + RET=0 + + packets=(2 2 2 2 2 4 5 5 5 5 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 5 packets" + + RET=0 + + packets=(2 2 2 2 2 4 5 6 6 6 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 6 packets" + + RET=0 + + packets=(2 2 2 2 2 4 5 6 7 7 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::9 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 7 packets" + + RET=0 + + packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 8 packets" + + RET=0 + + packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 9 packets" + + flooding_filters_del $num_remotes +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh new file mode 100755 index 000000000000..20ed98fe5a60 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + default_hw_stats_test + immediate_hw_stats_test + delayed_hw_stats_test + disabled_hw_stats_test +" +NUM_NETIFS=2 + +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +switch_create() +{ + simple_if_init $swp1 192.0.2.2/24 + tc qdisc add dev $swp1 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 192.0.2.2/24 +} + +hw_stats_test() +{ + RET=0 + + local name=$1 + local action_hw_stats=$2 + local occ_delta=$3 + local expected_packet_count=$4 + + local orig_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]') + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop $action_hw_stats + check_err $? "Failed to add rule with $name hw_stats" + + local new_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]') + local expected_occ=$((orig_occ + occ_delta)) + [ "$new_occ" == "$expected_occ" ] + check_err $? "Expected occupancy of $expected_occ, got $new_occ" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $swp1mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 101 $expected_packet_count + check_err $? "Did not match incoming packet" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + log_test "$name hw_stats" +} + +default_hw_stats_test() +{ + hw_stats_test "default" "" 2 1 +} + +immediate_hw_stats_test() +{ + hw_stats_test "immediate" "hw_stats immediate" 2 1 +} + +delayed_hw_stats_test() +{ + RET=0 + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop hw_stats delayed + check_fail $? "Unexpected success in adding rule with delayed hw_stats" + + log_test "delayed hw_stats" +} + +disabled_hw_stats_test() +{ + hw_stats_test "disabled" "hw_stats disabled" 0 0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + h1mac=$(mac_get $h1) + swp1mac=$(mac_get $swp1) + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +check_tc_action_hw_stats_support + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh new file mode 100644 index 000000000000..d3d9e60d6ddf --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh @@ -0,0 +1,140 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for resource limit of offloaded flower rules. The test adds a given +# number of flower matches for different IPv6 addresses, then check the offload +# indication for all of the tc flower rules. This file contains functions to set +# up a testing topology and run the test, and is meant to be sourced from a test +# script that calls the testing routine with a given number of rules. + +TC_FLOWER_NUM_NETIFS=2 + +tc_flower_h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact +} + +tc_flower_h1_destroy() +{ + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +tc_flower_h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact +} + +tc_flower_h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +tc_flower_setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + vrf_prepare + + tc_flower_h1_create + tc_flower_h2_create +} + +tc_flower_cleanup() +{ + pre_cleanup + + tc_flower_h2_destroy + tc_flower_h1_destroy + + vrf_cleanup + + if [[ -v TC_FLOWER_BATCH_FILE ]]; then + rm -f $TC_FLOWER_BATCH_FILE + fi +} + +tc_flower_addr() +{ + local num=$1; shift + + printf "2001:db8:1::%x" $num +} + +tc_flower_rules_create() +{ + local count=$1; shift + local should_fail=$1; shift + + TC_FLOWER_BATCH_FILE="$(mktemp)" + + for ((i = 0; i < count; ++i)); do + cat >> $TC_FLOWER_BATCH_FILE <<-EOF + filter add dev $h2 ingress \ + prot ipv6 \ + pref 1000 \ + handle 42$i \ + flower $tcflags dst_ip $(tc_flower_addr $i) \ + action drop + EOF + done + + tc -b $TC_FLOWER_BATCH_FILE + check_err_fail $should_fail $? "Rule insertion" +} + +__tc_flower_test() +{ + local count=$1; shift + local should_fail=$1; shift + local last=$((count - 1)) + + tc_flower_rules_create $count $should_fail + + offload_count=$(tc -j -s filter show dev $h2 ingress | + jq -r '[ .[] | select(.kind == "flower") | + .options | .in_hw ]' | jq .[] | wc -l) + [[ $((offload_count - 1)) -eq $count ]] + check_err_fail $should_fail $? "Attempt to offload $count rules (actual result $((offload_count - 1)))" +} + +tc_flower_test() +{ + local count=$1; shift + local should_fail=$1; shift + + # We use lower 16 bits of IPv6 address for match. Also there are only 16 + # bits of rule priority space. + if ((count > 65536)); then + check_err 1 "Invalid count of $count. At most 65536 rules supported" + return + fi + + if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then + check_err 1 "Could not test offloaded functionality" + return + fi + + tcflags="skip_sw" + __tc_flower_test $count $should_fail +} + +tc_flower_traffic_test() +{ + local count=$1; shift + local i; + + for ((i = count - 1; i > 0; i /= 2)); do + $MZ -6 $h1 -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \ + -A $(tc_flower_addr 0) -B $(tc_flower_addr $i) \ + -q -t udp sp=54321,dp=12345 + done + for ((i = count - 1; i > 0; i /= 2)); do + tc_check_packets "dev $h2 ingress" 42$i 1 + check_err $? "Traffic not seen at rule #$i" + done +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh new file mode 100755 index 000000000000..448b75c1545a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that policers shared by different tc filters are correctly reference +# counted by observing policers' occupancy via devlink-resource. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + tc_police_occ_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 +} + +h1_destroy() +{ + simple_if_fini $h1 +} + +switch_create() +{ + simple_if_init $swp1 + tc qdisc add dev $swp1 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +tc_police_occ_get() +{ + devlink_resource_occ_get global_policers single_rate_policers +} + +tc_police_occ_test() +{ + RET=0 + + local occ=$(tc_police_occ_get) + + tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \ + flower skip_sw \ + action police rate 100mbit burst 100k conform-exceed drop/ok + (( occ + 1 == $(tc_police_occ_get) )) + check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))" + + tc filter del dev $swp1 ingress pref 1 handle 101 flower + (( occ == $(tc_police_occ_get) )) + check_err $? "Got occupancy $(tc_police_occ_get), expected $occ" + + tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \ + flower skip_sw \ + action police rate 100mbit burst 100k conform-exceed drop/ok \ + index 10 + tc filter add dev $swp1 ingress pref 2 handle 102 proto ip \ + flower skip_sw action police index 10 + + (( occ + 1 == $(tc_police_occ_get) )) + check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))" + + tc filter del dev $swp1 ingress pref 2 handle 102 flower + (( occ + 1 == $(tc_police_occ_get) )) + check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))" + + tc filter del dev $swp1 ingress pref 1 handle 101 flower + (( occ == $(tc_police_occ_get) )) + check_err $? "Got occupancy $(tc_police_occ_get), expected $occ" + + log_test "tc police occupancy" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh new file mode 100644 index 000000000000..86e787895f78 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: GPL-2.0 + +TC_POLICE_NUM_NETIFS=2 + +tc_police_h1_create() +{ + simple_if_init $h1 +} + +tc_police_h1_destroy() +{ + simple_if_fini $h1 +} + +tc_police_switch_create() +{ + simple_if_init $swp1 + tc qdisc add dev $swp1 clsact +} + +tc_police_switch_destroy() +{ + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 +} + +tc_police_addr() +{ + local num=$1; shift + + printf "2001:db8:1::%x" $num +} + +tc_police_rules_create() +{ + local count=$1; shift + local should_fail=$1; shift + + TC_POLICE_BATCH_FILE="$(mktemp)" + + for ((i = 0; i < count; ++i)); do + cat >> $TC_POLICE_BATCH_FILE <<-EOF + filter add dev $swp1 ingress \ + prot ipv6 \ + pref 1000 \ + flower skip_sw dst_ip $(tc_police_addr $i) \ + action police rate 10mbit burst 100k \ + conform-exceed drop/ok + EOF + done + + tc -b $TC_POLICE_BATCH_FILE + check_err_fail $should_fail $? "Rule insertion" +} + +__tc_police_test() +{ + local count=$1; shift + local should_fail=$1; shift + + tc_police_rules_create $count $should_fail + + offload_count=$(tc -j filter show dev $swp1 ingress | + jq "[.[] | select(.options.in_hw == true)] | length") + ((offload_count == count)) + check_err_fail $should_fail $? "tc police offload count" +} + +tc_police_test() +{ + local count=$1; shift + local should_fail=$1; shift + + if ! tc_offload_check $TC_POLICE_NUM_NETIFS; then + check_err 1 "Could not test offloaded functionality" + return + fi + + __tc_police_test $count $should_fail +} + +tc_police_setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + tc_police_h1_create + tc_police_switch_create +} + +tc_police_cleanup() +{ + pre_cleanup + + tc_police_switch_destroy + tc_police_h1_destroy + + vrf_cleanup +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh new file mode 100755 index 000000000000..0441a18f098b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh @@ -0,0 +1,414 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + shared_block_drop_test + egress_redirect_test + multi_mirror_test + matchall_sample_egress_test + matchall_mirror_behind_flower_ingress_test + matchall_sample_behind_flower_ingress_test + matchall_mirror_behind_flower_egress_test + matchall_proto_match_test + police_limits_test + multi_police_test +" +NUM_NETIFS=2 + +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source mlxsw_lib.sh + +switch_create() +{ + simple_if_init $swp1 192.0.2.1/24 + simple_if_init $swp2 192.0.2.2/24 +} + +switch_destroy() +{ + simple_if_fini $swp2 192.0.2.2/24 + simple_if_fini $swp1 192.0.2.1/24 +} + +shared_block_drop_test() +{ + RET=0 + + # It is forbidden in mlxsw driver to have mixed-bound + # shared block with a drop rule. + + tc qdisc add dev $swp1 ingress_block 22 clsact + check_err $? "Failed to create clsact with ingress block" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_err $? "Failed to add drop rule to ingress bound block" + + tc qdisc add dev $swp2 ingress_block 22 clsact + check_err $? "Failed to create another clsact with ingress shared block" + + tc qdisc del dev $swp2 clsact + + tc qdisc add dev $swp2 egress_block 22 clsact + check_fail $? "Incorrect success to create another clsact with egress shared block" + + tc filter del block 22 protocol ip pref 1 handle 101 flower + + tc qdisc add dev $swp2 egress_block 22 clsact + check_err $? "Failed to create another clsact with egress shared block after blocker drop rule removed" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_fail $? "Incorrect success to add drop rule to mixed bound block" + + tc qdisc del dev $swp1 clsact + + tc qdisc add dev $swp1 egress_block 22 clsact + check_err $? "Failed to create another clsact with egress shared block" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_err $? "Failed to add drop rule to egress bound shared block" + + tc filter del block 22 protocol ip pref 1 handle 101 flower + + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + log_test "shared block drop" +} + +egress_redirect_test() +{ + RET=0 + + # It is forbidden in mlxsw driver to have mirred redirect on + # egress-bound block. + + tc qdisc add dev $swp1 ingress_block 22 clsact + check_err $? "Failed to create clsact with ingress block" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress redirect dev $swp2 + check_err $? "Failed to add redirect rule to ingress bound block" + + tc qdisc add dev $swp2 ingress_block 22 clsact + check_err $? "Failed to create another clsact with ingress shared block" + + tc qdisc del dev $swp2 clsact + + tc qdisc add dev $swp2 egress_block 22 clsact + check_fail $? "Incorrect success to create another clsact with egress shared block" + + tc filter del block 22 protocol ip pref 1 handle 101 flower + + tc qdisc add dev $swp2 egress_block 22 clsact + check_err $? "Failed to create another clsact with egress shared block after blocker redirect rule removed" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress redirect dev $swp2 + check_fail $? "Incorrect success to add redirect rule to mixed bound block" + + tc qdisc del dev $swp1 clsact + + tc qdisc add dev $swp1 egress_block 22 clsact + check_err $? "Failed to create another clsact with egress shared block" + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress redirect dev $swp2 + check_fail $? "Incorrect success to add redirect rule to egress bound shared block" + + tc qdisc del dev $swp2 clsact + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress redirect dev $swp2 + check_fail $? "Incorrect success to add redirect rule to egress bound block" + + tc qdisc del dev $swp1 clsact + + log_test "shared block drop" +} + +multi_mirror_test() +{ + RET=0 + + # It is forbidden in mlxsw driver to have multiple mirror + # actions in a single rule. + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress mirror dev $swp2 + check_err $? "Failed to add rule with single mirror action" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 \ + action mirred egress mirror dev $swp2 \ + action mirred egress mirror dev $swp1 + check_fail $? "Incorrect success to add rule with two mirror actions" + + tc qdisc del dev $swp1 clsact + + log_test "multi mirror" +} + +matchall_sample_egress_test() +{ + RET=0 + + # It is forbidden in mlxsw driver to have matchall with sample action + # bound on egress. Spectrum-1 specific restriction + mlxsw_only_on_spectrum 1 || return + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress protocol all pref 1 handle 101 \ + matchall skip_sw action sample rate 100 group 1 + check_err $? "Failed to add rule with sample action on ingress" + + tc filter del dev $swp1 ingress protocol all pref 1 handle 101 matchall + + tc filter add dev $swp1 egress protocol all pref 1 handle 101 \ + matchall skip_sw action sample rate 100 group 1 + check_fail $? "Incorrect success to add rule with sample action on egress" + + tc qdisc del dev $swp1 clsact + + log_test "matchall sample egress" +} + +matchall_behind_flower_ingress_test() +{ + local action=$1 + local action_args=$2 + + RET=0 + + # On ingress, all matchall-mirror and matchall-sample + # rules have to be in front of the flower rules + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + + tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \ + matchall skip_sw action $action_args + check_err $? "Failed to add matchall rule in front of a flower rule" + + tc filter del dev $swp1 ingress protocol all pref 9 handle 102 matchall + + tc filter add dev $swp1 ingress protocol all pref 11 handle 102 \ + matchall skip_sw action $action_args + check_fail $? "Incorrect success to add matchall rule behind a flower rule" + + tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower + + tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \ + matchall skip_sw action $action_args + + tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_err $? "Failed to add flower rule behind a matchall rule" + + tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower + + tc filter add dev $swp1 ingress protocol ip pref 8 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_fail $? "Incorrect success to add flower rule in front of a matchall rule" + + tc qdisc del dev $swp1 clsact + + log_test "matchall $action flower ingress" +} + +matchall_mirror_behind_flower_ingress_test() +{ + matchall_behind_flower_ingress_test "mirror" "mirred egress mirror dev $swp2" +} + +matchall_sample_behind_flower_ingress_test() +{ + matchall_behind_flower_ingress_test "sample" "sample rate 100 group 1" +} + +matchall_behind_flower_egress_test() +{ + local action=$1 + local action_args=$2 + + RET=0 + + # On egress, all matchall-mirror rules have to be behind the flower rules + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + + tc filter add dev $swp1 egress protocol all pref 11 handle 102 \ + matchall skip_sw action $action_args + check_err $? "Failed to add matchall rule in front of a flower rule" + + tc filter del dev $swp1 egress protocol all pref 11 handle 102 matchall + + tc filter add dev $swp1 egress protocol all pref 9 handle 102 \ + matchall skip_sw action $action_args + check_fail $? "Incorrect success to add matchall rule behind a flower rule" + + tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower + + tc filter add dev $swp1 egress protocol all pref 11 handle 102 \ + matchall skip_sw action $action_args + + tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_err $? "Failed to add flower rule behind a matchall rule" + + tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower + + tc filter add dev $swp1 egress protocol ip pref 12 handle 101 flower \ + skip_sw dst_ip 192.0.2.2 action drop + check_fail $? "Incorrect success to add flower rule in front of a matchall rule" + + tc qdisc del dev $swp1 clsact + + log_test "matchall $action flower egress" +} + +matchall_mirror_behind_flower_egress_test() +{ + matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2" +} + +matchall_proto_match_test() +{ + RET=0 + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + matchall skip_sw \ + action sample group 1 rate 100 + check_fail $? "Incorrect success to add matchall rule with protocol match" + + tc qdisc del dev $swp1 clsact + + log_test "matchall protocol match" +} + +police_limits_test() +{ + RET=0 + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + flower skip_sw \ + action police rate 0.5kbit burst 1m conform-exceed drop/ok + check_fail $? "Incorrect success to add police action with too low rate" + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + flower skip_sw \ + action police rate 2.5tbit burst 1g conform-exceed drop/ok + check_fail $? "Incorrect success to add police action with too high rate" + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + flower skip_sw \ + action police rate 1.5kbit burst 1m conform-exceed drop/ok + check_err $? "Failed to add police action with low rate" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + flower skip_sw \ + action police rate 1.9tbit burst 1g conform-exceed drop/ok + check_err $? "Failed to add police action with high rate" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + flower skip_sw \ + action police rate 1.5kbit burst 512b conform-exceed drop/ok + check_fail $? "Incorrect success to add police action with too low burst size" + + tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \ + flower skip_sw \ + action police rate 1.5kbit burst 2k conform-exceed drop/ok + check_err $? "Failed to add police action with low burst size" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + tc qdisc del dev $swp1 clsact + + log_test "police rate and burst limits" +} + +multi_police_test() +{ + RET=0 + + # It is forbidden in mlxsw driver to have multiple police + # actions in a single rule. + + tc qdisc add dev $swp1 clsact + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \ + flower skip_sw \ + action police rate 100mbit burst 100k conform-exceed drop/ok + check_err $? "Failed to add rule with single police action" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \ + flower skip_sw \ + action police rate 100mbit burst 100k conform-exceed drop/pipe \ + action police rate 200mbit burst 200k conform-exceed drop/ok + check_fail $? "Incorrect success to add rule with two police actions" + + tc qdisc del dev $swp1 clsact + + log_test "multi police" +} + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + vrf_prepare + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + vrf_cleanup +} + +check_tc_shblock_support + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh new file mode 100755 index 000000000000..bc7ea2df49fb --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh @@ -0,0 +1,658 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that packets are sampled when tc-sample is used and that reported +# metadata is correct. Two sets of hosts (with and without LAG) are used, since +# metadata extraction in mlxsw is a bit different when LAG is involved. +# +# +---------------------------------+ +---------------------------------+ +# | H1 (vrf) | | H3 (vrf) | +# | + $h1 | | + $h3_lag | +# | | 192.0.2.1/28 | | | 192.0.2.17/28 | +# | | | | | | +# | | default via 192.0.2.2 | | | default via 192.0.2.18 | +# +----|----------------------------+ +----|----------------------------+ +# | | +# +----|-----------------------------------------|----------------------------+ +# | | 192.0.2.2/28 | 192.0.2.18/28 | +# | + $rp1 + $rp3_lag | +# | | +# | + $rp2 + $rp4_lag | +# | | 198.51.100.2/28 | 198.51.100.18/28 | +# +----|-----------------------------------------|----------------------------+ +# | | +# +----|----------------------------+ +----|----------------------------+ +# | | default via 198.51.100.2 | | | default via 198.51.100.18 | +# | | | | | | +# | | 198.51.100.1/28 | | | 198.51.100.17/28 | +# | + $h2 | | + $h4_lag | +# | H2 (vrf) | | H4 (vrf) | +# +---------------------------------+ +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + tc_sample_rate_test + tc_sample_max_rate_test + tc_sample_conflict_test + tc_sample_group_conflict_test + tc_sample_md_iif_test + tc_sample_md_lag_iif_test + tc_sample_md_oif_test + tc_sample_md_lag_oif_test + tc_sample_md_out_tc_test + tc_sample_md_out_tc_occ_test + tc_sample_md_latency_test + tc_sample_acl_group_conflict_test + tc_sample_acl_rate_test + tc_sample_acl_max_rate_test +" +NUM_NETIFS=8 +CAPTURE_FILE=$(mktemp) +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source mlxsw_lib.sh + +# Available at https://github.com/Mellanox/libpsample +require_command psample + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 +} + +h1_destroy() +{ + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/28 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 +} + +h2_destroy() +{ + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/28 +} + +h3_create() +{ + ip link set dev $h3 down + ip link add name ${h3}_bond type bond mode 802.3ad + ip link set dev $h3 master ${h3}_bond + + simple_if_init ${h3}_bond 192.0.2.17/28 + + ip -4 route add default vrf v${h3}_bond nexthop via 192.0.2.18 +} + +h3_destroy() +{ + ip -4 route del default vrf v${h3}_bond nexthop via 192.0.2.18 + + simple_if_fini ${h3}_bond 192.0.2.17/28 + + ip link set dev $h3 nomaster + ip link del dev ${h3}_bond +} + +h4_create() +{ + ip link set dev $h4 down + ip link add name ${h4}_bond type bond mode 802.3ad + ip link set dev $h4 master ${h4}_bond + + simple_if_init ${h4}_bond 198.51.100.17/28 + + ip -4 route add default vrf v${h4}_bond nexthop via 198.51.100.18 +} + +h4_destroy() +{ + ip -4 route del default vrf v${h4}_bond nexthop via 198.51.100.18 + + simple_if_fini ${h4}_bond 198.51.100.17/28 + + ip link set dev $h4 nomaster + ip link del dev ${h4}_bond +} + +router_create() +{ + ip link set dev $rp1 up + __addr_add_del $rp1 add 192.0.2.2/28 + tc qdisc add dev $rp1 clsact + + ip link set dev $rp2 up + __addr_add_del $rp2 add 198.51.100.2/28 + tc qdisc add dev $rp2 clsact + + ip link add name ${rp3}_bond type bond mode 802.3ad + ip link set dev $rp3 master ${rp3}_bond + __addr_add_del ${rp3}_bond add 192.0.2.18/28 + tc qdisc add dev $rp3 clsact + ip link set dev ${rp3}_bond up + + ip link add name ${rp4}_bond type bond mode 802.3ad + ip link set dev $rp4 master ${rp4}_bond + __addr_add_del ${rp4}_bond add 198.51.100.18/28 + tc qdisc add dev $rp4 clsact + ip link set dev ${rp4}_bond up +} + +router_destroy() +{ + ip link set dev ${rp4}_bond down + tc qdisc del dev $rp4 clsact + __addr_add_del ${rp4}_bond del 198.51.100.18/28 + ip link set dev $rp4 nomaster + ip link del dev ${rp4}_bond + + ip link set dev ${rp3}_bond down + tc qdisc del dev $rp3 clsact + __addr_add_del ${rp3}_bond del 192.0.2.18/28 + ip link set dev $rp3 nomaster + ip link del dev ${rp3}_bond + + tc qdisc del dev $rp2 clsact + __addr_add_del $rp2 del 198.51.100.2/28 + ip link set dev $rp2 down + + tc qdisc del dev $rp1 clsact + __addr_add_del $rp1 del 192.0.2.2/28 + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + h3=${NETIFS[p5]} + rp3=${NETIFS[p6]} + h4=${NETIFS[p7]} + rp4=${NETIFS[p8]} + + vrf_prepare + + h1_create + h2_create + h3_create + h4_create + router_create +} + +cleanup() +{ + pre_cleanup + + rm -f $CAPTURE_FILE + + router_destroy + h4_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +psample_capture_start() +{ + rm -f $CAPTURE_FILE + + psample &> $CAPTURE_FILE & + + sleep 1 +} + +psample_capture_stop() +{ + kill_process %% +} + +__tc_sample_rate_test() +{ + local desc=$1; shift + local dip=$1; shift + local pkts pct + + RET=0 + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 32 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \ + -B $dip -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l) + pct=$((100 * (pkts - 10000) / 10000)) + (( -25 <= pct && pct <= 25)) + check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" + + log_test "tc sample rate ($desc)" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_rate_test() +{ + __tc_sample_rate_test "forward" 198.51.100.1 + __tc_sample_rate_test "local receive" 192.0.2.2 +} + +tc_sample_max_rate_test() +{ + RET=0 + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate $((35 * 10 ** 8)) group 1 + check_err $? "Failed to configure sampling rule with max rate" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate $((35 * 10 ** 8 + 1)) \ + group 1 &> /dev/null + check_fail $? "Managed to configure sampling rate above maximum" + + log_test "tc sample maximum rate" +} + +tc_sample_conflict_test() +{ + RET=0 + + # Test that two sampling rules cannot be configured on the same port, + # even when they share the same parameters. + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule" + + tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \ + skip_sw action sample rate 1024 group 1 &> /dev/null + check_fail $? "Managed to configure second sampling rule" + + # Delete the first rule and make sure the second rule can now be + # configured. + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall + + tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule after deletion" + + log_test "tc sample conflict test" + + tc filter del dev $rp1 ingress protocol all pref 2 handle 102 matchall +} + +tc_sample_group_conflict_test() +{ + RET=0 + + # Test that two sampling rules cannot be configured on the same port + # with different groups. + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule" + + tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \ + skip_sw action sample rate 1024 group 2 &> /dev/null + check_fail $? "Managed to configure sampling rule with conflicting group" + + log_test "tc sample group conflict test" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_iif_test() +{ + local rp1_ifindex + + RET=0 + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + rp1_ifindex=$(ip -j -p link show dev $rp1 | jq '.[]["ifindex"]') + grep -q -e "in-ifindex $rp1_ifindex " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected in-ifindex" + + log_test "tc sample iif" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_lag_iif_test() +{ + local rp3_ifindex + + RET=0 + + tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \ + -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + rp3_ifindex=$(ip -j -p link show dev $rp3 | jq '.[]["ifindex"]') + grep -q -e "in-ifindex $rp3_ifindex " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected in-ifindex" + + log_test "tc sample lag iif" + + tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_oif_test() +{ + local rp2_ifindex + + RET=0 + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + rp2_ifindex=$(ip -j -p link show dev $rp2 | jq '.[]["ifindex"]') + grep -q -e "out-ifindex $rp2_ifindex " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected out-ifindex" + + log_test "tc sample oif" + + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_lag_oif_test() +{ + local rp4_ifindex + + RET=0 + + tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \ + -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + rp4_ifindex=$(ip -j -p link show dev $rp4 | jq '.[]["ifindex"]') + grep -q -e "out-ifindex $rp4_ifindex " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected out-ifindex" + + log_test "tc sample lag oif" + + tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_out_tc_test() +{ + RET=0 + + # Output traffic class is not supported on Spectrum-1. + mlxsw_only_on_spectrum 2+ || return + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + # By default, all the packets should go to the same traffic class (0). + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + grep -q -e "out-tc 0 " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected out-tc (0)" + + # Map all priorities to highest traffic class (7) and check reported + # out-tc. + tc qdisc replace dev $rp2 root handle 1: \ + prio bands 3 priomap 0 0 0 0 0 0 0 0 + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + grep -q -e "out-tc 7 " $CAPTURE_FILE + check_err $? "Sampled packets do not have expected out-tc (7)" + + log_test "tc sample out-tc" + + tc qdisc del dev $rp2 root handle 1: + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_out_tc_occ_test() +{ + local backlog pct occ + + RET=0 + + # Output traffic class occupancy is not supported on Spectrum-1. + mlxsw_only_on_spectrum 2+ || return + + tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule" + + # Configure a shaper on egress to create congestion. + tc qdisc replace dev $rp2 root handle 1: \ + tbf rate 1Mbit burst 256k limit 1M + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q & + + # Allow congestion to reach steady state. + sleep 10 + + backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]') + + # Kill mausezahn. + kill_process %% + + psample_capture_stop + + # Record last congestion sample. + occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \ + cut -d ' ' -f 16) + + pct=$((100 * (occ - backlog) / backlog)) + (( -1 <= pct && pct <= 1)) + check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-5%" + + log_test "tc sample out-tc-occ" + + tc qdisc del dev $rp2 root handle 1: + tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall +} + +tc_sample_md_latency_test() +{ + RET=0 + + # Egress sampling not supported on Spectrum-1. + mlxsw_only_on_spectrum 2+ || return + + tc filter add dev $rp2 egress protocol all pref 1 handle 101 matchall \ + skip_sw action sample rate 5 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + grep -q -e "latency " $CAPTURE_FILE + check_err $? "Sampled packets do not have latency attribute" + + log_test "tc sample latency" + + tc filter del dev $rp2 egress protocol all pref 1 handle 101 matchall +} + +tc_sample_acl_group_conflict_test() +{ + RET=0 + + # Test that two flower sampling rules cannot be configured on the same + # port with different groups. + + # Policy-based sampling is not supported on Spectrum-1. + mlxsw_only_on_spectrum 2+ || return + + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule" + + tc filter add dev $rp1 ingress protocol ip pref 2 handle 102 flower \ + skip_sw action sample rate 1024 group 1 + check_err $? "Failed to configure sampling rule with same group" + + tc filter add dev $rp1 ingress protocol ip pref 3 handle 103 flower \ + skip_sw action sample rate 1024 group 2 &> /dev/null + check_fail $? "Managed to configure sampling rule with conflicting group" + + log_test "tc sample (w/ flower) group conflict test" + + tc filter del dev $rp1 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower +} + +__tc_sample_acl_rate_test() +{ + local bind=$1; shift + local port=$1; shift + local pkts pct + + RET=0 + + # Policy-based sampling is not supported on Spectrum-1. + mlxsw_only_on_spectrum 2+ || return + + tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l) + pct=$((100 * (pkts - 10000) / 10000)) + (( -25 <= pct && pct <= 25)) + check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" + + # Setup a filter that should not match any packet and make sure packets + # are not sampled. + tc filter del dev $port $bind protocol ip pref 1 handle 101 flower + + tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \ + skip_sw dst_ip 198.51.100.10 action sample rate 32 group 1 + check_err $? "Failed to configure sampling rule" + + psample_capture_start + + ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + psample_capture_stop + + grep -q -e "group 1 " $CAPTURE_FILE + check_fail $? "Sampled packets when should not" + + log_test "tc sample (w/ flower) rate ($bind)" + + tc filter del dev $port $bind protocol ip pref 1 handle 101 flower +} + +tc_sample_acl_rate_test() +{ + __tc_sample_acl_rate_test ingress $rp1 + __tc_sample_acl_rate_test egress $rp2 +} + +tc_sample_acl_max_rate_test() +{ + RET=0 + + # Policy-based sampling is not supported on Spectrum-1. + mlxsw_only_on_spectrum 2+ || return + + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw action sample rate $((2 ** 24 - 1)) group 1 + check_err $? "Failed to configure sampling rule with max rate" + + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower + + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + skip_sw action sample rate $((2 ** 24)) \ + group 1 &> /dev/null + check_fail $? "Managed to configure sampling rate above maximum" + + log_test "tc sample (w/ flower) maximum rate" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh new file mode 100755 index 000000000000..4687b0a7dffb --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -0,0 +1,1185 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test various aspects of VxLAN offloading which are specific to mlxsw, such +# as sanitization of invalid configurations and offload indication. + +: ${ADDR_FAMILY:=ipv4} +export ADDR_FAMILY + +: ${LOCAL_IP_1:=198.51.100.1} +export LOCAL_IP_1 + +: ${LOCAL_IP_2:=198.51.100.2} +export LOCAL_IP_2 + +: ${PREFIX_LEN:=32} +export PREFIX_LEN + +: ${UDPCSUM_FLAFS:=noudpcsum} +export UDPCSUM_FLAFS + +: ${MC_IP:=239.0.0.1} +export MC_IP + +: ${IP_FLAG:=""} +export IP_FLAG + +: ${ALL_TESTS:=" + sanitization_test + offload_indication_test + sanitization_vlan_aware_test + offload_indication_vlan_aware_test +"} + +lib_dir=$(dirname $0)/../../../net/forwarding +NUM_NETIFS=2 +: ${TIMEOUT:=20000} # ms +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +sanitization_single_dev_test_pass() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + + ip link set dev $swp1 nomaster + + ip link set dev $swp1 master br0 + check_err $? +} + +sanitization_single_dev_test_fail() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 &> /dev/null + check_fail $? + + ip link set dev $swp1 nomaster + + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp1 master br0 &> /dev/null + check_fail $? +} + +sanitization_single_dev_valid_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_pass + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device - valid configuration" +} + +sanitization_single_dev_vlan_aware_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_pass + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a vlan-aware bridge" +} + +sanitization_single_dev_mcast_enabled_test() +{ + RET=0 + + ip link add dev br0 type bridge + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a multicast enabled bridge" +} + +sanitization_single_dev_mcast_group_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add name dummy1 up type dummy + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \ + dev dummy1 group $MC_IP + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev dummy1 + ip link del dev br0 + + log_test "vxlan device with a multicast group" +} + +sanitization_single_dev_no_local_ip_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with no local ip" +} + +sanitization_single_dev_learning_enabled_ipv4_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_pass + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with learning enabled" +} + +sanitization_single_dev_local_interface_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add name dummy1 up type dummy + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 dev dummy1 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev dummy1 + ip link del dev br0 + + log_test "vxlan device with local interface" +} + +sanitization_single_dev_port_range_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \ + srcport 4000 5000 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with udp source port range" +} + +sanitization_single_dev_tos_static_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos 20 local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with static tos" +} + +sanitization_single_dev_ttl_inherit_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl inherit tos inherit local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with inherit ttl" +} + +sanitization_single_dev_udp_checksum_ipv4_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning udpcsum \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with udp checksum" +} + +sanitization_single_dev_test() +{ + # These tests make sure that we correctly sanitize VxLAN device + # configurations we do not support + sanitization_single_dev_valid_test + sanitization_single_dev_vlan_aware_test + sanitization_single_dev_mcast_enabled_test + sanitization_single_dev_mcast_group_test + sanitization_single_dev_no_local_ip_test + sanitization_single_dev_learning_enabled_"$ADDR_FAMILY"_test + sanitization_single_dev_local_interface_test + sanitization_single_dev_port_range_test + sanitization_single_dev_tos_static_test + sanitization_single_dev_ttl_inherit_test + sanitization_single_dev_udp_checksum_"$ADDR_FAMILY"_test +} + +sanitization_multi_devs_test_pass() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? + ip link set dev vxlan1 master br1 + check_err $? + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link set dev $swp1 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? +} + +sanitization_multi_devs_test_fail() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? + ip link set dev vxlan1 master br1 &> /dev/null + check_fail $? + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link set dev vxlan1 master br1 + check_err $? + ip link set dev $swp1 master br0 + check_err $? + ip link set dev $swp2 master br1 &> /dev/null + check_fail $? +} + +sanitization_multi_devs_valid_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + sanitization_multi_devs_test_pass + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices - valid configuration" +} + +sanitization_multi_devs_ttl_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ + ttl 40 tos inherit local $LOCAL_IP_1 dstport 4789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different ttl" +} + +sanitization_multi_devs_udp_dstport_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 5789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different udp destination port" +} + +sanitization_multi_devs_local_ip_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_2 dstport 4789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different local ip" +} + +sanitization_multi_devs_test() +{ + # The device has a single VTEP, which means all the VxLAN devices + # we offload must share certain properties such as source IP and + # UDP destination port. These tests make sure that we forbid + # configurations that violate this limitation + sanitization_multi_devs_valid_test + sanitization_multi_devs_ttl_test + sanitization_multi_devs_udp_dstport_test + sanitization_multi_devs_local_ip_test +} + +sanitization_test() +{ + sanitization_single_dev_test + sanitization_multi_devs_test +} + +offload_indication_setup_create() +{ + # Create a simple setup with two bridges, each with a VxLAN device + # and one local port + ip link add name br0 type bridge mcast_snooping 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + ip link add name br1 type bridge mcast_snooping 0 + ip link set dev br1 addrgenmode none + ip link set dev br1 up + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + + ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo + + ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 +} + +offload_indication_setup_destroy() +{ + ip link del dev vxlan1 + ip link del dev vxlan0 + + ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link del dev br1 + ip link del dev br0 +} + +offload_indication_fdb_flood_test() +{ + RET=0 + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst $LOCAL_IP_2 + + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \ + bridge fdb show brport vxlan0 + check_err $? + + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self + + log_test "vxlan flood entry offload indication" +} + +offload_indication_fdb_bridge_test() +{ + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \ + dst $LOCAL_IP_2 + + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 + check_err $? + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0 + check_err $? + + log_test "vxlan entry offload indication - initial state" + + # Remove FDB entry from the bridge driver and check that corresponding + # entry in the VxLAN driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 master + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 + check_err $? + + log_test "vxlan entry offload indication - after removal from bridge" + + # Add the FDB entry back to the bridge driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 master static + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 + check_err $? + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0 + check_err $? + + log_test "vxlan entry offload indication - after re-add to bridge" + + # Remove FDB entry from the VxLAN driver and check that corresponding + # entry in the bridge driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0 + check_err $? + + log_test "vxlan entry offload indication - after removal from vxlan" + + # Add the FDB entry back to the VxLAN driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst $LOCAL_IP_2 + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 + check_err $? + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0 + check_err $? + + log_test "vxlan entry offload indication - after re-add to vxlan" + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self master +} + +offload_indication_fdb_test() +{ + offload_indication_fdb_flood_test + offload_indication_fdb_bridge_test +} + +offload_indication_decap_route_test() +{ + RET=0 + + busywait "$TIMEOUT" wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + ip link set dev vxlan0 down + busywait "$TIMEOUT" wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + ip link set dev vxlan1 down + busywait "$TIMEOUT" not wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + log_test "vxlan decap route - vxlan device down" + + RET=0 + + ip link set dev vxlan1 up + busywait "$TIMEOUT" wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + ip link set dev vxlan0 up + busywait "$TIMEOUT" wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + log_test "vxlan decap route - vxlan device up" + + RET=0 + + ip address delete $LOCAL_IP_1/$PREFIX_LEN dev lo + busywait "$TIMEOUT" not wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo + busywait "$TIMEOUT" wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + log_test "vxlan decap route - add local route" + + RET=0 + + ip link set dev $swp1 nomaster + busywait "$TIMEOUT" wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + ip link set dev $swp2 nomaster + busywait "$TIMEOUT" not wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + busywait "$TIMEOUT" wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + log_test "vxlan decap route - local ports enslavement" + + RET=0 + + ip link del dev br0 + busywait "$TIMEOUT" wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + ip link del dev br1 + busywait "$TIMEOUT" not wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + log_test "vxlan decap route - bridge device deletion" + + RET=0 + + ip link add name br0 type bridge mcast_snooping 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + ip link add name br1 type bridge mcast_snooping 0 + ip link set dev br1 addrgenmode none + ip link set dev br1 up + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + ip link set dev vxlan0 master br0 + ip link set dev vxlan1 master br1 + busywait "$TIMEOUT" wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + ip link del dev vxlan0 + busywait "$TIMEOUT" wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + ip link del dev vxlan1 + busywait "$TIMEOUT" not wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + log_test "vxlan decap route - vxlan device deletion" + + ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 +} + +check_fdb_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac self \ + bridge fdb show dev vxlan0 + check_err $? + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac master \ + bridge fdb show dev vxlan0 + check_err $? + + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show dev vxlan0 + check_err $? +} + +check_vxlan_fdb_not_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep -q self + check_err $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac self \ + bridge fdb show dev vxlan0 + check_err $? + + bridge fdb show dev vxlan0 | grep $zmac | grep -q self + check_err $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show dev vxlan0 + check_err $? +} + +check_bridge_fdb_not_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep -q master + check_err $? + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac master \ + bridge fdb show dev vxlan0 + check_err $? +} + +__offload_indication_join_vxlan_first() +{ + local vid=$1; shift + + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2 + + ip link set dev vxlan0 master br0 + bridge fdb add dev vxlan0 $mac self master static dst $LOCAL_IP_2 + + RET=0 + check_vxlan_fdb_not_offloaded + ip link set dev $swp1 master br0 + sleep .1 + check_fdb_offloaded + log_test "offload indication - attach vxlan first" + + RET=0 + ip link set dev vxlan0 down + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - set vxlan down" + + RET=0 + ip link set dev vxlan0 up + sleep .1 + check_fdb_offloaded + log_test "offload indication - set vxlan up" + + if [[ ! -z $vid ]]; then + RET=0 + bridge vlan del dev vxlan0 vid $vid + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - delete VLAN" + + RET=0 + bridge vlan add dev vxlan0 vid $vid + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - add tagged VLAN" + + RET=0 + bridge vlan add dev vxlan0 vid $vid pvid untagged + sleep .1 + check_fdb_offloaded + log_test "offload indication - add pvid/untagged VLAN" + fi + + RET=0 + ip link set dev $swp1 nomaster + check_vxlan_fdb_not_offloaded + log_test "offload indication - detach port" +} + +offload_indication_join_vxlan_first() +{ + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + __offload_indication_join_vxlan_first + + ip link del dev vxlan0 + ip link del dev br0 +} + +__offload_indication_join_vxlan_last() +{ + local zmac=00:00:00:00:00:00 + + RET=0 + + bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2 + + ip link set dev $swp1 master br0 + + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show dev vxlan0 + check_err $? + + ip link set dev vxlan0 master br0 + + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show dev vxlan0 + check_err $? + + log_test "offload indication - attach vxlan last" +} + +offload_indication_join_vxlan_last() +{ + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + __offload_indication_join_vxlan_last + + ip link del dev vxlan0 + ip link del dev br0 +} + +offload_indication_test() +{ + offload_indication_setup_create + offload_indication_fdb_test + offload_indication_decap_route_test + offload_indication_setup_destroy + + log_info "offload indication - replay & cleanup" + offload_indication_join_vxlan_first + offload_indication_join_vxlan_last +} + +sanitization_vlan_aware_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 + ip link set dev br0 addrgenmode none + + ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + # Test that when each VNI is mapped to a different VLAN we can enslave + # a port to the bridge + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged + + ip link set dev $swp1 master br0 + check_err $? + + log_test "vlan-aware - enslavement to vlan-aware bridge" + + # Try to map both VNIs to the same VLAN and make sure configuration + # fails + RET=0 + + bridge vlan add vid 10 dev vxlan20 pvid untagged &> /dev/null + check_fail $? + + log_test "vlan-aware - two vnis mapped to the same vlan" + + # Test that enslavement of a port to a bridge fails when two VNIs + # are mapped to the same VLAN + RET=0 + + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vxlan20 pvid untagged + bridge vlan add vid 10 dev vxlan20 pvid untagged + + ip link set dev $swp1 master br0 &> /dev/null + check_fail $? + + log_test "vlan-aware - failed enslavement to vlan-aware bridge" + + bridge vlan del vid 10 dev vxlan20 + bridge vlan add vid 20 dev vxlan20 pvid untagged + + # Test that when two VXLAN tunnels with conflicting configurations + # (i.e., different TTL) are enslaved to the same VLAN-aware bridge, + # then the enslavement of a port to the bridge is denied. + + # Use the offload indication of the local route to ensure the VXLAN + # configuration was correctly rollbacked. + ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo + + ip link set dev vxlan10 type vxlan ttl 10 + ip link set dev $swp1 master br0 &> /dev/null + check_fail $? + + busywait "$TIMEOUT" not wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + log_test "vlan-aware - failed enslavement to bridge due to conflict" + + ip link set dev vxlan10 type vxlan ttl 20 + ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo + + ip link del dev vxlan20 + ip link del dev vxlan10 + ip link del dev br0 +} + +offload_indication_vlan_aware_setup_create() +{ + # Create a simple setup with two VxLAN devices and a single VLAN-aware + # bridge + ip link add name br0 type bridge mcast_snooping 0 vlan_filtering 1 \ + vlan_default_pvid 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + + ip link set dev $swp1 master br0 + + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo + + ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged +} + +offload_indication_vlan_aware_setup_destroy() +{ + bridge vlan del vid 20 dev vxlan20 + bridge vlan del vid 10 dev vxlan10 + + ip link del dev vxlan20 + ip link del dev vxlan10 + + ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + + ip link set dev $swp1 nomaster + + ip link del dev br0 +} + +offload_indication_vlan_aware_fdb_test() +{ + RET=0 + + log_info "vxlan entry offload indication - vlan-aware" + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \ + dst $LOCAL_IP_2 vlan 10 + + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 + check_err $? + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10 + check_err $? + + log_test "vxlan entry offload indication - initial state" + + # Remove FDB entry from the bridge driver and check that corresponding + # entry in the VxLAN driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 master vlan 10 + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 + check_err $? + + log_test "vxlan entry offload indication - after removal from bridge" + + # Add the FDB entry back to the bridge driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 master static vlan 10 + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 + check_err $? + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10 + check_err $? + + log_test "vxlan entry offload indication - after re-add to bridge" + + # Remove FDB entry from the VxLAN driver and check that corresponding + # entry in the bridge driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10 + check_err $? + + log_test "vxlan entry offload indication - after removal from vxlan" + + # Add the FDB entry back to the VxLAN driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst $LOCAL_IP_2 + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 + check_err $? + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ + de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10 + check_err $? + + log_test "vxlan entry offload indication - after re-add to vxlan" + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self master vlan 10 +} + +offload_indication_vlan_aware_decap_route_test() +{ + RET=0 + + busywait "$TIMEOUT" wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + # Toggle PVID flag on one VxLAN device and make sure route is still + # marked as offloaded + bridge vlan add vid 10 dev vxlan10 untagged + + busywait "$TIMEOUT" wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + # Toggle PVID flag on second VxLAN device and make sure route is no + # longer marked as offloaded + bridge vlan add vid 20 dev vxlan20 untagged + + busywait "$TIMEOUT" not wait_for_offload \ + ip $IP_FLAG route show table local $LOCAL_IP_1 + check_err $? + + # Toggle PVID flag back and make sure route is marked as offloaded + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged + + busywait "$TIMEOUT" wait_for_offload ip $IP_FLAG route show table local \ + $LOCAL_IP_1 + check_err $? + + log_test "vxlan decap route - vni map/unmap" +} + +offload_indication_vlan_aware_join_vxlan_first() +{ + ip link add dev br0 type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 1 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + __offload_indication_join_vxlan_first 1 + + ip link del dev vxlan0 + ip link del dev br0 +} + +offload_indication_vlan_aware_join_vxlan_last() +{ + ip link add dev br0 type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 1 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + __offload_indication_join_vxlan_last + + ip link del dev vxlan0 + ip link del dev br0 +} + +offload_indication_vlan_aware_l3vni_test() +{ + local zmac=00:00:00:00:00:00 + + RET=0 + + sysctl_set net.ipv6.conf.default.disable_ipv6 1 + ip link add dev br0 type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + ip link set dev $swp1 master br0 + + # The test will use the offload indication on the FDB entry to + # understand if the tunnel is offloaded or not + bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2 + + ip link set dev vxlan0 master br0 + bridge vlan add dev vxlan0 vid 10 pvid untagged + + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 + check_err $? "vxlan tunnel not offloaded when should" + + # Configure a VLAN interface and make sure tunnel is offloaded + ip link add link br0 name br10 up type vlan id 10 + sysctl_set net.ipv6.conf.br10.disable_ipv6 0 + ip -6 address add 2001:db8:1::1/64 dev br10 + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 + check_err $? "vxlan tunnel not offloaded when should" + + # Unlink the VXLAN device, make sure tunnel is no longer offloaded, + # then add it back to the bridge and make sure it is offloaded + ip link set dev vxlan0 nomaster + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 + check_err $? "vxlan tunnel offloaded after unlinked from bridge" + + ip link set dev vxlan0 master br0 + busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 + check_err $? "vxlan tunnel offloaded despite no matching vid" + + bridge vlan add dev vxlan0 vid 10 pvid untagged + busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \ + bridge fdb show brport vxlan0 + check_err $? "vxlan tunnel not offloaded after adding vid" + + log_test "vxlan - l3 vni" + + ip link del dev vxlan0 + ip link del dev br0 + sysctl_restore net.ipv6.conf.default.disable_ipv6 +} + +offload_indication_vlan_aware_test() +{ + offload_indication_vlan_aware_setup_create + offload_indication_vlan_aware_fdb_test + offload_indication_vlan_aware_decap_route_test + offload_indication_vlan_aware_setup_destroy + + log_info "offload indication - replay & cleanup - vlan aware" + offload_indication_vlan_aware_join_vxlan_first + offload_indication_vlan_aware_join_vxlan_last + offload_indication_vlan_aware_l3vni_test +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh new file mode 100755 index 000000000000..38148f51877a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh @@ -0,0 +1,141 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test vetoing of FDB entries that mlxsw can not offload. This exercises several +# different veto vectors to test various rollback scenarios in the vxlan driver. + +: ${LOCAL_IP:=198.51.100.1} +export LOCAL_IP + +: ${REMOTE_IP_1:=198.51.100.2} +export REMOTE_IP_1 + +: ${REMOTE_IP_2:=198.51.100.3} +export REMOTE_IP_2 + +: ${UDPCSUM_FLAFS:=noudpcsum} +export UDPCSUM_FLAFS + +: ${MC_IP:=224.0.0.1} +export MC_IP + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + fdb_create_veto_test + fdb_replace_veto_test + fdb_append_veto_test + fdb_changelink_veto_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link set dev $swp1 up + ip link set dev $swp1 master br0 + ip link set dev $swp2 up + + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP dstport 4789 + ip link set dev vxlan0 master br0 +} + +cleanup() +{ + pre_cleanup + + ip link set dev vxlan0 nomaster + ip link del dev vxlan0 + + ip link set dev $swp2 down + ip link set dev $swp1 nomaster + ip link set dev $swp1 down + + ip link del dev br0 +} + +fdb_create_veto_test() +{ + RET=0 + + bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \ + dst $REMOTE_IP_1 2>/dev/null + check_fail $? "multicast MAC not rejected" + + bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \ + dst $REMOTE_IP_1 2>&1 >/dev/null | grep -q mlxsw_spectrum + check_err $? "multicast MAC rejected without extack" + + log_test "vxlan FDB veto - create" +} + +fdb_replace_veto_test() +{ + RET=0 + + bridge fdb add 00:01:02:03:04:05 dev vxlan0 self static \ + dst $REMOTE_IP_1 + check_err $? "valid FDB rejected" + + bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \ + dst $REMOTE_IP_1 port 1234 2>/dev/null + check_fail $? "FDB with an explicit port not rejected" + + bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \ + dst $REMOTE_IP_1 port 1234 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "FDB with an explicit port rejected without extack" + + log_test "vxlan FDB veto - replace" +} + +fdb_append_veto_test() +{ + RET=0 + + bridge fdb add 00:00:00:00:00:00 dev vxlan0 self static \ + dst $REMOTE_IP_1 + check_err $? "valid FDB rejected" + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \ + dst $REMOTE_IP_2 port 1234 2>/dev/null + check_fail $? "FDB with an explicit port not rejected" + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \ + dst $REMOTE_IP_2 port 1234 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "FDB with an explicit port rejected without extack" + + log_test "vxlan FDB veto - append" +} + +fdb_changelink_veto_test() +{ + RET=0 + + ip link set dev vxlan0 type vxlan \ + group $MC_IP dev lo 2>/dev/null + check_fail $? "FDB with a multicast IP not rejected" + + ip link set dev vxlan0 type vxlan \ + group $MC_IP dev lo 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "FDB with a multicast IP rejected without extack" + + log_test "vxlan FDB veto - changelink" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh new file mode 100755 index 000000000000..66c87aab86f6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN test for IPv6. + +LOCAL_IP=2001:db8:1::1 +REMOTE_IP_1=2001:db8:2::1 +REMOTE_IP_2=2001:db8:3::1 +UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx" +MC_IP=FF02::2 + +source vxlan_fdb_veto.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh new file mode 100755 index 000000000000..af5ea50ed5c0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh @@ -0,0 +1,326 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test VxLAN flooding. The device stores flood records in a singly linked list +# where each record stores up to three IPv4 addresses of remote VTEPs. The test +# verifies that packets are correctly flooded in various cases such as deletion +# of a record in the middle of the list. +# +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 203.0.113.1/24| +# +----|---------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | + vxlan0 (vxlan) | | +# | | local 198.51.100.1 | | +# | | remote 198.51.100.{2..13} | | +# | | id 10 dstport 4789 | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 198.51.100.0/24 via 192.0.2.2 | +# | | +# | + $rp1 | +# | | 192.0.2.1/24 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | R2 (vrf) | +# | + $rp2 | +# | 192.0.2.2/24 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="flooding_test" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 203.0.113.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 203.0.113.1/24 +} + +switch_create() +{ + # Make sure the bridge uses the MAC address of the local port and + # not that of the VxLAN's device + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 address $(mac_get $swp1) + + ip link add name vxlan0 type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + ip address add 198.51.100.1/32 dev lo + + ip link set dev $swp1 master br0 + ip link set dev vxlan0 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev vxlan0 up +} + +switch_destroy() +{ + ip link set dev vxlan0 down + ip link set dev $swp1 down + ip link set dev br0 down + + ip link set dev vxlan0 nomaster + ip link set dev $swp1 nomaster + + ip address del 198.51.100.1/32 dev lo + + ip link del dev vxlan0 + + ip link del dev br0 +} + +router1_create() +{ + # This router is in the default VRF, where the VxLAN device is + # performing the L3 lookup + ip link set dev $rp1 up + ip address add 192.0.2.1/24 dev $rp1 + ip route add 198.51.100.0/24 via 192.0.2.2 +} + +router1_destroy() +{ + ip route del 198.51.100.0/24 via 192.0.2.2 + ip address del 192.0.2.1/24 dev $rp1 + ip link set dev $rp1 down +} + +router2_create() +{ + # This router is not in the default VRF, so use simple_if_init() + simple_if_init $rp2 192.0.2.2/24 +} + +router2_destroy() +{ + simple_if_fini $rp2 192.0.2.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + + h1_create + + switch_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +flooding_remotes_add() +{ + local num_remotes=$1 + local lsb + local i + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \ + dst 198.51.100.$lsb + done +} + +flooding_filters_add() +{ + local num_remotes=$1 + local lsb + local i + + # Prevent unwanted packets from entering the bridge and interfering + # with the test. + tc qdisc add dev br0 clsact + tc filter add dev br0 egress protocol all pref 1 handle 1 \ + matchall skip_hw action drop + tc qdisc add dev $h1 clsact + tc filter add dev $h1 egress protocol all pref 1 handle 1 \ + flower skip_hw dst_mac de:ad:be:ef:13:37 action pass + tc filter add dev $h1 egress protocol all pref 2 handle 2 \ + matchall skip_hw action drop + + tc qdisc add dev $rp2 clsact + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + tc filter add dev $rp2 ingress protocol ip pref $i handle $i \ + flower ip_proto udp dst_ip 198.51.100.$lsb \ + dst_port 4789 skip_sw action drop + done +} + +flooding_filters_del() +{ + local num_remotes=$1 + local i + + for i in $(eval echo {1..$num_remotes}); do + tc filter del dev $rp2 ingress protocol ip pref $i \ + handle $i flower + done + + tc qdisc del dev $rp2 clsact + + tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall + tc filter del dev $h1 egress protocol all pref 1 handle 1 flower + tc qdisc del dev $h1 clsact + tc filter del dev br0 egress protocol all pref 1 handle 1 matchall + tc qdisc del dev br0 clsact +} + +flooding_check_packets() +{ + local packets=("$@") + local num_remotes=${#packets[@]} + local i + + for i in $(eval echo {1..$num_remotes}); do + tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]} + check_err $? "remote $i - did not get expected number of packets" + done +} + +flooding_test() +{ + # Use 12 remote VTEPs that will be stored in 4 records. The array + # 'packets' will store how many packets are expected to be received + # by each remote VTEP at each stage of the test + declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1) + local num_remotes=12 + + RET=0 + + # Add FDB entries for remote VTEPs and corresponding tc filters on the + # ingress of the nexthop router. These filters will count how many + # packets were flooded to each remote VTEP + flooding_remotes_add $num_remotes + flooding_filters_add $num_remotes + + # Send one packet and make sure it is flooded to all the remote VTEPs + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 1 packet" + + # Delete the third record which corresponds to VTEPs with LSB 8..10 + # and check that packet is flooded correctly when we remove a record + # from the middle of the list + RET=0 + + packets=(2 2 2 2 2 2 1 1 1 2 2 2) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.8 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.9 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.10 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 2 packets" + + # Delete the first record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 3 3 3 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.3 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.4 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 3 packets" + + # Delete the last record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 4 4 4 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.11 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.12 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.13 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 4 packets" + + # Delete the last record, one entry at a time and make sure single + # entries are correctly removed + RET=0 + + packets=(2 2 2 4 5 5 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.5 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 5 packets" + + RET=0 + + packets=(2 2 2 4 5 6 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.6 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 6 packets" + + RET=0 + + packets=(2 2 2 4 5 6 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.7 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 7 packets" + + flooding_filters_del $num_remotes +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh new file mode 100755 index 000000000000..f2ea0163ddea --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN test for IPv6. + +ADDR_FAMILY=ipv6 +LOCAL_IP_1=2001:db8:1::1 +LOCAL_IP_2=2001:db8:1::2 +PREFIX_LEN=128 +UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx" +MC_IP=FF02::2 +IP_FLAG="-6" + +ALL_TESTS=" + sanitization_test + offload_indication_test + sanitization_vlan_aware_test + offload_indication_vlan_aware_test +" + +sanitization_single_dev_learning_enabled_ipv6_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with learning enabled" +} + +sanitization_single_dev_udp_checksum_ipv6_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning \ + noudp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \ + local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_fail + log_test "vxlan device without zero udp checksum at RX" + + ip link del dev vxlan0 + + ip link add name vxlan0 up type vxlan id 10 nolearning \ + udp6zerocsumrx noudp6zerocsumtx ttl 20 tos inherit \ + local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_fail + log_test "vxlan device without zero udp checksum at TX" + + ip link del dev vxlan0 + ip link del dev br0 + +} + +source vxlan.sh diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py new file mode 100755 index 000000000000..d05eddcad539 --- /dev/null +++ b/tools/testing/selftests/drivers/net/napi_id.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, NetDrvEpEnv +from lib.py import bkg, cmd, rand_port, NetNSEnter + +def test_napi_id(cfg) -> None: + port = rand_port() + listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr} {port}" + + with bkg(listen_cmd, ksft_wait=3) as server: + cmd(f"echo a | socat - TCP:{cfg.baddr}:{port}", host=cfg.remote, shell=True) + + ksft_eq(0, server.ret) + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + ksft_run([test_napi_id], args=(cfg,)) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c new file mode 100644 index 000000000000..7f49ca6c8637 --- /dev/null +++ b/tools/testing/selftests/drivers/net/napi_id_helper.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <netdb.h> + +#include "../../net/lib/ksft.h" + +int main(int argc, char *argv[]) +{ + struct sockaddr_storage address; + struct addrinfo *result; + struct addrinfo hints; + unsigned int napi_id; + socklen_t addr_len; + socklen_t optlen; + char buf[1024]; + int opt = 1; + int family; + int server; + int client; + int ret; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_PASSIVE; + + ret = getaddrinfo(argv[1], argv[2], &hints, &result); + if (ret != 0) { + fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(ret)); + return 1; + } + + family = result->ai_family; + addr_len = result->ai_addrlen; + + server = socket(family, SOCK_STREAM, IPPROTO_TCP); + if (server < 0) { + perror("socket creation failed"); + freeaddrinfo(result); + if (errno == EAFNOSUPPORT) + return -1; + return 1; + } + + if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) { + perror("setsockopt"); + freeaddrinfo(result); + return 1; + } + + memcpy(&address, result->ai_addr, result->ai_addrlen); + freeaddrinfo(result); + + if (bind(server, (struct sockaddr *)&address, addr_len) < 0) { + perror("bind failed"); + return 1; + } + + if (listen(server, 1) < 0) { + perror("listen"); + return 1; + } + + ksft_ready(); + + client = accept(server, NULL, 0); + if (client < 0) { + perror("accept"); + return 1; + } + + optlen = sizeof(napi_id); + ret = getsockopt(client, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, + &optlen); + if (ret != 0) { + perror("getsockopt"); + return 1; + } + + read(client, buf, 1024); + + ksft_wait(); + + if (napi_id == 0) { + fprintf(stderr, "napi ID is 0\n"); + return 1; + } + + close(client); + close(server); + + return 0; +} diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py new file mode 100755 index 000000000000..f4be72b2145a --- /dev/null +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Test napi threaded states. +""" + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, ksft_ne, ksft_ge +from lib.py import NetDrvEnv, NetdevFamily +from lib.py import cmd, defer, ethtool + + +def _assert_napi_threaded_enabled(nl, napi_id) -> None: + napi = nl.napi_get({'id': napi_id}) + ksft_eq(napi['threaded'], 'enabled') + ksft_ne(napi.get('pid'), None) + + +def _assert_napi_threaded_disabled(nl, napi_id) -> None: + napi = nl.napi_get({'id': napi_id}) + ksft_eq(napi['threaded'], 'disabled') + ksft_eq(napi.get('pid'), None) + + +def _set_threaded_state(cfg, threaded) -> None: + with open(f"/sys/class/net/{cfg.ifname}/threaded", "wb") as fp: + fp.write(str(threaded).encode('utf-8')) + + +def _setup_deferred_cleanup(cfg) -> None: + combined = ethtool(f"-l {cfg.ifname}", json=True)[0].get("combined", 0) + ksft_ge(combined, 2) + defer(ethtool, f"-L {cfg.ifname} combined {combined}") + + threaded = cmd(f"cat /sys/class/net/{cfg.ifname}/threaded").stdout + defer(_set_threaded_state, cfg, threaded) + + return combined + + +def napi_init(cfg, nl) -> None: + """ + Test that threaded state (in the persistent NAPI config) gets updated + even when NAPI with given ID is not allocated at the time. + """ + + qcnt = _setup_deferred_cleanup(cfg) + + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 0) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'disabled') + ksft_eq(napi.get('pid'), None) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'enabled') + ksft_ne(napi.get('pid'), None) + + +def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: + """ + Test that when napi threaded is enabled at device level and + then disabled at napi level for one napi, the threaded state + of all napis is preserved after a change in number of queues. + """ + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + ksft_ge(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + qcnt = _setup_deferred_cleanup(cfg) + + # set threaded + _set_threaded_state(cfg, 1) + + # check napi threaded is set for both napis + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_enabled(nl, napi1_id) + + # disable threaded for napi1 + nl.napi_set({'id': napi1_id, 'threaded': 'disabled'}) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_disabled(nl, napi1_id) + + +def change_num_queues(cfg, nl) -> None: + """ + Test that when napi threaded is enabled at device level, + the napi threaded state is preserved after a change in + number of queues. + """ + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + ksft_ge(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + qcnt = _setup_deferred_cleanup(cfg) + + # set threaded + _set_threaded_state(cfg, 1) + + # check napi threaded is set for both napis + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_enabled(nl, napi1_id) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + # check napi threaded is set for both napis + _assert_napi_threaded_enabled(nl, napi0_id) + _assert_napi_threaded_enabled(nl, napi1_id) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, queue_count=2) as cfg: + ksft_run([napi_init, + change_num_queues, + enable_dev_threaded_disable_napi_threaded], + args=(cfg, NetdevFamily())) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh new file mode 100755 index 000000000000..2022f3061738 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This test creates two netdevsim virtual interfaces, assigns one of them (the +# "destination interface") to a new namespace, and assigns IP addresses to both +# interfaces. +# +# It listens on the destination interface using socat and configures a dynamic +# target on netconsole, pointing to the destination IP address. +# +# Finally, it checks whether the message was received properly on the +# destination interface. Note that this test may pollute the kernel log buffer +# (dmesg) and relies on dynamic configuration and namespaces being configured. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT + +# Run the test twice, with different format modes +for FORMAT in "basic" "extended" +do + for IP_VERSION in "ipv6" "ipv4" + do + echo "Running with target mode: ${FORMAT} (${IP_VERSION})" + # Set current loglevel to KERN_INFO(6), and default to + # KERN_NOTICE(5) + echo "6 5" > /proc/sys/kernel/printk + # Create one namespace and two interfaces + set_network "${IP_VERSION}" + # Create a dynamic target for netconsole + create_dynamic_target "${FORMAT}" + # Only set userdata for extended format + if [ "$FORMAT" == "extended" ] + then + # Set userdata "key" with the "value" value + set_user_data + fi + # Listed for netconsole port inside the namespace and + # destination interface + listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" & + # Wait for socat to start and listen to the port. + wait_for_port "${NAMESPACE}" "${PORT}" "${IP_VERSION}" + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + # kill socat in case it is still running + pkill_socat + cleanup + echo "${FORMAT} : ${IP_VERSION} : Test passed" >&2 + done +done + +trap - EXIT +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh new file mode 100755 index 000000000000..d1d23dc67f99 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This is a selftest to test cmdline arguments on netconsole. +# It exercises loading of netconsole from cmdline instead of the dynamic +# reconfiguration. This includes parsing the long netconsole= line and all the +# flow through init_netconsole(). +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +check_netconsole_module + +modprobe netdevsim 2> /dev/null || true +rmmod netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +# check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace and network interfaces +trap do_cleanup EXIT +# Create one namespace and two interfaces +set_network + +# Run the test twice, with different cmdline parameters +for BINDMODE in "ifname" "mac" +do + echo "Running with bind mode: ${BINDMODE}" >&2 + # Create the command line for netconsole, with the configuration from + # the function above + CMDLINE=$(create_cmdline_str "${BINDMODE}") + + # The content of kmsg will be save to the following file + OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}" + + # Load the module, with the cmdline set + modprobe netconsole "${CMDLINE}" + + # Listed for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_msg "${OUTPUT_FILE}" + + # kill socat in case it is still running + pkill_socat + # Unload the module + rmmod netconsole + echo "${BINDMODE} : Test passed" >&2 +done + +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh new file mode 100755 index 000000000000..4a71e01a230c --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# Test netconsole's message fragmentation functionality. +# +# When a message exceeds the maximum packet size, netconsole splits it into +# multiple fragments for transmission. This test verifies: +# - Correct fragmentation of large messages +# - Proper reassembly of fragments at the receiver +# - Preservation of userdata across fragments +# - Behavior with and without kernel release version appending +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# set userdata to a long value. In this case, it is "1-2-3-4...50-" +USERDATA_VALUE=$(printf -- '%.2s-' {1..60}) + +# Convert the header string in a regexp, so, we can remove +# the second header as well. +# A header looks like "13,468,514729715,-,ncfrag=0/1135;". If +# release is appended, you might find something like:L +# "6.13.0-04048-g4f561a87745a,13,468,514729715,-,ncfrag=0/1135;" +function header_to_regex() { + # header is everything before ; + local HEADER="${1}" + REGEX=$(echo "${HEADER}" | cut -d'=' -f1) + echo "${REGEX}=[0-9]*\/[0-9]*;" +} + +# We have two headers in the message. Remove both to get the full message, +# and extract the full message. +function extract_msg() { + local MSGFILE="${1}" + # Extract the header, which is the very first thing that arrives in the + # first list. + HEADER=$(sed -n '1p' "${MSGFILE}" | cut -d';' -f1) + HEADER_REGEX=$(header_to_regex "${HEADER}") + + # Remove the two headers from the received message + # This will return the message without any header, similarly to what + # was sent. + sed "s/""${HEADER_REGEX}""//g" "${MSGFILE}" +} + +# Validate the message, which has two messages glued together. +# unwrap them to make sure all the characters were transmitted. +# File will look like the following: +# 13,468,514729715,-,ncfrag=0/1135;<message> +# key=<part of key>-13,468,514729715,-,ncfrag=967/1135;<rest of the key> +function validate_fragmented_result() { + # Discard the netconsole headers, and assemble the full message + RCVMSG=$(extract_msg "${1}") + + # check for the main message + if ! echo "${RCVMSG}" | grep -q "${MSG}"; then + echo "Message body doesn't match." >&2 + echo "msg received=" "${RCVMSG}" >&2 + exit "${ksft_fail}" + fi + + # check userdata + if ! echo "${RCVMSG}" | grep -q "${USERDATA_VALUE}"; then + echo "message userdata doesn't match" >&2 + echo "msg received=" "${RCVMSG}" >&2 + exit "${ksft_fail}" + fi + # test passed. hooray +} + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target +# Set userdata "key" with the "value" value +set_user_data + + +# TEST 1: Send message and userdata. They will fragment +# ======= +MSG=$(printf -- 'MSG%.3s=' {1..150}) + +# Listen for netconsole port inside the namespace and destination interface +listen_port_and_save_to "${OUTPUT_FILE}" & +# Wait for socat to start and listen to the port. +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +# Send the message +echo "${MSG}: ${TARGET}" > /dev/kmsg +# Wait until socat saves the file to disk +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +# Check if the message was not corrupted +validate_fragmented_result "${OUTPUT_FILE}" + +# TEST 2: Test with smaller message, and without release appended +# ======= +MSG=$(printf -- 'FOOBAR%.3s=' {1..100}) +# Let's disable release and test again. +disable_release_append + +listen_port_and_save_to "${OUTPUT_FILE}" & +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +echo "${MSG}: ${TARGET}" > /dev/kmsg +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +validate_fragmented_result "${OUTPUT_FILE}" +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netcons_overflow.sh new file mode 100755 index 000000000000..06089643b771 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_overflow.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This test verifies that users can successfully create up to +# MAX_USERDATA_ITEMS userdata entries without encountering any failures. +# +# Additionally, it tests for expected failure when attempting to exceed this +# maximum limit. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh +# This is coming from netconsole code. Check for it in drivers/net/netconsole.c +MAX_USERDATA_ITEMS=256 + +# Function to create userdata entries +function create_userdata_max_entries() { + # All these keys should be created without any error + for i in $(seq $MAX_USERDATA_ITEMS) + do + # USERDATA_KEY is used by set_user_data + USERDATA_KEY="key"${i} + set_user_data + done +} + +# Function to verify the entry limit +function verify_entry_limit() { + # Allowing the test to fail without exiting, since the next command + # will fail + set +e + mkdir "${NETCONS_PATH}/userdata/key_that_will_fail" 2> /dev/null + ret="$?" + set -e + if [ "$ret" -eq 0 ]; + then + echo "Adding more than ${MAX_USERDATA_ITEMS} entries in userdata should fail, but it didn't" >&2 + ls "${NETCONS_PATH}/userdata/" >&2 + exit "${ksft_fail}" + fi +} + +# ========== # +# Start here # +# ========== # + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies + +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target +# populate the maximum number of supported keys in userdata +create_userdata_max_entries +# Verify an additional entry is not allowed +verify_entry_limit +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh new file mode 100755 index 000000000000..baf69031089e --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh @@ -0,0 +1,272 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# A test that makes sure that sysdata runtime CPU data is properly set +# when a message is sent. +# +# There are 3 different tests, every time sent using a random CPU. +# - Test #1 +# * Only enable cpu_nr sysdata feature. +# - Test #2 +# * Keep cpu_nr sysdata feature enable and enable userdata. +# - Test #3 +# * keep userdata enabled, and disable sysdata cpu_nr feature. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +# Enable the sysdata cpu_nr feature +function set_cpu_nr() { + if [[ ! -f "${NETCONS_PATH}/userdata/cpu_nr_enabled" ]] + then + echo "Populate CPU configfs path not available in ${NETCONS_PATH}/userdata/cpu_nr_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" +} + +# Enable the taskname to be appended to sysdata +function set_taskname() { + if [[ ! -f "${NETCONS_PATH}/userdata/taskname_enabled" ]] + then + echo "Not able to enable taskname sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/taskname_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/taskname_enabled" +} + +# Enable the release to be appended to sysdata +function set_release() { + if [[ ! -f "${NETCONS_PATH}/userdata/release_enabled" ]] + then + echo "Not able to enable release sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/release_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/release_enabled" +} + +# Enable the msgid to be appended to sysdata +function set_msgid() { + if [[ ! -f "${NETCONS_PATH}/userdata/msgid_enabled" ]] + then + echo "Not able to enable msgid sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/msgid_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/msgid_enabled" +} + +# Disable the sysdata cpu_nr feature +function unset_cpu_nr() { + echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" +} + +# Once called, taskname=<..> will not be appended anymore +function unset_taskname() { + echo 0 > "${NETCONS_PATH}/userdata/taskname_enabled" +} + +function unset_release() { + echo 0 > "${NETCONS_PATH}/userdata/release_enabled" +} + +function unset_msgid() { + echo 0 > "${NETCONS_PATH}/userdata/msgid_enabled" +} + +# Test if MSG contains sysdata +function validate_sysdata() { + # OUTPUT_FILE will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # userdatakey=userdatavalue + # cpu=X + # taskname=<taskname> + # msgid=<id> + + # Echo is what this test uses to create the message. See runtest() + # function + SENDER="echo" + + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then + echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + # Check if cpu=XX exists in the file and matches the one used + # in taskset(1) + if ! grep -q "cpu=${CPU}\+" "${OUTPUT_FILE}"; then + echo "FAIL: 'cpu=${CPU}' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "taskname=${SENDER}" "${OUTPUT_FILE}"; then + echo "FAIL: 'taskname=echo' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "msgid=[0-9]\+$" "${OUTPUT_FILE}"; then + echo "FAIL: 'msgid=<id>' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + rm "${OUTPUT_FILE}" + pkill_socat +} + +function validate_release() { + RELEASE=$(uname -r) + + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "release=${RELEASE}" "${OUTPUT_FILE}"; then + echo "FAIL: 'release=${RELEASE}' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi +} + +# Test if MSG content exists in OUTPUT_FILE but no `cpu=` and `taskname=` +# strings +function validate_no_sysdata() { + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then + echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if grep -q "cpu=" "${OUTPUT_FILE}"; then + echo "FAIL: 'cpu= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if grep -q "taskname=" "${OUTPUT_FILE}"; then + echo "FAIL: 'taskname= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if grep -q "release=" "${OUTPUT_FILE}"; then + echo "FAIL: 'release= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + if grep -q "msgid=" "${OUTPUT_FILE}"; then + echo "FAIL: 'msgid= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + + rm "${OUTPUT_FILE}" +} + +# Start socat, send the message and wait for the file to show up in the file +# system +function runtest { + # Listen for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + taskset -c "${CPU}" echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +} + +# ========== # +# Start here # +# ========== # + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies +# This test also depends on taskset(1). Check for it before starting the test +check_for_taskset + +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target + +#==================================================== +# TEST #1 +# Send message from a random CPU +#==================================================== +# Random CPU in the system +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_1" +MSG="Test #1 from CPU${CPU}" +# Enable the auto population of cpu_nr +set_cpu_nr +# Enable taskname to be appended to sysdata +set_taskname +set_release +set_msgid +runtest +# Make sure the message was received in the dst part +# and exit +validate_release +validate_sysdata + +#==================================================== +# TEST #2 +# This test now adds userdata together with sysdata +# =================================================== +# Get a new random CPU +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_2" +MSG="Test #2 from CPU${CPU}" +set_user_data +runtest +validate_release +validate_sysdata + +# =================================================== +# TEST #3 +# Unset all sysdata, fail if any userdata is set +# =================================================== +CPU=$((RANDOM % $(nproc))) +OUTPUT_FILE="/tmp/${TARGET}_3" +MSG="Test #3 from CPU${CPU}" +unset_cpu_nr +unset_taskname +unset_release +unset_msgid +runtest +# At this time, cpu= shouldn't be present in the msg +validate_no_sysdata + +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netcons_torture.sh b/tools/testing/selftests/drivers/net/netcons_torture.sh new file mode 100755 index 000000000000..2ce9ee3719d1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_torture.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# Repeatedly send kernel messages, toggles netconsole targets on and off, +# creates and deletes targets in parallel, and toggles the source interface to +# simulate stress conditions. +# +# This test aims to verify the robustness of netconsole under dynamic +# configurations and concurrent operations. +# +# The major goal is to run this test with LOCKDEP, Kmemleak and KASAN to make +# sure no issues is reported. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +# Number of times the main loop run +ITERATIONS=${1:-150} + +# Only test extended format +FORMAT="extended" +# And ipv6 only +IP_VERSION="ipv6" + +# Create, enable and delete some targets. +create_and_delete_random_target() { + COUNT=2 + RND_PREFIX=$(mktemp -u netcons_rnd_XXXX_) + + if [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}${COUNT}" ] || \ + [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}0" ]; then + echo "Function didn't finish yet, skipping it." >&2 + return + fi + + # enable COUNT targets + for i in $(seq ${COUNT}) + do + RND_TARGET="${RND_PREFIX}"${i} + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" + + # Basic population so the target can come up + _create_dynamic_target "${FORMAT}" "${RND_TARGET_PATH}" + done + + echo "netconsole selftest: ${COUNT} additional targets were created" > /dev/kmsg + # disable them all + for i in $(seq ${COUNT}) + do + RND_TARGET="${RND_PREFIX}"${i} + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" + if [[ $(cat "${RND_TARGET_PATH}/enabled") -eq 1 ]] + then + echo 0 > "${RND_TARGET_PATH}"/enabled + fi + rmdir "${RND_TARGET_PATH}" + done +} + +# Disable and enable the target mid-air, while messages +# are being transmitted. +toggle_netcons_target() { + for i in $(seq 2) + do + if [ ! -d "${NETCONS_PATH}" ] + then + break + fi + echo 0 > "${NETCONS_PATH}"/enabled 2> /dev/null || true + # Try to enable a bit harder, given it might fail to enable + # Write to `enabled` might fail depending on the lock, which is + # highly contentious here + for _ in $(seq 5) + do + echo 1 > "${NETCONS_PATH}"/enabled 2> /dev/null || true + done + done +} + +toggle_iface(){ + ip link set "${SRCIF}" down + ip link set "${SRCIF}" up +} + +# Start here + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network "${IP_VERSION}" +# Create a dynamic target for netconsole +create_dynamic_target "${FORMAT}" + +for i in $(seq "$ITERATIONS") +do + for _ in $(seq 10) + do + echo "${MSG}: ${TARGET} ${i}" > /dev/kmsg + done + wait + + if (( i % 30 == 0 )); then + toggle_netcons_target & + fi + + if (( i % 50 == 0 )); then + # create some targets, enable them, send msg and disable + # all in a parallel thread + create_and_delete_random_target & + fi + + if (( i % 70 == 0 )); then + toggle_iface & + fi +done +wait + +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile new file mode 100644 index 000000000000..1a228c5430f5 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +TEST_PROGS := \ + devlink.sh \ + devlink_in_netns.sh \ + devlink_trap.sh \ + ethtool-coalesce.sh \ + ethtool-features.sh \ + ethtool-fec.sh \ + ethtool-pause.sh \ + fib.sh \ + fib_notifications.sh \ + hw_stats_l3.sh \ + macsec-offload.sh \ + nexthop.sh \ + peer.sh \ + psample.sh \ + tc-mq-visibility.sh \ + udp_tunnel_nic.sh \ +# end of TEST_PROGS + +TEST_FILES := \ + ethtool-common.sh +# end of TEST_FILES + +include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/netdevsim/config b/tools/testing/selftests/drivers/net/netdevsim/config new file mode 100644 index 000000000000..5117c78ddf0a --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/config @@ -0,0 +1,11 @@ +CONFIG_DUMMY=y +CONFIG_GENEVE=m +CONFIG_IPV6=y +CONFIG_MACSEC=m +CONFIG_NETDEVSIM=m +CONFIG_NET_SCH_MQPRIO=y +CONFIG_NET_SCH_MULTIQ=y +CONFIG_NET_SCH_PRIO=y +CONFIG_PSAMPLE=y +CONFIG_PTP_1588_CLOCK_MOCK=y +CONFIG_VXLAN=m diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh new file mode 100755 index 000000000000..1b529ccaf050 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -0,0 +1,879 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="fw_flash_test params_test \ + params_default_test regions_test reload_test \ + netns_reload_test resource_test dev_info_test \ + empty_reporter_test dummy_reporter_test rate_test" +NUM_NETIFS=0 +source $lib_dir/lib.sh + +BUS_ADDR=10 +PORT_COUNT=4 +VF_COUNT=4 +DEV_NAME=netdevsim$BUS_ADDR +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/ +DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/ +DL_HANDLE=netdevsim/$DEV_NAME + +wait_for_devlink() +{ + "$@" | grep -q $DL_HANDLE +} + +devlink_wait() +{ + local timeout=$1 + + busywait "$timeout" wait_for_devlink devlink dev +} + +fw_flash_test() +{ + DUMMYFILE=$(find /lib/firmware -type f -printf '%P\n' | head -1) + RET=0 + + if [ -z "$DUMMYFILE" ] + then + echo "SKIP: unable to find suitable dummy firmware file" + return + fi + + echo "10"> $DEBUGFS_DIR/fw_update_flash_chunk_time_ms + + devlink dev flash $DL_HANDLE file $DUMMYFILE + check_err $? "Failed to flash with status updates on" + + devlink dev flash $DL_HANDLE file $DUMMYFILE component fw.mgmt + check_err $? "Failed to flash with component attribute" + + devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite settings + check_fail $? "Flash with overwrite settings should be rejected" + + echo "1"> $DEBUGFS_DIR/fw_update_overwrite_mask + check_err $? "Failed to change allowed overwrite mask" + + devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite settings + check_err $? "Failed to flash with settings overwrite enabled" + + devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite identifiers + check_fail $? "Flash with overwrite settings should be identifiers" + + echo "3"> $DEBUGFS_DIR/fw_update_overwrite_mask + check_err $? "Failed to change allowed overwrite mask" + + devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite identifiers overwrite settings + check_err $? "Failed to flash with settings and identifiers overwrite enabled" + + echo "n"> $DEBUGFS_DIR/fw_update_status + check_err $? "Failed to disable status updates" + + devlink dev flash $DL_HANDLE file $DUMMYFILE + check_err $? "Failed to flash with status updates off" + + log_test "fw flash test" +} + +param_get() +{ + local name=$1 + local attr=${2:-value} + local cmode=${3:-driverinit} + + cmd_jq "devlink dev param show $DL_HANDLE name $name -j" \ + '.[][][].values[] | select(.cmode == "'"$cmode"'").'"$attr" +} + +param_set() +{ + local name=$1 + local value=$2 + local cmode=${3:-driverinit} + + devlink dev param set $DL_HANDLE name $name cmode $cmode value $value +} + +param_set_default() +{ + local name=$1 + local cmode=${2:-driverinit} + + devlink dev param set $DL_HANDLE name $name default cmode $cmode +} + +check_value() +{ + local name=$1 + local phase_name=$2 + local expected_param_value=$3 + local expected_debugfs_value=$4 + local cmode=${5:-driverinit} + local value + local attr="value" + + if [[ "$phase_name" == *"default"* ]]; then + attr="default" + fi + + value=$(param_get $name $attr $cmode) + check_err $? "Failed to get $name param $attr" + [ "$value" == "$expected_param_value" ] + check_err $? "Unexpected $phase_name $name param $attr" + value=$(<$DEBUGFS_DIR/$name) + check_err $? "Failed to get $name debugfs value" + [ "$value" == "$expected_debugfs_value" ] + check_err $? "Unexpected $phase_name $name debugfs value" +} + +params_test() +{ + RET=0 + + local max_macs + local test1 + + check_value max_macs initial 32 32 + check_value test1 initial true Y + + param_set max_macs 16 + check_err $? "Failed to set max_macs param value" + param_set test1 false + check_err $? "Failed to set test1 param value" + + check_value max_macs post-set 16 32 + check_value test1 post-set false Y + + devlink dev reload $DL_HANDLE + + check_value max_macs post-reload 16 16 + check_value test1 post-reload false N + + log_test "params test" +} + +value_to_debugfs() +{ + local value=$1 + + case "$value" in + true) + echo "Y" + ;; + false) + echo "N" + ;; + *) + echo "$value" + ;; + esac +} + +test_default() +{ + local param_name=$1 + local new_value=$2 + local expected_default=$3 + local cmode=${4:-driverinit} + local default_debugfs + local new_debugfs + local expected_debugfs + + default_debugfs=$(value_to_debugfs $expected_default) + new_debugfs=$(value_to_debugfs $new_value) + + expected_debugfs=$default_debugfs + check_value $param_name initial-default $expected_default $expected_debugfs $cmode + + param_set $param_name $new_value $cmode + check_err $? "Failed to set $param_name to $new_value" + + expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$new_debugfs" || echo "$default_debugfs") + check_value $param_name post-set $new_value $expected_debugfs $cmode + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload device" + + expected_debugfs=$new_debugfs + check_value $param_name post-reload-new-value $new_value $expected_debugfs $cmode + + param_set_default $param_name $cmode + check_err $? "Failed to set $param_name to default" + + expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$default_debugfs" || echo "$new_debugfs") + check_value $param_name post-set-default $expected_default $expected_debugfs $cmode + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload device" + + expected_debugfs=$default_debugfs + check_value $param_name post-reload-default $expected_default $expected_debugfs $cmode +} + +params_default_test() +{ + RET=0 + + if ! devlink dev param help 2>&1 | grep -q "value VALUE | default"; then + echo "SKIP: devlink cli missing default feature" + return + fi + + # Remove side effects of previous tests. Use plain param_set, because + # param_set_default is a feature under test here. + param_set max_macs 32 driverinit + check_err $? "Failed to reset max_macs to default value" + param_set test1 true driverinit + check_err $? "Failed to reset test1 to default value" + param_set test2 1234 runtime + check_err $? "Failed to reset test2 to default value" + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload device for clean state" + + test_default max_macs 16 32 driverinit + test_default test1 false true driverinit + test_default test2 100 1234 runtime + + log_test "params default test" +} + +check_region_size() +{ + local name=$1 + local size + + size=$(devlink region show $DL_HANDLE/$name -j | jq -e -r '.[][].size') + check_err $? "Failed to get $name region size" + [ $size -eq 32768 ] + check_err $? "Invalid $name region size" +} + +check_region_snapshot_count() +{ + local name=$1 + local phase_name=$2 + local expected_count=$3 + local count + + count=$(devlink region show $DL_HANDLE/$name -j | jq -e -r '.[][].snapshot | length') + [ $count -eq $expected_count ] + check_err $? "Unexpected $phase_name snapshot count" +} + +regions_test() +{ + RET=0 + + local count + + check_region_size dummy + check_region_snapshot_count dummy initial 0 + + echo ""> $DEBUGFS_DIR/take_snapshot + check_err $? "Failed to take first dummy region snapshot" + check_region_snapshot_count dummy post-first-snapshot 1 + + echo ""> $DEBUGFS_DIR/take_snapshot + check_err $? "Failed to take second dummy region snapshot" + check_region_snapshot_count dummy post-second-snapshot 2 + + echo ""> $DEBUGFS_DIR/take_snapshot + check_err $? "Failed to take third dummy region snapshot" + check_region_snapshot_count dummy post-third-snapshot 3 + + devlink region del $DL_HANDLE/dummy snapshot 1 + check_err $? "Failed to delete first dummy region snapshot" + + check_region_snapshot_count dummy post-first-delete 2 + + devlink region new $DL_HANDLE/dummy snapshot 25 + check_err $? "Failed to create a new snapshot with id 25" + + check_region_snapshot_count dummy post-first-request 3 + + devlink region dump $DL_HANDLE/dummy snapshot 25 >> /dev/null + check_err $? "Failed to dump snapshot with id 25" + + devlink region read $DL_HANDLE/dummy snapshot 25 addr 0 len 1 >> /dev/null + check_err $? "Failed to read snapshot with id 25 (1 byte)" + + devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len 128 >> /dev/null + check_err $? "Failed to read snapshot with id 25 (128 bytes)" + + devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len $((1<<32)) >> /dev/null + check_err $? "Failed to read snapshot with id 25 (oversized)" + + devlink region read $DL_HANDLE/dummy snapshot 25 addr $((1<<32)) len 128 >> /dev/null 2>&1 + check_fail $? "Bad read of snapshot with id 25 did not fail" + + devlink region del $DL_HANDLE/dummy snapshot 25 + check_err $? "Failed to delete snapshot with id 25" + + check_region_snapshot_count dummy post-second-delete 2 + + sid=$(devlink -j region new $DL_HANDLE/dummy | jq '.[][][][]') + check_err $? "Failed to create a new snapshot with id allocated by the kernel" + + check_region_snapshot_count dummy post-first-request 3 + + devlink region dump $DL_HANDLE/dummy snapshot $sid >> /dev/null + check_err $? "Failed to dump a snapshot with id allocated by the kernel" + + devlink region del $DL_HANDLE/dummy snapshot $sid + check_err $? "Failed to delete snapshot with id allocated by the kernel" + + check_region_snapshot_count dummy post-first-request 2 + + log_test "regions test" +} + +reload_test() +{ + RET=0 + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload" + + echo "y"> $DEBUGFS_DIR/fail_reload + check_err $? "Failed to setup devlink reload to fail" + + devlink dev reload $DL_HANDLE + check_fail $? "Unexpected success of devlink reload" + + echo "n"> $DEBUGFS_DIR/fail_reload + check_err $? "Failed to setup devlink reload not to fail" + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload after set not to fail" + + echo "y"> $DEBUGFS_DIR/dont_allow_reload + check_err $? "Failed to forbid devlink reload" + + devlink dev reload $DL_HANDLE + check_fail $? "Unexpected success of devlink reload" + + echo "n"> $DEBUGFS_DIR/dont_allow_reload + check_err $? "Failed to re-enable devlink reload" + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload after re-enable" + + log_test "reload test" +} + +netns_reload_test() +{ + RET=0 + + ip netns add testns1 + check_err $? "Failed add netns \"testns1\"" + ip netns add testns2 + check_err $? "Failed add netns \"testns2\"" + + devlink dev reload $DL_HANDLE netns testns1 + check_err $? "Failed to reload into netns \"testns1\"" + + devlink -N testns1 dev reload $DL_HANDLE netns testns2 + check_err $? "Failed to reload from netns \"testns1\" into netns \"testns2\"" + + ip netns del testns2 + ip netns del testns1 + + # Wait until netns async cleanup is done. + devlink_wait 2000 + + log_test "netns reload test" +} + +DUMMYDEV="dummytest" + +res_val_get() +{ + local netns=$1 + local parentname=$2 + local name=$3 + local type=$4 + + cmd_jq "devlink -N $netns resource show $DL_HANDLE -j" \ + ".[][][] | select(.name == \"$parentname\").resources[] \ + | select(.name == \"$name\").$type" +} + +resource_test() +{ + RET=0 + + ip netns add testns1 + check_err $? "Failed add netns \"testns1\"" + ip netns add testns2 + check_err $? "Failed add netns \"testns2\"" + + devlink dev reload $DL_HANDLE netns testns1 + check_err $? "Failed to reload into netns \"testns1\"" + + # Create dummy dev to add the address and routes on. + + ip -n testns1 link add name $DUMMYDEV type dummy + check_err $? "Failed create dummy device" + ip -n testns1 link set $DUMMYDEV up + check_err $? "Failed bring up dummy device" + ip -n testns1 a a 192.0.1.1/24 dev $DUMMYDEV + check_err $? "Failed add an IP address to dummy device" + + local occ=$(res_val_get testns1 IPv4 fib occ) + local limit=$((occ+1)) + + # Set fib size limit to handle one another route only. + + devlink -N testns1 resource set $DL_HANDLE path IPv4/fib size $limit + check_err $? "Failed to set IPv4/fib resource size" + local size_new=$(res_val_get testns1 IPv4 fib size_new) + [ "$size_new" -eq "$limit" ] + check_err $? "Unexpected \"size_new\" value (got $size_new, expected $limit)" + + devlink -N testns1 dev reload $DL_HANDLE + check_err $? "Failed to reload" + local size=$(res_val_get testns1 IPv4 fib size) + [ "$size" -eq "$limit" ] + check_err $? "Unexpected \"size\" value (got $size, expected $limit)" + + # Insert 2 routes, the first is going to be inserted, + # the second is expected to fail to be inserted. + + ip -n testns1 r a 192.0.2.0/24 via 192.0.1.2 + check_err $? "Failed to add route" + + ip -n testns1 r a 192.0.3.0/24 via 192.0.1.2 + check_fail $? "Unexpected successful route add over limit" + + # Now create another dummy in second network namespace and + # insert two routes. That is over the limit of the netdevsim + # instance in the first namespace. Move the netdevsim instance + # into the second namespace and expect it to fail. + + ip -n testns2 link add name $DUMMYDEV type dummy + check_err $? "Failed create dummy device" + ip -n testns2 link set $DUMMYDEV up + check_err $? "Failed bring up dummy device" + ip -n testns2 a a 192.0.1.1/24 dev $DUMMYDEV + check_err $? "Failed add an IP address to dummy device" + ip -n testns2 r a 192.0.2.0/24 via 192.0.1.2 + check_err $? "Failed to add route" + ip -n testns2 r a 192.0.3.0/24 via 192.0.1.2 + check_err $? "Failed to add route" + + devlink -N testns1 dev reload $DL_HANDLE netns testns2 + check_fail $? "Unexpected successful reload from netns \"testns1\" into netns \"testns2\"" + + devlink -N testns2 resource set $DL_HANDLE path IPv4/fib size ' -1' + check_err $? "Failed to reset IPv4/fib resource size" + + devlink -N testns2 dev reload $DL_HANDLE netns 1 + check_err $? "Failed to reload devlink back" + + ip netns del testns2 + ip netns del testns1 + + # Wait until netns async cleanup is done. + devlink_wait 2000 + + log_test "resource test" +} + +info_get() +{ + local name=$1 + + cmd_jq "devlink dev info $DL_HANDLE -j" ".[][][\"$name\"]" "-e" +} + +dev_info_test() +{ + RET=0 + + driver=$(info_get "driver") + check_err $? "Failed to get driver name" + [ "$driver" == "netdevsim" ] + check_err $? "Unexpected driver name $driver" + + log_test "dev_info test" +} + +empty_reporter_test() +{ + RET=0 + + devlink health show $DL_HANDLE reporter empty >/dev/null + check_err $? "Failed show empty reporter" + + devlink health dump show $DL_HANDLE reporter empty >/dev/null + check_err $? "Failed show dump of empty reporter" + + devlink health diagnose $DL_HANDLE reporter empty >/dev/null + check_err $? "Failed diagnose empty reporter" + + devlink health recover $DL_HANDLE reporter empty + check_err $? "Failed recover empty reporter" + + log_test "empty reporter test" +} + +check_reporter_info() +{ + local name=$1 + local expected_state=$2 + local expected_error=$3 + local expected_recover=$4 + local expected_grace_period=$5 + local expected_auto_recover=$6 + + local show=$(devlink health show $DL_HANDLE reporter $name -j | jq -e -r ".[][][]") + check_err $? "Failed show $name reporter" + + local state=$(echo $show | jq -r ".state") + [ "$state" == "$expected_state" ] + check_err $? "Unexpected \"state\" value (got $state, expected $expected_state)" + + local error=$(echo $show | jq -r ".error") + [ "$error" == "$expected_error" ] + check_err $? "Unexpected \"error\" value (got $error, expected $expected_error)" + + local recover=`echo $show | jq -r ".recover"` + [ "$recover" == "$expected_recover" ] + check_err $? "Unexpected \"recover\" value (got $recover, expected $expected_recover)" + + local grace_period=$(echo $show | jq -r ".grace_period") + check_err $? "Failed get $name reporter grace_period" + [ "$grace_period" == "$expected_grace_period" ] + check_err $? "Unexpected \"grace_period\" value (got $grace_period, expected $expected_grace_period)" + + local auto_recover=$(echo $show | jq -r ".auto_recover") + [ "$auto_recover" == "$expected_auto_recover" ] + check_err $? "Unexpected \"auto_recover\" value (got $auto_recover, expected $expected_auto_recover)" +} + +dummy_reporter_test() +{ + RET=0 + + check_reporter_info dummy healthy 0 0 0 true + + devlink health set $DL_HANDLE reporter dummy auto_recover false + check_err $? "Failed to dummy reporter auto_recover option" + + check_reporter_info dummy healthy 0 0 0 false + + local BREAK_MSG="foo bar" + echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health + check_err $? "Failed to break dummy reporter" + + check_reporter_info dummy error 1 0 0 false + + local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j) + check_err $? "Failed show dump of dummy reporter" + + local dump_break_msg=$(echo $dump | jq -r ".break_message") + [ "$dump_break_msg" == "$BREAK_MSG" ] + check_err $? "Unexpected dump break message value (got $dump_break_msg, expected $BREAK_MSG)" + + devlink health dump clear $DL_HANDLE reporter dummy + check_err $? "Failed clear dump of dummy reporter" + + devlink health recover $DL_HANDLE reporter dummy + check_err $? "Failed recover dummy reporter" + + check_reporter_info dummy healthy 1 1 0 false + + devlink health set $DL_HANDLE reporter dummy auto_recover true + check_err $? "Failed to dummy reporter auto_recover option" + + check_reporter_info dummy healthy 1 1 0 true + + echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health + check_err $? "Failed to break dummy reporter" + + check_reporter_info dummy healthy 2 2 0 true + + local diagnose=$(devlink health diagnose $DL_HANDLE reporter dummy -j -p) + check_err $? "Failed show diagnose of dummy reporter" + + local rcvrd_break_msg=$(echo $diagnose | jq -r ".recovered_break_message") + [ "$rcvrd_break_msg" == "$BREAK_MSG" ] + check_err $? "Unexpected recovered break message value (got $rcvrd_break_msg, expected $BREAK_MSG)" + + devlink health set $DL_HANDLE reporter dummy grace_period 10 + check_err $? "Failed to dummy reporter grace_period option" + + check_reporter_info dummy healthy 2 2 10 true + + echo "Y"> $DEBUGFS_DIR/health/fail_recover + check_err $? "Failed set dummy reporter recovery to fail" + + echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health + check_fail $? "Unexpected success of dummy reporter break" + + check_reporter_info dummy error 3 2 10 true + + devlink health recover $DL_HANDLE reporter dummy + check_fail $? "Unexpected success of dummy reporter recover" + + echo "N"> $DEBUGFS_DIR/health/fail_recover + check_err $? "Failed set dummy reporter recovery to be successful" + + devlink health recover $DL_HANDLE reporter dummy + check_err $? "Failed recover dummy reporter" + + check_reporter_info dummy healthy 3 3 10 true + + echo 8192 > $DEBUGFS_DIR/health/binary_len + check_err $? "Failed set dummy reporter binary len to 8192" + + local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j) + check_err $? "Failed show dump of dummy reporter" + + devlink health dump clear $DL_HANDLE reporter dummy + check_err $? "Failed clear dump of dummy reporter" + + log_test "dummy reporter test" +} + +rate_leafs_get() +{ + local handle=$1 + + cmd_jq "devlink port function rate show -j" \ + '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))' +} + +rate_nodes_get() +{ + local handle=$1 + + cmd_jq "devlink port function rate show -j" \ + '.[] | to_entries | .[] | select(.value.type == "node") | .key | select(contains("'$handle'"))' +} + +rate_attr_set() +{ + local handle=$1 + local name=$2 + local value=$3 + local units=$4 + + devlink port function rate set $handle $name $value$units +} + +rate_attr_get() +{ + local handle=$1 + local name=$2 + + cmd_jq "devlink port function rate show $handle -j" '.[][].'$name +} + +rate_attr_tx_rate_check() +{ + local handle=$1 + local name=$2 + local rate=$3 + local debug_file=$4 + + rate_attr_set $handle $name $rate mbit + check_err $? "Failed to set $name value" + + local debug_value=$(cat $debug_file) + check_err $? "Failed to read $name value from debugfs" + [ "$debug_value" == "$rate" ] + check_err $? "Unexpected $name debug value $debug_value != $rate" + + local api_value=$(( $(rate_attr_get $handle $name) * 8 / 1000000 )) + check_err $? "Failed to get $name attr value" + [ "$api_value" == "$rate" ] + check_err $? "Unexpected $name attr value $api_value != $rate" +} + +rate_attr_parent_check() +{ + local handle=$1 + local parent=$2 + local debug_file=$3 + + rate_attr_set $handle parent $parent + check_err $? "Failed to set parent" + + debug_value=$(cat $debug_file) + check_err $? "Failed to get parent debugfs value" + [ "$debug_value" == "$parent" ] + check_err $? "Unexpected parent debug value $debug_value != $parent" + + api_value=$(rate_attr_get $r_obj parent) + check_err $? "Failed to get parent attr value" + [ "$api_value" == "$parent" ] + check_err $? "Unexpected parent attr value $api_value != $parent" +} + +rate_attr_tc_bw_check() +{ + local handle=$1 + local tc_bw=$2 + local debug_file=$3 + + local tc_bw_str="" + for bw in $tc_bw; do + local tc=${bw%%:*} + local value=${bw##*:} + tc_bw_str="$tc_bw_str $tc:$value" + done + tc_bw_str=${tc_bw_str# } + + rate_attr_set "$handle" tc-bw "$tc_bw_str" + check_err $? "Failed to set tc-bw values" + + for bw in $tc_bw; do + local tc=${bw%%:*} + local value=${bw##*:} + local debug_value + debug_value=$(cat "$debug_file"/tc"${tc}"_bw) + check_err $? "Failed to read tc-bw value from debugfs for tc$tc" + [ "$debug_value" == "$value" ] + check_err $? "Unexpected tc-bw debug value for tc$tc: $debug_value != $value" + done + + for bw in $tc_bw; do + local tc=${bw%%:*} + local expected_value=${bw##*:} + local api_value + api_value=$(rate_attr_get "$handle" tc_"$tc") + if [ "$api_value" = "null" ]; then + api_value=0 + fi + [ "$api_value" == "$expected_value" ] + check_err $? "Unexpected tc-bw value for tc$tc: $api_value != $expected_value" + done +} + +rate_node_add() +{ + local handle=$1 + + devlink port function rate add $handle +} + +rate_node_del() +{ + local handle=$1 + + devlink port function rate del $handle +} + +rate_test() +{ + RET=0 + + echo $VF_COUNT > /sys/bus/netdevsim/devices/$DEV_NAME/sriov_numvfs + devlink dev eswitch set $DL_HANDLE mode switchdev + local leafs=`rate_leafs_get $DL_HANDLE` + local num_leafs=`echo $leafs | wc -w` + [ "$num_leafs" == "$VF_COUNT" ] + check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs" + + rate=10 + for r_obj in $leafs + do + rate_attr_tx_rate_check $r_obj tx_share $rate \ + $DEBUGFS_DIR/ports/${r_obj##*/}/tx_share + rate=$(($rate+10)) + done + + rate=100 + for r_obj in $leafs + do + rate_attr_tx_rate_check $r_obj tx_max $rate \ + $DEBUGFS_DIR/ports/${r_obj##*/}/tx_max + rate=$(($rate+100)) + done + + local tc_bw="0:0 1:40 2:0 3:0 4:0 5:0 6:60 7:0" + for r_obj in $leafs + do + rate_attr_tc_bw_check "$r_obj" "$tc_bw" \ + "$DEBUGFS_DIR"/ports/"${r_obj##*/}" + done + + local node1_name='group1' + local node1="$DL_HANDLE/$node1_name" + rate_node_add "$node1" + check_err $? "Failed to add node $node1" + + local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` + [ $num_nodes == 1 ] + check_err $? "Expected 1 rate node in output but got $num_nodes" + + local node_tx_share=10 + rate_attr_tx_rate_check $node1 tx_share $node_tx_share \ + $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_share + + local node_tx_max=100 + rate_attr_tx_rate_check $node1 tx_max $node_tx_max \ + $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max + + + local tc_bw="0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0" + rate_attr_tc_bw_check $node1 "$tc_bw" \ + "$DEBUGFS_DIR"/rate_nodes/"${node1##*/}" + + + rate_node_del "$node1" + check_err $? "Failed to delete node $node1" + local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` + [ $num_nodes == 0 ] + check_err $? "Expected 0 rate node but got $num_nodes" + + local node1_name='group1' + local node1="$DL_HANDLE/$node1_name" + rate_node_add "$node1" + check_err $? "Failed to add node $node1" + + rate_attr_parent_check $r_obj $node1_name \ + $DEBUGFS_DIR/ports/${r_obj##*/}/rate_parent + + local node2_name='group2' + local node2="$DL_HANDLE/$node2_name" + rate_node_add "$node2" + check_err $? "Failed to add node $node2" + + rate_attr_parent_check $node2 $node1_name \ + $DEBUGFS_DIR/rate_nodes/$node2_name/rate_parent + rate_node_del "$node2" + check_err $? "Failed to delete node $node2" + rate_attr_set "$r_obj" noparent + check_err $? "Failed to unset $r_obj parent node" + rate_node_del "$node1" + check_err $? "Failed to delete node $node1" + + log_test "rate test" +} + +setup_prepare() +{ + modprobe netdevsim + echo "$BUS_ADDR $PORT_COUNT" > /sys/bus/netdevsim/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done +} + +cleanup() +{ + pre_cleanup + echo "$BUS_ADDR" > /sys/bus/netdevsim/del_device + modprobe -r netdevsim +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh new file mode 100755 index 000000000000..7effd35369e1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="check_devlink_test check_ports_test" +NUM_NETIFS=0 +source $lib_dir/lib.sh + +BUS_ADDR=10 +PORT_COUNT=4 +DEV_NAME=netdevsim$BUS_ADDR +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/ +DL_HANDLE=netdevsim/$DEV_NAME +NETNS_NAME=testns1 + +port_netdev_get() +{ + local port_index=$1 + + cmd_jq "devlink -N $NETNS_NAME port show -j" \ + ".[][\"$DL_HANDLE/$port_index\"].netdev" "-e" +} + +check_ports_test() +{ + RET=0 + + for i in $(seq 0 $(expr $PORT_COUNT - 1)); do + netdev_name=$(port_netdev_get $i) + check_err $? "Failed to get netdev name for port $DL_HANDLE/$i" + ip -n $NETNS_NAME link show $netdev_name &> /dev/null + check_err $? "Failed to find netdev $netdev_name" + done + + log_test "check ports test" +} + +check_devlink_test() +{ + RET=0 + + devlink -N $NETNS_NAME dev show $DL_HANDLE &> /dev/null + check_err $? "Failed to show devlink instance" + + log_test "check devlink test" +} + +setup_prepare() +{ + modprobe netdevsim + ip netns add $NETNS_NAME + ip netns exec $NETNS_NAME \ + echo "$BUS_ADDR $PORT_COUNT" > /sys/bus/netdevsim/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done +} + +cleanup() +{ + pre_cleanup + echo "$BUS_ADDR" > /sys/bus/netdevsim/del_device + ip netns del $NETNS_NAME + modprobe -r netdevsim +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh new file mode 100755 index 000000000000..b64d98ca0df7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh @@ -0,0 +1,514 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking devlink-trap functionality. It makes use of +# netdevsim which implements the required callbacks. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + init_test + trap_action_test + trap_metadata_test + bad_trap_test + bad_trap_action_test + trap_stats_test + trap_group_action_test + bad_trap_group_test + trap_group_stats_test + trap_policer_test + trap_policer_bind_test + port_del_test + dev_del_test +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/ +SLEEP_TIME=1 +NETDEV="" +NUM_NETIFS=0 +source $lib_dir/lib.sh + +DEVLINK_DEV= +source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} + +require_command udevadm + +modprobe netdevsim &> /dev/null +if [ ! -d "$NETDEVSIM_PATH" ]; then + echo "SKIP: No netdevsim support" + exit 1 +fi + +if [ -d "${NETDEVSIM_PATH}/devices/netdevsim${DEV_ADDR}" ]; then + echo "SKIP: Device netdevsim${DEV_ADDR} already exists" + exit 1 +fi + +check_netdev_down() +{ + state=$(cat /sys/class/net/${NETDEV}/flags) + + if [ $((state & 1)) -ne 0 ]; then + echo "WARNING: unexpected interface UP, disable NetworkManager?" + + ip link set dev $NETDEV down + fi +} + +init_test() +{ + RET=0 + + test $(devlink_traps_num_get) -ne 0 + check_err $? "No traps were registered" + + log_test "Initialization" +} + +trap_action_test() +{ + local orig_action + local trap_name + local action + + RET=0 + + for trap_name in $(devlink_traps_get); do + # The action of non-drop traps cannot be changed. + if [ $(devlink_trap_type_get $trap_name) = "drop" ]; then + devlink_trap_action_set $trap_name "trap" + action=$(devlink_trap_action_get $trap_name) + if [ $action != "trap" ]; then + check_err 1 "Trap $trap_name did not change action to trap" + fi + + devlink_trap_action_set $trap_name "drop" + action=$(devlink_trap_action_get $trap_name) + if [ $action != "drop" ]; then + check_err 1 "Trap $trap_name did not change action to drop" + fi + else + orig_action=$(devlink_trap_action_get $trap_name) + + devlink_trap_action_set $trap_name "trap" + action=$(devlink_trap_action_get $trap_name) + if [ $action != $orig_action ]; then + check_err 1 "Trap $trap_name changed action when should not" + fi + + devlink_trap_action_set $trap_name "drop" + action=$(devlink_trap_action_get $trap_name) + if [ $action != $orig_action ]; then + check_err 1 "Trap $trap_name changed action when should not" + fi + fi + done + + log_test "Trap action" +} + +trap_metadata_test() +{ + local trap_name + + RET=0 + + for trap_name in $(devlink_traps_get); do + devlink_trap_metadata_test $trap_name "input_port" + check_err $? "Input port not reported as metadata of trap $trap_name" + if [ $trap_name == "ingress_flow_action_drop" ] || + [ $trap_name == "egress_flow_action_drop" ]; then + devlink_trap_metadata_test $trap_name "flow_action_cookie" + check_err $? "Flow action cookie not reported as metadata of trap $trap_name" + fi + done + + log_test "Trap metadata" +} + +bad_trap_test() +{ + RET=0 + + devlink_trap_action_set "made_up_trap" "drop" + check_fail $? "Did not get an error for non-existing trap" + + log_test "Non-existing trap" +} + +bad_trap_action_test() +{ + local traps_arr + local trap_name + + RET=0 + + # Pick first trap. + traps_arr=($(devlink_traps_get)) + trap_name=${traps_arr[0]} + + devlink_trap_action_set $trap_name "made_up_action" + check_fail $? "Did not get an error for non-existing trap action" + + log_test "Non-existing trap action" +} + +trap_stats_test() +{ + local trap_name + + RET=0 + + check_netdev_down + for trap_name in $(devlink_traps_get); do + devlink_trap_stats_idle_test $trap_name + check_err $? "Stats of trap $trap_name not idle when netdev down" + + ip link set dev $NETDEV up + + if [ $(devlink_trap_type_get $trap_name) = "drop" ]; then + devlink_trap_action_set $trap_name "trap" + devlink_trap_stats_idle_test $trap_name + check_fail $? "Stats of trap $trap_name idle when action is trap" + + devlink_trap_action_set $trap_name "drop" + devlink_trap_stats_idle_test $trap_name + check_err $? "Stats of trap $trap_name not idle when action is drop" + + echo "y"> $DEBUGFS_DIR/fail_trap_drop_counter_get + devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null + check_fail $? "Managed to read trap (hard dropped) statistics when should not" + echo "n"> $DEBUGFS_DIR/fail_trap_drop_counter_get + devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null + check_err $? "Did not manage to read trap (hard dropped) statistics when should" + + devlink_trap_drop_stats_idle_test $trap_name + check_fail $? "Drop stats of trap $trap_name idle when should not" + else + devlink_trap_stats_idle_test $trap_name + check_fail $? "Stats of non-drop trap $trap_name idle when should not" + fi + + ip link set dev $NETDEV down + done + + log_test "Trap statistics" +} + +trap_group_action_test() +{ + local curr_group group_name + local trap_name + local trap_type + local action + + RET=0 + + for group_name in $(devlink_trap_groups_get); do + devlink_trap_group_action_set $group_name "trap" + + for trap_name in $(devlink_traps_get); do + curr_group=$(devlink_trap_group_get $trap_name) + if [ $curr_group != $group_name ]; then + continue + fi + + trap_type=$(devlink_trap_type_get $trap_name) + if [ $trap_type != "drop" ]; then + continue + fi + + action=$(devlink_trap_action_get $trap_name) + if [ $action != "trap" ]; then + check_err 1 "Trap $trap_name did not change action to trap" + fi + done + + devlink_trap_group_action_set $group_name "drop" + + for trap_name in $(devlink_traps_get); do + curr_group=$(devlink_trap_group_get $trap_name) + if [ $curr_group != $group_name ]; then + continue + fi + + trap_type=$(devlink_trap_type_get $trap_name) + if [ $trap_type != "drop" ]; then + continue + fi + + action=$(devlink_trap_action_get $trap_name) + if [ $action != "drop" ]; then + check_err 1 "Trap $trap_name did not change action to drop" + fi + done + done + + log_test "Trap group action" +} + +bad_trap_group_test() +{ + RET=0 + + devlink_trap_group_action_set "made_up_trap_group" "drop" + check_fail $? "Did not get an error for non-existing trap group" + + log_test "Non-existing trap group" +} + +trap_group_stats_test() +{ + local group_name + + RET=0 + + check_netdev_down + for group_name in $(devlink_trap_groups_get); do + devlink_trap_group_stats_idle_test $group_name + check_err $? "Stats of trap group $group_name not idle when netdev down" + + ip link set dev $NETDEV up + + devlink_trap_group_action_set $group_name "trap" + devlink_trap_group_stats_idle_test $group_name + check_fail $? "Stats of trap group $group_name idle when action is trap" + + devlink_trap_group_action_set $group_name "drop" + ip link set dev $NETDEV down + done + + log_test "Trap group statistics" +} + +trap_policer_test() +{ + local packets_t0 + local packets_t1 + + RET=0 + + if [ $(devlink_trap_policers_num_get) -eq 0 ]; then + check_err 1 "Failed to dump policers" + fi + + devlink trap policer set $DEVLINK_DEV policer 1337 &> /dev/null + check_fail $? "Did not get an error for setting a non-existing policer" + devlink trap policer show $DEVLINK_DEV policer 1337 &> /dev/null + check_fail $? "Did not get an error for getting a non-existing policer" + + devlink trap policer set $DEVLINK_DEV policer 1 rate 2000 burst 16 + check_err $? "Failed to set valid parameters for a valid policer" + if [ $(devlink_trap_policer_rate_get 1) -ne 2000 ]; then + check_err 1 "Policer rate was not changed" + fi + if [ $(devlink_trap_policer_burst_get 1) -ne 16 ]; then + check_err 1 "Policer burst size was not changed" + fi + + devlink trap policer set $DEVLINK_DEV policer 1 rate 0 &> /dev/null + check_fail $? "Policer rate was changed to rate lower than limit" + devlink trap policer set $DEVLINK_DEV policer 1 rate 9000 &> /dev/null + check_fail $? "Policer rate was changed to rate higher than limit" + devlink trap policer set $DEVLINK_DEV policer 1 burst 2 &> /dev/null + check_fail $? "Policer burst size was changed to burst size lower than limit" + devlink trap policer set $DEVLINK_DEV policer 1 rate 65537 &> /dev/null + check_fail $? "Policer burst size was changed to burst size higher than limit" + echo "y" > $DEBUGFS_DIR/fail_trap_policer_set + devlink trap policer set $DEVLINK_DEV policer 1 rate 3000 &> /dev/null + check_fail $? "Managed to set policer rate when should not" + echo "n" > $DEBUGFS_DIR/fail_trap_policer_set + if [ $(devlink_trap_policer_rate_get 1) -ne 2000 ]; then + check_err 1 "Policer rate was changed to an invalid value" + fi + if [ $(devlink_trap_policer_burst_get 1) -ne 16 ]; then + check_err 1 "Policer burst size was changed to an invalid value" + fi + + packets_t0=$(devlink_trap_policer_rx_dropped_get 1) + sleep .5 + packets_t1=$(devlink_trap_policer_rx_dropped_get 1) + if [ ! $packets_t1 -gt $packets_t0 ]; then + check_err 1 "Policer drop counter was not incremented" + fi + + echo "y"> $DEBUGFS_DIR/fail_trap_policer_counter_get + devlink -s trap policer show $DEVLINK_DEV policer 1 &> /dev/null + check_fail $? "Managed to read policer drop counter when should not" + echo "n"> $DEBUGFS_DIR/fail_trap_policer_counter_get + devlink -s trap policer show $DEVLINK_DEV policer 1 &> /dev/null + check_err $? "Did not manage to read policer drop counter when should" + + log_test "Trap policer" +} + +trap_group_check_policer() +{ + local group_name=$1; shift + + devlink -j -p trap group show $DEVLINK_DEV group $group_name \ + | jq -e '.[][][]["policer"]' &> /dev/null +} + +trap_policer_bind_test() +{ + RET=0 + + devlink trap group set $DEVLINK_DEV group l2_drops policer 1 + check_err $? "Failed to bind a valid policer" + if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then + check_err 1 "Bound policer was not changed" + fi + + devlink trap group set $DEVLINK_DEV group l2_drops policer 1337 \ + &> /dev/null + check_fail $? "Did not get an error for binding a non-existing policer" + if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then + check_err 1 "Bound policer was changed when should not" + fi + + devlink trap group set $DEVLINK_DEV group l2_drops policer 0 + check_err $? "Failed to unbind a policer when using ID 0" + trap_group_check_policer "l2_drops" + check_fail $? "Trap group has a policer after unbinding with ID 0" + + devlink trap group set $DEVLINK_DEV group l2_drops policer 1 + check_err $? "Failed to bind a valid policer" + + devlink trap group set $DEVLINK_DEV group l2_drops nopolicer + check_err $? "Failed to unbind a policer when using 'nopolicer' keyword" + trap_group_check_policer "l2_drops" + check_fail $? "Trap group has a policer after unbinding with 'nopolicer' keyword" + + devlink trap group set $DEVLINK_DEV group l2_drops policer 1 + check_err $? "Failed to bind a valid policer" + + echo "y"> $DEBUGFS_DIR/fail_trap_group_set + devlink trap group set $DEVLINK_DEV group l2_drops policer 2 \ + &> /dev/null + check_fail $? "Managed to bind a policer when should not" + echo "n"> $DEBUGFS_DIR/fail_trap_group_set + devlink trap group set $DEVLINK_DEV group l2_drops policer 2 + check_err $? "Did not manage to bind a policer when should" + + devlink trap group set $DEVLINK_DEV group l2_drops action drop \ + policer 1337 &> /dev/null + check_fail $? "Did not get an error for partially modified trap group" + + log_test "Trap policer binding" +} + +port_del_test() +{ + local group_name + local i + + # The test never fails. It is meant to exercise different code paths + # and make sure we properly dismantle a port while packets are + # in-flight. + RET=0 + + devlink_traps_enable_all + + for i in $(seq 1 10); do + ip link set dev $NETDEV up + + sleep $SLEEP_TIME + + netdevsim_port_destroy + netdevsim_port_create + udevadm settle + done + + devlink_traps_disable_all + + log_test "Port delete" +} + +dev_del_test() +{ + local group_name + local i + + # The test never fails. It is meant to exercise different code paths + # and make sure we properly unregister traps while packets are + # in-flight. + RET=0 + + devlink_traps_enable_all + + for i in $(seq 1 10); do + ip link set dev $NETDEV up + + sleep $SLEEP_TIME + + cleanup + setup_prepare + done + + devlink_traps_disable_all + + log_test "Device delete" +} + +netdevsim_dev_create() +{ + echo "$DEV_ADDR 0" > ${NETDEVSIM_PATH}/new_device +} + +netdevsim_dev_destroy() +{ + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device +} + +netdevsim_port_create() +{ + echo 1 > ${NETDEVSIM_PATH}/devices/${DEV}/new_port +} + +netdevsim_port_destroy() +{ + echo 1 > ${NETDEVSIM_PATH}/devices/${DEV}/del_port +} + +setup_prepare() +{ + local netdev + + netdevsim_dev_create + + if [ ! -d "${NETDEVSIM_PATH}/devices/${DEV}" ]; then + echo "Failed to create netdevsim device" + exit 1 + fi + + netdevsim_port_create + + if [ ! -d "${NETDEVSIM_PATH}/devices/${DEV}/net/" ]; then + echo "Failed to create netdevsim port" + exit 1 + fi + + # Wait for udev to rename newly created netdev. + udevadm settle + + NETDEV=$(ls ${NETDEVSIM_PATH}/devices/${DEV}/net/) +} + +cleanup() +{ + pre_cleanup + netdevsim_port_destroy + netdevsim_dev_destroy +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh new file mode 100755 index 000000000000..9adfba8f87e6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh @@ -0,0 +1,132 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +function get_value { + local query="${SETTINGS_MAP[$1]}" + + echo $(ethtool -c $NSIM_NETDEV | \ + awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[ \t]/, "", $2); print $2}') +} + +function update_current_settings { + for key in ${!SETTINGS_MAP[@]}; do + CURRENT_SETTINGS[$key]=$(get_value $key) + done + echo ${CURRENT_SETTINGS[@]} +} + +if ! ethtool -h | grep -q coalesce; then + echo "SKIP: No --coalesce support in ethtool" + exit 4 +fi + +NSIM_NETDEV=$(make_netdev) + +set -o pipefail + +declare -A SETTINGS_MAP=( + ["rx-frames-low"]="rx-frame-low" + ["tx-frames-low"]="tx-frame-low" + ["rx-frames-high"]="rx-frame-high" + ["tx-frames-high"]="tx-frame-high" + ["rx-usecs"]="rx-usecs" + ["rx-frames"]="rx-frames" + ["rx-usecs-irq"]="rx-usecs-irq" + ["rx-frames-irq"]="rx-frames-irq" + ["tx-usecs"]="tx-usecs" + ["tx-frames"]="tx-frames" + ["tx-usecs-irq"]="tx-usecs-irq" + ["tx-frames-irq"]="tx-frames-irq" + ["stats-block-usecs"]="stats-block-usecs" + ["pkt-rate-low"]="pkt-rate-low" + ["rx-usecs-low"]="rx-usecs-low" + ["tx-usecs-low"]="tx-usecs-low" + ["pkt-rate-high"]="pkt-rate-high" + ["rx-usecs-high"]="rx-usecs-high" + ["tx-usecs-high"]="tx-usecs-high" + ["sample-interval"]="sample-interval" +) + +declare -A CURRENT_SETTINGS=( + ["rx-frames-low"]="" + ["tx-frames-low"]="" + ["rx-frames-high"]="" + ["tx-frames-high"]="" + ["rx-usecs"]="" + ["rx-frames"]="" + ["rx-usecs-irq"]="" + ["rx-frames-irq"]="" + ["tx-usecs"]="" + ["tx-frames"]="" + ["tx-usecs-irq"]="" + ["tx-frames-irq"]="" + ["stats-block-usecs"]="" + ["pkt-rate-low"]="" + ["rx-usecs-low"]="" + ["tx-usecs-low"]="" + ["pkt-rate-high"]="" + ["rx-usecs-high"]="" + ["tx-usecs-high"]="" + ["sample-interval"]="" +) + +declare -A EXPECTED_SETTINGS=( + ["rx-frames-low"]="" + ["tx-frames-low"]="" + ["rx-frames-high"]="" + ["tx-frames-high"]="" + ["rx-usecs"]="" + ["rx-frames"]="" + ["rx-usecs-irq"]="" + ["rx-frames-irq"]="" + ["tx-usecs"]="" + ["tx-frames"]="" + ["tx-usecs-irq"]="" + ["tx-frames-irq"]="" + ["stats-block-usecs"]="" + ["pkt-rate-low"]="" + ["rx-usecs-low"]="" + ["tx-usecs-low"]="" + ["pkt-rate-high"]="" + ["rx-usecs-high"]="" + ["tx-usecs-high"]="" + ["sample-interval"]="" +) + +# populate the expected settings map +for key in ${!SETTINGS_MAP[@]}; do + EXPECTED_SETTINGS[$key]=$(get_value $key) +done + +# test +for key in ${!SETTINGS_MAP[@]}; do + value=$((RANDOM % $((2**32-1)))) + + ethtool -C $NSIM_NETDEV "$key" "$value" + + EXPECTED_SETTINGS[$key]="$value" + expected=${EXPECTED_SETTINGS[@]} + current=$(update_current_settings) + + check $? "$current" "$expected" + set +x +done + +# bool settings which ethtool displays on the same line +ethtool -C $NSIM_NETDEV adaptive-rx on +s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on TX: off") +check $? "$s" "" + +ethtool -C $NSIM_NETDEV adaptive-tx on +s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on TX: on") +check $? "$s" "" + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh new file mode 100644 index 000000000000..80160579e0cc --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +NSIM_ID=$((RANDOM % 1024)) +NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID +NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0 +NSIM_NETDEV= +num_passes=0 +num_errors=0 + +function cleanup_nsim { + if [ -e $NSIM_DEV_SYS ]; then + echo $NSIM_ID > /sys/bus/netdevsim/del_device + fi +} + +function cleanup { + cleanup_nsim +} + +trap cleanup EXIT + +function check { + local code=$1 + local str=$2 + local exp_str=$3 + local exp_fail=$4 + + [ -z "$exp_fail" ] && cop="-ne" || cop="-eq" + + if [ $code $cop 0 ]; then + ((num_errors++)) + return + fi + + if [ "$str" != "$exp_str" ]; then + echo -e "Expected: '$exp_str', got '$str'" + ((num_errors++)) + return + fi + + ((num_passes++)) +} + +function make_netdev { + # Make a netdevsim + old_netdevs=$(ls /sys/class/net) + + if ! $(lsmod | grep -q netdevsim); then + modprobe netdevsim + fi + + echo $NSIM_ID $@ > /sys/bus/netdevsim/new_device + udevadm settle + # get new device name + ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net/ +} diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh new file mode 100644 index 000000000000..bc210dc6ad2d --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +NSIM_NETDEV=$(make_netdev) + +set -o pipefail + +FEATS=" + tx-checksum-ip-generic + tx-scatter-gather + tx-tcp-segmentation + generic-segmentation-offload + generic-receive-offload" + +for feat in $FEATS ; do + s=$(ethtool --json -k $NSIM_NETDEV | jq ".[].\"$feat\".active" 2>/dev/null) + check $? "$s" true + + s=$(ethtool --json -k $NSIM_NETDEV | jq ".[].\"$feat\".fixed" 2>/dev/null) + check $? "$s" false +done + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh new file mode 100755 index 000000000000..6c52ce1b0450 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh @@ -0,0 +1,114 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +NSIM_NETDEV=$(make_netdev) +[ a$ETHTOOL == a ] && ETHTOOL=ethtool + +set -o pipefail + +# Since commit 2b3ddcb35357 ("ethtool: fec: Change the prompt ...") +# in ethtool CLI the Configured lines start with Supported/Configured. +configured=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2 | head -1 | cut -d' ' -f1) + +# netdevsim starts out with None/None +s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) +check $? "$s" "$configured FEC encodings: None +Active FEC encoding: None" + +# Test Auto +$ETHTOOL --set-fec $NSIM_NETDEV encoding auto +check $? +s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) +check $? "$s" "$configured FEC encodings: Auto +Active FEC encoding: Off" + +# Test case in-sensitivity +for o in off Off OFF; do + $ETHTOOL --set-fec $NSIM_NETDEV encoding $o + check $? + s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) + check $? "$s" "$configured FEC encodings: Off +Active FEC encoding: Off" +done + +for o in BaseR baser BAser; do + $ETHTOOL --set-fec $NSIM_NETDEV encoding $o + check $? + s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) + check $? "$s" "$configured FEC encodings: BaseR +Active FEC encoding: BaseR" +done + +for o in llrs rs; do + $ETHTOOL --set-fec $NSIM_NETDEV encoding $o + check $? + s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) + check $? "$s" "$configured FEC encodings: ${o^^} +Active FEC encoding: ${o^^}" +done + +# Test multiple bits +$ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs +check $? +s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) +check $? "$s" "$configured FEC encodings: RS LLRS +Active FEC encoding: LLRS" + +$ETHTOOL --set-fec $NSIM_NETDEV encoding rs off auto +check $? +s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) +check $? "$s" "$configured FEC encodings: Auto Off RS +Active FEC encoding: RS" + +# Make sure other link modes are rejected +$ETHTOOL --set-fec $NSIM_NETDEV encoding FIBRE 2>/dev/null +check $? '' '' 1 + +$ETHTOOL --set-fec $NSIM_NETDEV encoding bla-bla-bla 2>/dev/null +check $? '' '' 1 + +# Try JSON +$ETHTOOL --json --show-fec $NSIM_NETDEV | jq empty >>/dev/null 2>&1 +if [ $? -eq 0 ]; then + $ETHTOOL --set-fec $NSIM_NETDEV encoding auto + check $? + + s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]') + check $? "$s" '"Auto"' + s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]') + check $? "$s" '"Off"' + + $ETHTOOL --set-fec $NSIM_NETDEV encoding auto RS + check $? + + s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]') + check $? "$s" '"Auto" +"RS"' + s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]') + check $? "$s" '"RS"' +fi + +# Test error injection +echo 11 > $NSIM_DEV_DFS/ethtool/get_err + +$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1 +check $? '' '' 1 + +echo 0 > $NSIM_DEV_DFS/ethtool/get_err +echo 11 > $NSIM_DEV_DFS/ethtool/set_err + +$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1 +check $? + +$ETHTOOL --set-fec $NSIM_NETDEV encoding RS 2>/dev/null +check $? '' '' 1 + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh new file mode 100755 index 000000000000..b4a7abfe5454 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +# Bail if ethtool is too old +if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then + echo "SKIP: No --include-statistics support in ethtool" + exit 4 +fi + +NSIM_NETDEV=$(make_netdev) + +set -o pipefail + +echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx +echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx + +s=$(ethtool --json -a $NSIM_NETDEV | jq '.[].statistics') +check $? "$s" "null" + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics') +check $? "$s" "{}" + +echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length') +check $? "$s" "1" + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames') +check $? "$s" "2" + +echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length') +check $? "$s" "2" + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.rx_pause_frames') +check $? "$s" "1" +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames') +check $? "$s" "2" + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh new file mode 100755 index 000000000000..6800de816e8b --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh @@ -0,0 +1,402 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the FIB offload API. It makes use of netdevsim +# which registers a listener to the FIB notification chain. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ipv4_identical_routes + ipv4_tos + ipv4_metric + ipv4_replace + ipv4_delete + ipv4_plen + ipv4_replay + ipv4_flush + ipv4_error_path + ipv4_delete_fail + ipv6_add + ipv6_metric + ipv6_append_single + ipv6_replace_single + ipv6_metric_multipath + ipv6_append_multipath + ipv6_replace_multipath + ipv6_append_multipath_to_single + ipv6_delete_single + ipv6_delete_multipath + ipv6_replay_single + ipv6_replay_multipath + ipv6_error_path + ipv6_delete_fail +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/ +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/fib_offload_lib.sh + +DEVLINK_DEV= +source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} + +ipv4_identical_routes() +{ + fib_ipv4_identical_routes_test "testns1" +} + +ipv4_tos() +{ + fib_ipv4_tos_test "testns1" +} + +ipv4_metric() +{ + fib_ipv4_metric_test "testns1" +} + +ipv4_replace() +{ + fib_ipv4_replace_test "testns1" +} + +ipv4_delete() +{ + fib_ipv4_delete_test "testns1" +} + +ipv4_plen() +{ + fib_ipv4_plen_test "testns1" +} + +ipv4_replay_metric() +{ + fib_ipv4_replay_metric_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay_tos() +{ + fib_ipv4_replay_tos_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay_plen() +{ + fib_ipv4_replay_plen_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay() +{ + ipv4_replay_metric + ipv4_replay_tos + ipv4_replay_plen +} + +ipv4_flush() +{ + fib_ipv4_flush_test "testns1" +} + +ipv4_error_path_add() +{ + local lsb + + RET=0 + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 192.0.2.${lsb}/32 dev dummy1 \ + &> /dev/null + done + + log_test "IPv4 error path - add" + + ip -n testns1 link del dev dummy1 +} + +ipv4_error_path_replay() +{ + local lsb + + RET=0 + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 100 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 192.0.2.${lsb}/32 dev dummy1 + done + + devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV &> /dev/null + + log_test "IPv4 error path - replay" + + ip -n testns1 link del dev dummy1 + + # Successfully reload after deleting all the routes. + devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 100 + devlink -N testns1 dev reload $DEVLINK_DEV +} + +ipv4_error_path() +{ + # Test the different error paths of the notifiers by limiting the size + # of the "IPv4/fib" resource. + ipv4_error_path_add + ipv4_error_path_replay +} + +ipv4_delete_fail() +{ + RET=0 + + echo "y" > $DEBUGFS_DIR/fib/fail_route_delete + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + ip -n testns1 route add 192.0.2.0/24 dev dummy1 + ip -n testns1 route del 192.0.2.0/24 dev dummy1 &> /dev/null + + # We should not be able to delete the netdev if we are leaking a + # reference. + ip -n testns1 link del dev dummy1 + + log_test "IPv4 route delete failure" + + echo "n" > $DEBUGFS_DIR/fib/fail_route_delete +} + +ipv6_add() +{ + fib_ipv6_add_test "testns1" +} + +ipv6_metric() +{ + fib_ipv6_metric_test "testns1" +} + +ipv6_append_single() +{ + fib_ipv6_append_single_test "testns1" +} + +ipv6_replace_single() +{ + fib_ipv6_replace_single_test "testns1" +} + +ipv6_metric_multipath() +{ + fib_ipv6_metric_multipath_test "testns1" +} + +ipv6_append_multipath() +{ + fib_ipv6_append_multipath_test "testns1" +} + +ipv6_replace_multipath() +{ + fib_ipv6_replace_multipath_test "testns1" +} + +ipv6_append_multipath_to_single() +{ + fib_ipv6_append_multipath_to_single_test "testns1" +} + +ipv6_delete_single() +{ + fib_ipv6_delete_single_test "testns1" +} + +ipv6_delete_multipath() +{ + fib_ipv6_delete_multipath_test "testns1" +} + +ipv6_replay_single() +{ + fib_ipv6_replay_single_test "testns1" "$DEVLINK_DEV" +} + +ipv6_replay_multipath() +{ + fib_ipv6_replay_multipath_test "testns1" "$DEVLINK_DEV" +} + +ipv6_error_path_add_single() +{ + local lsb + + RET=0 + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 2001:db8:1::${lsb}/128 dev dummy1 \ + &> /dev/null + done + + log_test "IPv6 error path - add single" + + ip -n testns1 link del dev dummy1 +} + +ipv6_error_path_add_multipath() +{ + local lsb + + RET=0 + + for i in $(seq 1 2); do + ip -n testns1 link add name dummy$i type dummy + ip -n testns1 link set dev dummy$i up + ip -n testns1 address add 2001:db8:$i::1/64 dev dummy$i + done + + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 2001:db8:10::${lsb}/128 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 &> /dev/null + done + + log_test "IPv6 error path - add multipath" + + for i in $(seq 1 2); do + ip -n testns1 link del dev dummy$i + done +} + +ipv6_error_path_replay() +{ + local lsb + + RET=0 + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 100 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 2001:db8:1::${lsb}/128 dev dummy1 + done + + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV &> /dev/null + + log_test "IPv6 error path - replay" + + ip -n testns1 link del dev dummy1 + + # Successfully reload after deleting all the routes. + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 100 + devlink -N testns1 dev reload $DEVLINK_DEV +} + +ipv6_error_path() +{ + # Test the different error paths of the notifiers by limiting the size + # of the "IPv6/fib" resource. + ipv6_error_path_add_single + ipv6_error_path_add_multipath + ipv6_error_path_replay +} + +ipv6_delete_fail() +{ + RET=0 + + echo "y" > $DEBUGFS_DIR/fib/fail_route_delete + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + ip -n testns1 route add 2001:db8:1::/64 dev dummy1 + ip -n testns1 route del 2001:db8:1::/64 dev dummy1 &> /dev/null + + # We should not be able to delete the netdev if we are leaking a + # reference. + ip -n testns1 link del dev dummy1 + + log_test "IPv6 route delete failure" + + echo "n" > $DEBUGFS_DIR/fib/fail_route_delete +} + +fib_notify_on_flag_change_set() +{ + local notify=$1; shift + + ip netns exec testns1 sysctl -qw net.ipv4.fib_notify_on_flag_change=$notify + ip netns exec testns1 sysctl -qw net.ipv6.fib_notify_on_flag_change=$notify + + log_info "Set fib_notify_on_flag_change to $notify" +} + +setup_prepare() +{ + local netdev + + modprobe netdevsim &> /dev/null + + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + ip netns add testns1 + if [ $? -ne 0 ]; then + echo "Failed to add netns \"testns1\"" + exit 1 + fi + + devlink dev reload $DEVLINK_DEV netns testns1 + if [ $? -ne 0 ]; then + echo "Failed to reload into netns \"testns1\"" + exit 1 + fi +} + +cleanup() +{ + pre_cleanup + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +fib_notify_on_flag_change_set 1 +tests_run + +fib_notify_on_flag_change_set 0 +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh new file mode 100755 index 000000000000..9896580c3d85 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh @@ -0,0 +1,430 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ipv4_route_addition_test + ipv4_route_deletion_test + ipv4_route_replacement_test + ipv4_route_offload_failed_test + ipv6_route_addition_test + ipv6_route_deletion_test + ipv6_route_replacement_test + ipv6_route_offload_failed_test +" + +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/ +NUM_NETIFS=0 +source $lib_dir/lib.sh + +check_rt_offload_failed() +{ + local outfile=$1; shift + local line + + # Make sure that the first notification was emitted without + # RTM_F_OFFLOAD_FAILED flag and the second with RTM_F_OFFLOAD_FAILED + # flag + head -n 1 $outfile | grep -q "rt_offload_failed" + if [[ $? -eq 0 ]]; then + return 1 + fi + + head -n 2 $outfile | tail -n 1 | grep -q "rt_offload_failed" +} + +check_rt_trap() +{ + local outfile=$1; shift + local line + + # Make sure that the first notification was emitted without RTM_F_TRAP + # flag and the second with RTM_F_TRAP flag + head -n 1 $outfile | grep -q "rt_trap" + if [[ $? -eq 0 ]]; then + return 1 + fi + + head -n 2 $outfile | tail -n 1 | grep -q "rt_trap" +} + +route_notify_check() +{ + local outfile=$1; shift + local expected_num_lines=$1; shift + local offload_failed=${1:-0}; shift + + # check the monitor results + lines=`wc -l $outfile | cut "-d " -f1` + test $lines -eq $expected_num_lines + check_err $? "$expected_num_lines notifications were expected but $lines were received" + + if [[ $expected_num_lines -eq 1 ]]; then + return + fi + + if [[ $offload_failed -eq 0 ]]; then + check_rt_trap $outfile + check_err $? "Wrong RTM_F_TRAP flags in notifications" + else + check_rt_offload_failed $outfile + check_err $? "Wrong RTM_F_OFFLOAD_FAILED flags in notifications" + fi +} + +route_addition_check() +{ + local ip=$1; shift + local notify=$1; shift + local route=$1; shift + local expected_num_notifications=$1; shift + local offload_failed=${1:-0}; shift + + ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify + + local outfile=$(mktemp) + + $IP monitor route &> $outfile & + sleep 1 + $IP route add $route dev dummy1 + sleep 1 + kill_process %% + + route_notify_check $outfile $expected_num_notifications $offload_failed + rm -f $outfile + + $IP route del $route dev dummy1 +} + +ipv4_route_addition_test() +{ + RET=0 + + local ip="ipv4" + local route=192.0.2.0/24 + + # Make sure a single notification will be emitted for the programmed + # route. + local notify=0 + local expected_num_notifications=1 + # route_addition_check will assign value to RET. + route_addition_check $ip $notify $route $expected_num_notifications + + # Make sure two notifications will be emitted for the programmed route. + notify=1 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications + + # notify=2 means emit notifications only for failed route installation, + # make sure a single notification will be emitted for the programmed + # route. + notify=2 + expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications + + log_test "IPv4 route addition" +} + +route_deletion_check() +{ + local ip=$1; shift + local notify=$1; shift + local route=$1; shift + local expected_num_notifications=$1; shift + + ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify + $IP route add $route dev dummy1 + sleep 1 + + local outfile=$(mktemp) + + $IP monitor route &> $outfile & + sleep 1 + $IP route del $route dev dummy1 + sleep 1 + kill_process %% + + route_notify_check $outfile $expected_num_notifications + rm -f $outfile +} + +ipv4_route_deletion_test() +{ + RET=0 + + local ip="ipv4" + local route=192.0.2.0/24 + local expected_num_notifications=1 + + # Make sure a single notification will be emitted for the deleted route, + # regardless of fib_notify_on_flag_change value. + local notify=0 + # route_deletion_check will assign value to RET. + route_deletion_check $ip $notify $route $expected_num_notifications + + notify=1 + route_deletion_check $ip $notify $route $expected_num_notifications + + log_test "IPv4 route deletion" +} + +route_replacement_check() +{ + local ip=$1; shift + local notify=$1; shift + local route=$1; shift + local expected_num_notifications=$1; shift + + ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify + $IP route add $route dev dummy1 + sleep 1 + + local outfile=$(mktemp) + + $IP monitor route &> $outfile & + sleep 1 + $IP route replace $route dev dummy2 + sleep 1 + kill_process %% + + route_notify_check $outfile $expected_num_notifications + rm -f $outfile + + $IP route del $route dev dummy2 +} + +ipv4_route_replacement_test() +{ + RET=0 + + local ip="ipv4" + local route=192.0.2.0/24 + + $IP link add name dummy2 type dummy + $IP link set dev dummy2 up + + # Make sure a single notification will be emitted for the new route. + local notify=0 + local expected_num_notifications=1 + # route_replacement_check will assign value to RET. + route_replacement_check $ip $notify $route $expected_num_notifications + + # Make sure two notifications will be emitted for the new route. + notify=1 + expected_num_notifications=2 + route_replacement_check $ip $notify $route $expected_num_notifications + + # notify=2 means emit notifications only for failed route installation, + # make sure a single notification will be emitted for the new route. + notify=2 + expected_num_notifications=1 + route_replacement_check $ip $notify $route $expected_num_notifications + + $IP link del name dummy2 + + log_test "IPv4 route replacement" +} + +ipv4_route_offload_failed_test() +{ + + RET=0 + + local ip="ipv4" + local route=192.0.2.0/24 + local offload_failed=1 + + echo "y"> $DEBUGFS_DIR/fib/fail_route_offload + check_err $? "Failed to setup route offload to fail" + + # Make sure a single notification will be emitted for the programmed + # route. + local notify=0 + local expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + # Make sure two notifications will be emitted for the new route. + notify=1 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + # notify=2 means emit notifications only for failed route installation, + # make sure two notifications will be emitted for the new route. + notify=2 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + echo "n"> $DEBUGFS_DIR/fib/fail_route_offload + check_err $? "Failed to setup route offload not to fail" + + log_test "IPv4 route offload failed" +} + +ipv6_route_addition_test() +{ + RET=0 + + local ip="ipv6" + local route=2001:db8:1::/64 + + # Make sure a single notification will be emitted for the programmed + # route. + local notify=0 + local expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications + + # Make sure two notifications will be emitted for the programmed route. + notify=1 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications + + # notify=2 means emit notifications only for failed route installation, + # make sure a single notification will be emitted for the programmed + # route. + notify=2 + expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications + + log_test "IPv6 route addition" +} + +ipv6_route_deletion_test() +{ + RET=0 + + local ip="ipv6" + local route=2001:db8:1::/64 + local expected_num_notifications=1 + + # Make sure a single notification will be emitted for the deleted route, + # regardless of fib_notify_on_flag_change value. + local notify=0 + route_deletion_check $ip $notify $route $expected_num_notifications + + notify=1 + route_deletion_check $ip $notify $route $expected_num_notifications + + log_test "IPv6 route deletion" +} + +ipv6_route_replacement_test() +{ + RET=0 + + local ip="ipv6" + local route=2001:db8:1::/64 + + $IP link add name dummy2 type dummy + $IP link set dev dummy2 up + + # Make sure a single notification will be emitted for the new route. + local notify=0 + local expected_num_notifications=1 + route_replacement_check $ip $notify $route $expected_num_notifications + + # Make sure two notifications will be emitted for the new route. + notify=1 + expected_num_notifications=2 + route_replacement_check $ip $notify $route $expected_num_notifications + + # notify=2 means emit notifications only for failed route installation, + # make sure a single notification will be emitted for the new route. + notify=2 + expected_num_notifications=1 + route_replacement_check $ip $notify $route $expected_num_notifications + + $IP link del name dummy2 + + log_test "IPv6 route replacement" +} + +ipv6_route_offload_failed_test() +{ + + RET=0 + + local ip="ipv6" + local route=2001:db8:1::/64 + local offload_failed=1 + + echo "y"> $DEBUGFS_DIR/fib/fail_route_offload + check_err $? "Failed to setup route offload to fail" + + # Make sure a single notification will be emitted for the programmed + # route. + local notify=0 + local expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + # Make sure two notifications will be emitted for the new route. + notify=1 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + # notify=2 means emit notifications only for failed route installation, + # make sure two notifications will be emitted for the new route. + notify=2 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications \ + $offload_failed + + echo "n"> $DEBUGFS_DIR/fib/fail_route_offload + check_err $? "Failed to setup route offload not to fail" + + log_test "IPv6 route offload failed" +} + +setup_prepare() +{ + modprobe netdevsim &> /dev/null + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + ip netns add testns1 + + if [ $? -ne 0 ]; then + echo "Failed to add netns \"testns1\"" + exit 1 + fi + + devlink dev reload $DEVLINK_DEV netns testns1 + + if [ $? -ne 0 ]; then + echo "Failed to reload into netns \"testns1\"" + exit 1 + fi + + IP="ip -n testns1" + + $IP link add name dummy1 type dummy + $IP link set dev dummy1 up +} + +cleanup() +{ + pre_cleanup + + $IP link del name dummy1 + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh new file mode 100755 index 000000000000..cba5ac08426b --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh @@ -0,0 +1,421 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + l3_reporting_test + l3_fail_next_test + l3_counter_test + l3_rollback_test + l3_monitor_test +" + +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR_1=1337 +DEV_ADDR_2=1057 +DEV_ADDR_3=5417 +NUM_NETIFS=0 +source $lib_dir/lib.sh + +DUMMY_IFINDEX= + +DEV_ADDR() +{ + local n=$1; shift + local var=DEV_ADDR_$n + + echo ${!var} +} + +DEV() +{ + echo netdevsim$(DEV_ADDR $1) +} + +DEVLINK_DEV() +{ + echo netdevsim/$(DEV $1) +} + +SYSFS_NET_DIR() +{ + echo /sys/bus/netdevsim/devices/$(DEV $1)/net/ +} + +DEBUGFS_DIR() +{ + echo /sys/kernel/debug/netdevsim/$(DEV $1)/ +} + +nsim_add() +{ + local n=$1; shift + + echo "$(DEV_ADDR $n) 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $(SYSFS_NET_DIR $n) ] ; do :; done +} + +nsim_reload() +{ + local n=$1; shift + local ns=$1; shift + + devlink dev reload $(DEVLINK_DEV $n) netns $ns + + if [ $? -ne 0 ]; then + echo "Failed to reload $(DEV $n) into netns \"testns1\"" + exit 1 + fi + +} + +nsim_del() +{ + local n=$1; shift + + echo "$(DEV_ADDR $n)" > ${NETDEVSIM_PATH}/del_device +} + +nsim_hwstats_toggle() +{ + local action=$1; shift + local instance=$1; shift + local netdev=$1; shift + local type=$1; shift + + local ifindex=$($IP -j link show dev $netdev | jq '.[].ifindex') + + echo $ifindex > $(DEBUGFS_DIR $instance)/hwstats/$type/$action +} + +nsim_hwstats_enable() +{ + nsim_hwstats_toggle enable_ifindex "$@" +} + +nsim_hwstats_disable() +{ + nsim_hwstats_toggle disable_ifindex "$@" +} + +nsim_hwstats_fail_next_enable() +{ + nsim_hwstats_toggle fail_next_enable "$@" +} + +setup_prepare() +{ + modprobe netdevsim &> /dev/null + nsim_add 1 + nsim_add 2 + nsim_add 3 + + ip netns add testns1 + + if [ $? -ne 0 ]; then + echo "Failed to add netns \"testns1\"" + exit 1 + fi + + nsim_reload 1 testns1 + nsim_reload 2 testns1 + nsim_reload 3 testns1 + + IP="ip -n testns1" + + $IP link add name dummy1 type dummy + $IP link set dev dummy1 up + DUMMY_IFINDEX=$($IP -j link show dev dummy1 | jq '.[].ifindex') +} + +cleanup() +{ + pre_cleanup + + $IP link del name dummy1 + ip netns del testns1 + nsim_del 3 + nsim_del 2 + nsim_del 1 + modprobe -r netdevsim &> /dev/null +} + +netdev_hwstats_used() +{ + local netdev=$1; shift + local type=$1; shift + + $IP -j stats show dev "$netdev" group offload subgroup hw_stats_info | + jq '.[].info.l3_stats.used' +} + +netdev_check_used() +{ + local netdev=$1; shift + local type=$1; shift + + [[ $(netdev_hwstats_used $netdev $type) == "true" ]] +} + +netdev_check_unused() +{ + local netdev=$1; shift + local type=$1; shift + + [[ $(netdev_hwstats_used $netdev $type) == "false" ]] +} + +netdev_hwstats_request() +{ + local netdev=$1; shift + local type=$1; shift + + $IP -j stats show dev "$netdev" group offload subgroup hw_stats_info | + jq ".[].info.${type}_stats.request" +} + +netdev_check_requested() +{ + local netdev=$1; shift + local type=$1; shift + + [[ $(netdev_hwstats_request $netdev $type) == "true" ]] +} + +netdev_check_unrequested() +{ + local netdev=$1; shift + local type=$1; shift + + [[ $(netdev_hwstats_request $netdev $type) == "false" ]] +} + +reporting_test() +{ + local type=$1; shift + local instance=1 + + RET=0 + + [[ -n $(netdev_hwstats_used dummy1 $type) ]] + check_err $? "$type stats not reported" + + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used before either device or netdevsim request" + + nsim_hwstats_enable $instance dummy1 $type + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used before device request" + netdev_check_unrequested dummy1 $type + check_err $? "$type stats reported as requested before device request" + + $IP stats set dev dummy1 ${type}_stats on + netdev_check_used dummy1 $type + check_err $? "$type stats reported as not used after both device and netdevsim request" + netdev_check_requested dummy1 $type + check_err $? "$type stats reported as not requested after device request" + + nsim_hwstats_disable $instance dummy1 $type + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used after netdevsim request withdrawn" + + nsim_hwstats_enable $instance dummy1 $type + netdev_check_used dummy1 $type + check_err $? "$type stats reported as not used after netdevsim request reenabled" + + $IP stats set dev dummy1 ${type}_stats off + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used after device request withdrawn" + netdev_check_unrequested dummy1 $type + check_err $? "$type stats reported as requested after device request withdrawn" + + nsim_hwstats_disable $instance dummy1 $type + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used after both requests withdrawn" + + log_test "Reporting of $type stats usage" +} + +l3_reporting_test() +{ + reporting_test l3 +} + +__fail_next_test() +{ + local instance=$1; shift + local type=$1; shift + + RET=0 + + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used before either device or netdevsim request" + + nsim_hwstats_enable $instance dummy1 $type + nsim_hwstats_fail_next_enable $instance dummy1 $type + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used before device request" + netdev_check_unrequested dummy1 $type + check_err $? "$type stats reported as requested before device request" + + $IP stats set dev dummy1 ${type}_stats on 2>/dev/null + check_fail $? "$type stats request not bounced as it should have been" + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used after bounce" + netdev_check_unrequested dummy1 $type + check_err $? "$type stats reported as requested after bounce" + + $IP stats set dev dummy1 ${type}_stats on + check_err $? "$type stats request failed when it shouldn't have" + netdev_check_used dummy1 $type + check_err $? "$type stats reported as not used after both device and netdevsim request" + netdev_check_requested dummy1 $type + check_err $? "$type stats reported as not requested after device request" + + $IP stats set dev dummy1 ${type}_stats off + nsim_hwstats_disable $instance dummy1 $type + + log_test "Injected failure of $type stats enablement (netdevsim #$instance)" +} + +fail_next_test() +{ + __fail_next_test 1 "$@" + __fail_next_test 2 "$@" + __fail_next_test 3 "$@" +} + +l3_fail_next_test() +{ + fail_next_test l3 +} + +get_hwstat() +{ + local netdev=$1; shift + local type=$1; shift + local selector=$1; shift + + $IP -j stats show dev $netdev group offload subgroup ${type}_stats | + jq ".[0].stats64.${selector}" +} + +counter_test() +{ + local type=$1; shift + local instance=1 + + RET=0 + + nsim_hwstats_enable $instance dummy1 $type + $IP stats set dev dummy1 ${type}_stats on + netdev_check_used dummy1 $type + check_err $? "$type stats reported as not used after both device and netdevsim request" + + # Netdevsim counts 10pps on ingress. We should see maybe a couple + # packets, unless things take a reeealy long time. + local pkts=$(get_hwstat dummy1 l3 rx.packets) + ((pkts < 10)) + check_err $? "$type stats show >= 10 packets after first enablement" + + sleep 2.5 + + local pkts=$(get_hwstat dummy1 l3 rx.packets) + ((pkts >= 20)) + check_err $? "$type stats show < 20 packets after 2.5s passed" + + $IP stats set dev dummy1 ${type}_stats off + + sleep 2 + + $IP stats set dev dummy1 ${type}_stats on + local pkts=$(get_hwstat dummy1 l3 rx.packets) + ((pkts < 10)) + check_err $? "$type stats show >= 10 packets after second enablement" + + $IP stats set dev dummy1 ${type}_stats off + nsim_hwstats_fail_next_enable $instance dummy1 $type + $IP stats set dev dummy1 ${type}_stats on 2>/dev/null + check_fail $? "$type stats request not bounced as it should have been" + + sleep 2 + + $IP stats set dev dummy1 ${type}_stats on + local pkts=$(get_hwstat dummy1 l3 rx.packets) + ((pkts < 10)) + check_err $? "$type stats show >= 10 packets after post-fail enablement" + + $IP stats set dev dummy1 ${type}_stats off + + log_test "Counter values in $type stats" +} + +l3_counter_test() +{ + counter_test l3 +} + +rollback_test() +{ + local type=$1; shift + + RET=0 + + nsim_hwstats_enable 1 dummy1 l3 + nsim_hwstats_enable 2 dummy1 l3 + nsim_hwstats_enable 3 dummy1 l3 + + # The three netdevsim instances are registered in order of their number + # one after another. It is reasonable to expect that whatever + # notifications take place hit no. 2 in between hitting nos. 1 and 3, + # whatever the actual order. This allows us to test that a fail caused + # by no. 2 does not leave the system in a partial state, and rolls + # everything back. + + nsim_hwstats_fail_next_enable 2 dummy1 l3 + $IP stats set dev dummy1 ${type}_stats on 2>/dev/null + check_fail $? "$type stats request not bounced as it should have been" + + netdev_check_unused dummy1 $type + check_err $? "$type stats reported as used after bounce" + netdev_check_unrequested dummy1 $type + check_err $? "$type stats reported as requested after bounce" + + sleep 2 + + $IP stats set dev dummy1 ${type}_stats on + check_err $? "$type stats request not upheld as it should have been" + + local pkts=$(get_hwstat dummy1 l3 rx.packets) + ((pkts < 10)) + check_err $? "$type stats show $pkts packets after post-fail enablement" + + $IP stats set dev dummy1 ${type}_stats off + + nsim_hwstats_disable 3 dummy1 l3 + nsim_hwstats_disable 2 dummy1 l3 + nsim_hwstats_disable 1 dummy1 l3 + + log_test "Failure in $type stats enablement rolled back" +} + +l3_rollback_test() +{ + rollback_test l3 +} + +l3_monitor_test() +{ + hw_stats_monitor_test dummy1 l3 \ + "nsim_hwstats_enable 1 dummy1 l3" \ + "nsim_hwstats_disable 1 dummy1 l3" \ + "$IP" +} + +trap cleanup EXIT + +setup_prepare +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh b/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh new file mode 100755 index 000000000000..98033e6667d2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +NSIM_NETDEV=$(make_netdev) +MACSEC_NETDEV=macsec_nsim + +set -o pipefail + +if ! ethtool -k $NSIM_NETDEV | grep -q 'macsec-hw-offload: on'; then + echo "SKIP: netdevsim doesn't support MACsec offload" + exit 4 +fi + +if ! ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac 2>/dev/null; then + echo "SKIP: couldn't create macsec device" + exit 4 +fi +ip link del $MACSEC_NETDEV + +# +# test macsec offload API +# + +ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}" type macsec port 4 offload mac +check $? + +ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}2" type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac +check $? + +ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}3" type macsec sci abbacdde01020304 offload mac +check $? + +ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}4" type macsec port 8 offload mac 2> /dev/null +check $? '' '' 1 + +ip macsec add "${MACSEC_NETDEV}" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 +check $? + +ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef" +check $? + +ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \ + key 00 0123456789abcdef0123456789abcdef +check $? + +ip macsec add "${MACSEC_NETDEV}" rx port 1235 address "1c:ed:de:ad:be:ef" 2> /dev/null +check $? '' '' 1 + +# can't disable macsec offload when SAs are configured +ip link set "${MACSEC_NETDEV}" type macsec offload off 2> /dev/null +check $? '' '' 1 + +ip macsec offload "${MACSEC_NETDEV}" off 2> /dev/null +check $? '' '' 1 + +# toggle macsec offload via rtnetlink +ip link set "${MACSEC_NETDEV}2" type macsec offload off +check $? + +ip link set "${MACSEC_NETDEV}2" type macsec offload mac +check $? + +# toggle macsec offload via genetlink +ip macsec offload "${MACSEC_NETDEV}2" off +check $? + +ip macsec offload "${MACSEC_NETDEV}2" mac +check $? + +for dev in ${MACSEC_NETDEV}{,2,3} ; do + ip link del $dev + check $? +done + + +# +# test ethtool features when toggling offload +# + +ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac +TMP_FEATS_ON_1="$(ethtool -k $MACSEC_NETDEV)" + +ip link set $MACSEC_NETDEV type macsec offload off +TMP_FEATS_OFF_1="$(ethtool -k $MACSEC_NETDEV)" + +ip link set $MACSEC_NETDEV type macsec offload mac +TMP_FEATS_ON_2="$(ethtool -k $MACSEC_NETDEV)" + +[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_2" ] +check $? + +ip link del $MACSEC_NETDEV + +ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec +check $? + +TMP_FEATS_OFF_2="$(ethtool -k $MACSEC_NETDEV)" +[ "$TMP_FEATS_OFF_1" = "$TMP_FEATS_OFF_2" ] +check $? + +ip link set $MACSEC_NETDEV type macsec offload mac +check $? + +TMP_FEATS_ON_3="$(ethtool -k $MACSEC_NETDEV)" +[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_3" ] +check $? + + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh new file mode 100755 index 000000000000..01d0c044a5fc --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -0,0 +1,1058 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the nexthop offload API. It makes use of netdevsim +# which registers a listener to the nexthop notification chain. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + nexthop_single_add_test + nexthop_single_add_err_test + nexthop_group_add_test + nexthop_group_add_err_test + nexthop_res_group_add_test + nexthop_res_group_add_err_test + nexthop_group_replace_test + nexthop_group_replace_err_test + nexthop_res_group_replace_test + nexthop_res_group_replace_err_test + nexthop_res_group_idle_timer_test + nexthop_res_group_idle_timer_del_test + nexthop_res_group_increase_idle_timer_test + nexthop_res_group_decrease_idle_timer_test + nexthop_res_group_unbalanced_timer_test + nexthop_res_group_unbalanced_timer_del_test + nexthop_res_group_no_unbalanced_timer_test + nexthop_res_group_short_unbalanced_timer_test + nexthop_res_group_increase_unbalanced_timer_test + nexthop_res_group_decrease_unbalanced_timer_test + nexthop_res_group_force_migrate_busy_test + nexthop_single_replace_test + nexthop_single_replace_err_test + nexthop_single_in_group_replace_test + nexthop_single_in_group_replace_err_test + nexthop_single_in_res_group_replace_test + nexthop_single_in_res_group_replace_err_test + nexthop_single_in_group_delete_test + nexthop_single_in_group_delete_err_test + nexthop_single_in_res_group_delete_test + nexthop_single_in_res_group_delete_err_test + nexthop_replay_test + nexthop_replay_err_test +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/ +NUM_NETIFS=0 +source $lib_dir/lib.sh + +DEVLINK_DEV= +source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} + +nexthop_check() +{ + local nharg="$1"; shift + local expected="$1"; shift + + out=$($IP nexthop show ${nharg} | sed -e 's/ *$//') + if [[ "$out" != "$expected" ]]; then + return 1 + fi + + return 0 +} + +nexthop_bucket_nhid_count_check() +{ + local group_id=$1; shift + local expected + local count + local nhid + local ret + + while (($# > 0)); do + nhid=$1; shift + expected=$1; shift + + count=$($IP nexthop bucket show id $group_id nhid $nhid | + grep "trap" | wc -l) + if ((expected != count)); then + return 1 + fi + done + + return 0 +} + +nexthop_resource_check() +{ + local expected_occ=$1; shift + + occ=$($DEVLINK -jp resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="nexthops") | .["occ"]') + + if [ $expected_occ -ne $occ ]; then + return 1 + fi + + return 0 +} + +nexthop_resource_set() +{ + local size=$1; shift + + $DEVLINK resource set $DEVLINK_DEV path nexthops size $size + $DEVLINK dev reload $DEVLINK_DEV +} + +nexthop_single_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 1 + nexthop_resource_check 0 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Single nexthop add and delete" +} + +nexthop_single_add_err_test() +{ + RET=0 + + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop addition succeeded when should fail" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + $IP nexthop add id 10 group 1,20/2,39 + nexthop_check "id 10" "id 10 group 1,20/2,39 trap" + check_err $? "Unexpected weighted nexthop group entry" + + nexthop_resource_check 61 + check_err $? "Wrong weighted nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Nexthop group add and delete" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_add_err_test() +{ + RET=0 + + nexthop_resource_set 2 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 &> /dev/null + check_fail $? "Nexthop group addition succeeded when should fail" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_res_group_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 type resilient buckets 4 + nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_bucket_nhid_count_check 10 1 2 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 2 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + $IP nexthop add id 10 group 1,3/2,2 type resilient buckets 5 + nexthop_check "id 10" "id 10 group 1,3/2,2 type resilient buckets 5 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected weighted nexthop group entry" + + nexthop_bucket_nhid_count_check 10 1 3 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 2 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 7 + check_err $? "Wrong weighted nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Resilient nexthop group add and delete" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_add_err_test() +{ + RET=0 + + nexthop_resource_set 2 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 type resilient buckets 4 &> /dev/null + check_fail $? "Nexthop group addition succeeded when should fail" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Resilient nexthop group add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 + nexthop_check "id 10" "id 10 group 1/2/3 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 &> /dev/null + check_fail $? "Nexthop group replacement succeeded when should fail" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Nexthop group replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_res_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 6 + + $IP nexthop replace id 10 group 1/2/3 type resilient + nexthop_check "id 10" "id 10 group 1/2/3 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_bucket_nhid_count_check 10 1 2 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 2 2 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 3 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 9 + check_err $? "Wrong nexthop occupancy" + + log_test "Resilient nexthop group replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_replace_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 6 + + ip netns exec testns1 \ + echo 1 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace + $IP nexthop replace id 10 group 1/2/3 type resilient &> /dev/null + check_fail $? "Nexthop group replacement succeeded when should fail" + + nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_bucket_nhid_count_check 10 1 3 + check_err $? "Wrong nexthop buckets count" + nexthop_bucket_nhid_count_check 10 2 3 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 9 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Resilient nexthop group replace failure" + + $IP nexthop flush &> /dev/null + ip netns exec testns1 \ + echo 0 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace +} + +nexthop_res_mark_buckets_busy() +{ + local group_id=$1; shift + local nhid=$1; shift + local count=$1; shift + local index + + for index in $($IP -j nexthop bucket show id $group_id nhid $nhid | + jq '.[].bucket.index' | head -n ${count:--0}) + do + echo $group_id $index \ + > $DEBUGFS_NET_DIR/fib/nexthop_bucket_activity + done +} + +nexthop_res_num_nhid_buckets() +{ + local group_id=$1; shift + local nhid=$1; shift + + $IP -j nexthop bucket show id $group_id nhid $nhid | jq length +} + +nexthop_res_group_idle_timer_test() +{ + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient buckets 8 idle_timer 4 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "Group expected to be unbalanced" + + sleep 6 + + nexthop_bucket_nhid_count_check 10 1 2 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after idle timer" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_idle_timer_del_test() +{ + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1,50/2,50/3,1 \ + type resilient buckets 8 idle_timer 6 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient + + nexthop_bucket_nhid_count_check 10 1 4 2 4 3 0 + check_err $? "Group expected to be unbalanced" + + sleep 4 + + # Deletion prompts group replacement. Check that the bucket timers + # are kept. + $IP nexthop delete id 3 + + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "Group expected to still be unbalanced" + + sleep 4 + + nexthop_bucket_nhid_count_check 10 1 2 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after idle timer (with delete)" + + $IP nexthop flush &> /dev/null +} + +__nexthop_res_group_increase_timer_test() +{ + local timer=$1; shift + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 4 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + sleep 2 + $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8 + sleep 4 + + # 6 seconds, past the original timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group still expected to be unbalanced" + + sleep 4 + + # 10 seconds, past the new timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after $timer increase" + + $IP nexthop flush &> /dev/null +} + +__nexthop_res_group_decrease_timer_test() +{ + local timer=$1; shift + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 8 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + sleep 2 + $IP nexthop replace id 10 group 1/2,3 type resilient $timer 4 + sleep 4 + + # 6 seconds, past the new timer, before the old timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after $timer decrease" + + $IP nexthop flush &> /dev/null +} + +__nexthop_res_group_increase_timer_del_test() +{ + local timer=$1; shift + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1,100/2,100/3,1 \ + type resilient buckets 8 $timer 4 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1,100/2,300/3,1 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + sleep 2 + $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8 + sleep 4 + + # 6 seconds, past the original timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group still expected to be unbalanced" + + sleep 4 + + # 10 seconds, past the new timer. + nexthop_bucket_nhid_count_check 10 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after $timer increase" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_increase_idle_timer_test() +{ + __nexthop_res_group_increase_timer_test idle_timer +} + +nexthop_res_group_decrease_idle_timer_test() +{ + __nexthop_res_group_decrease_timer_test idle_timer +} + +nexthop_res_group_unbalanced_timer_test() +{ + local i + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient \ + buckets 8 idle_timer 6 unbalanced_timer 10 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + for i in 1 2; do + sleep 4 + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "$i: Group expected to be unbalanced" + nexthop_res_mark_buckets_busy 10 1 + done + + # 3 x sleep 4 > unbalanced timer 10 + sleep 4 + nexthop_bucket_nhid_count_check 10 1 2 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after unbalanced timer" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_unbalanced_timer_del_test() +{ + local i + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1,50/2,50/3,1 type resilient \ + buckets 8 idle_timer 6 unbalanced_timer 10 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient + + # Check that NH delete does not reset unbalanced time. + sleep 4 + $IP nexthop delete id 3 + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "1: Group expected to be unbalanced" + nexthop_res_mark_buckets_busy 10 1 + + sleep 4 + nexthop_bucket_nhid_count_check 10 1 4 2 4 + check_err $? "2: Group expected to be unbalanced" + nexthop_res_mark_buckets_busy 10 1 + + # 3 x sleep 4 > unbalanced timer 10 + sleep 4 + nexthop_bucket_nhid_count_check 10 1 2 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after unbalanced timer (with delete)" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_no_unbalanced_timer_test() +{ + local i + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient buckets 8 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + for i in $(seq 3); do + sleep 60 + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "$i: Group expected to be unbalanced" + nexthop_res_mark_buckets_busy 10 1 + done + + log_test "Buckets never force-migrated without unbalanced timer" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_short_unbalanced_timer_test() +{ + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient \ + buckets 8 idle_timer 120 unbalanced_timer 4 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + sleep 5 + + nexthop_bucket_nhid_count_check 10 2 6 + check_err $? "Group expected to be balanced" + + log_test "Bucket migration after unbalanced < idle timer" + + $IP nexthop flush &> /dev/null +} + +nexthop_res_group_increase_unbalanced_timer_test() +{ + __nexthop_res_group_increase_timer_test unbalanced_timer +} + +nexthop_res_group_decrease_unbalanced_timer_test() +{ + __nexthop_res_group_decrease_timer_test unbalanced_timer +} + +nexthop_res_group_force_migrate_busy_test() +{ + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + RET=0 + + $IP nexthop add id 10 group 1/2 type resilient \ + buckets 8 idle_timer 120 + nexthop_res_mark_buckets_busy 10 1 + $IP nexthop replace id 10 group 1/2,3 type resilient + + nexthop_bucket_nhid_count_check 10 2 6 + check_fail $? "Group expected to be unbalanced" + + $IP nexthop replace id 10 group 2 type resilient + nexthop_bucket_nhid_count_check 10 2 8 + check_err $? "All buckets expected to have migrated" + + log_test "Busy buckets force-migrated when NH removed" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_replace_err_test() +{ + RET=0 + + # This is supposed to cause the replace to fail because the new nexthop + # is programmed before deleting the replaced one. + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop replace succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Single nexthop replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 + check_err $? "Failed to replace nexthop when should not" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null + check_fail $? "Nexthop replacement succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_res_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 4 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 + check_err $? "Failed to replace nexthop when should not" + + nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_bucket_nhid_count_check 10 1 2 2 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in resilient group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_res_group_replace_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 4 + + ip netns exec testns1 \ + echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null + check_fail $? "Nexthop replacement succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_bucket_nhid_count_check 10 1 2 2 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in resilient group failure" + + $IP nexthop flush &> /dev/null + ip netns exec testns1 \ + echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace +} + +nexthop_single_in_group_delete_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop del id 1 + nexthop_check "id 10" "id 10 group 2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_delete_err_test() +{ + RET=0 + + # First, nexthop 1 will be deleted, which will reduce the occupancy to + # 5. Afterwards, a replace notification will be sent for nexthop group + # 10 with only two nexthops. Since the new group is allocated before + # the old is deleted, the replacement will fail as it will result in an + # occupancy of 7. + nexthop_resource_set 6 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2/3 + + $IP nexthop del id 1 + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_res_group_delete_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 type resilient buckets 4 + + $IP nexthop del id 1 + nexthop_check "id 10" "id 10 group 2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_bucket_nhid_count_check 10 2 4 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in resilient group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_res_group_delete_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2/3 type resilient buckets 6 + + ip netns exec testns1 \ + echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace + $IP nexthop del id 1 + + # We failed to replace the two nexthop buckets that were originally + # assigned to nhid 1. + nexthop_bucket_nhid_count_check 10 2 2 3 2 + check_err $? "Wrong nexthop buckets count" + + nexthop_resource_check 8 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in resilient group failure" + + $IP nexthop flush &> /dev/null + ip netns exec testns1 \ + echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace +} + +nexthop_replay_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 2" "id 2 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after reload" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop replay" + + $IP nexthop flush &> /dev/null +} + +nexthop_replay_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + # Reduce size of nexthop resource so that reload will fail. + $DEVLINK resource set $DEVLINK_DEV path nexthops size 3 + $DEVLINK dev reload $DEVLINK_DEV &> /dev/null + check_fail $? "Reload succeeded when should fail" + + $DEVLINK resource set $DEVLINK_DEV path nexthops size 9999 + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + log_test "Nexthop replay failure" + + $IP nexthop flush &> /dev/null +} + +setup_prepare() +{ + local netdev + + modprobe netdevsim &> /dev/null + + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + set -e + + ip netns add testns1 + devlink dev reload $DEVLINK_DEV netns testns1 + + IP="ip -netns testns1" + DEVLINK="devlink -N testns1" + + $IP link add name dummy1 up type dummy + $IP address add 192.0.2.1/24 dev dummy1 + + set +e +} + +cleanup() +{ + pre_cleanup + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +xfail_on_slow tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh new file mode 100755 index 000000000000..7f32b5600925 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh @@ -0,0 +1,144 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +lib_dir=$(dirname $0)/../../../net +source $lib_dir/lib.sh + +NSIM_DEV_1_ID=$((256 + RANDOM % 256)) +NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID +NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +socat_check() +{ + if [ ! -x "$(command -v socat)" ]; then + echo "socat command not found. Skipping test" + return 1 + fi + + return 0 +} + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_DEV_1_SYS/net -exec basename {} \;) + NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_DEV_2_SYS/net -exec basename {} \;) + + ip link set $NSIM_DEV_1_NAME netns nssv + ip link set $NSIM_DEV_2_NAME netns nscl + + ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME + ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME + + ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up + ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +### +### Code start +### + +socat_check || exit 4 + +modprobe netdevsim + +# linking + +echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_DEV_1_FD=$((256 + RANDOM % 256)) +exec {NSIM_DEV_1_FD}</var/run/netns/nssv +NSIM_DEV_1_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_DEV_1_NAME/ifindex) + +NSIM_DEV_2_FD=$((256 + RANDOM % 256)) +exec {NSIM_DEV_2_FD}</var/run/netns/nscl +NSIM_DEV_2_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_DEV_2_NAME/ifindex) + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:2000" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "linking with non-existent netdevsim should fail" + cleanup_ns + exit 1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX 2000:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "linking with non-existent netnsid should fail" + cleanup_ns + exit 1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "linking with self should fail" + cleanup_ns + exit 1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK +if [ $? -ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +# argument error checking + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:a" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "invalid arg should fail" + cleanup_ns + exit 1 +fi + +# send/recv packets + +tmp_file=$(mktemp) +ip netns exec nssv socat TCP-LISTEN:1234,fork $tmp_file & +pid=$! +res=0 + +wait_local_port_listen nssv 1234 tcp + +echo "HI" | ip netns exec nscl socat STDIN TCP:192.168.1.1:1234 + +count=$(cat $tmp_file | wc -c) +if [[ $count -ne 3 ]]; then + echo "expected 3 bytes, got $count" + res=1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL + +kill $pid +echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit $res diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh new file mode 100755 index 000000000000..e689ff7a0b12 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh @@ -0,0 +1,183 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the psample module. It makes use of netdevsim +# which periodically generates "sampled" packets. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + psample_enable_test + psample_group_num_test + psample_md_test +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/ +CAPTURE_FILE=$(mktemp) +NUM_NETIFS=0 +source $lib_dir/lib.sh + +DEVLINK_DEV= +source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} + +# Available at https://github.com/Mellanox/libpsample +require_command psample + +psample_capture() +{ + rm -f $CAPTURE_FILE + + timeout 2 ip netns exec testns1 psample &> $CAPTURE_FILE +} + +psample_enable_test() +{ + RET=0 + + echo 1 > $PSAMPLE_DIR/enable + check_err $? "Failed to enable sampling when should not" + + echo 1 > $PSAMPLE_DIR/enable 2>/dev/null + check_fail $? "Sampling enablement succeeded when should fail" + + psample_capture + if [ $(cat $CAPTURE_FILE | wc -l) -eq 0 ]; then + check_err 1 "Failed to capture sampled packets" + fi + + echo 0 > $PSAMPLE_DIR/enable + check_err $? "Failed to disable sampling when should not" + + echo 0 > $PSAMPLE_DIR/enable 2>/dev/null + check_fail $? "Sampling disablement succeeded when should fail" + + psample_capture + if [ $(cat $CAPTURE_FILE | wc -l) -ne 0 ]; then + check_err 1 "Captured sampled packets when should not" + fi + + log_test "psample enable / disable" +} + +psample_group_num_test() +{ + RET=0 + + echo 1234 > $PSAMPLE_DIR/group_num + echo 1 > $PSAMPLE_DIR/enable + + psample_capture + grep -q -e "group 1234" $CAPTURE_FILE + check_err $? "Sampled packets reported with wrong group number" + + # New group number should only be used after disable / enable. + echo 4321 > $PSAMPLE_DIR/group_num + + psample_capture + grep -q -e "group 4321" $CAPTURE_FILE + check_fail $? "Group number changed while sampling is active" + + echo 0 > $PSAMPLE_DIR/enable && echo 1 > $PSAMPLE_DIR/enable + + psample_capture + grep -q -e "group 4321" $CAPTURE_FILE + check_err $? "Group number did not change after restarting sampling" + + log_test "psample group number" + + echo 0 > $PSAMPLE_DIR/enable +} + +psample_md_test() +{ + RET=0 + + echo 1 > $PSAMPLE_DIR/enable + + echo 1234 > $PSAMPLE_DIR/in_ifindex + echo 4321 > $PSAMPLE_DIR/out_ifindex + psample_capture + + grep -q -e "in-ifindex 1234" $CAPTURE_FILE + check_err $? "Sampled packets reported with wrong in-ifindex" + + grep -q -e "out-ifindex 4321" $CAPTURE_FILE + check_err $? "Sampled packets reported with wrong out-ifindex" + + echo 5 > $PSAMPLE_DIR/out_tc + psample_capture + + grep -q -e "out-tc 5" $CAPTURE_FILE + check_err $? "Sampled packets reported with wrong out-tc" + + echo $((2**16 - 1)) > $PSAMPLE_DIR/out_tc + psample_capture + + grep -q -e "out-tc " $CAPTURE_FILE + check_fail $? "Sampled packets reported with out-tc when should not" + + echo 1 > $PSAMPLE_DIR/out_tc + echo 10000 > $PSAMPLE_DIR/out_tc_occ_max + psample_capture + + grep -q -e "out-tc-occ " $CAPTURE_FILE + check_err $? "Sampled packets not reported with out-tc-occ when should" + + echo 0 > $PSAMPLE_DIR/out_tc_occ_max + psample_capture + + grep -q -e "out-tc-occ " $CAPTURE_FILE + check_fail $? "Sampled packets reported with out-tc-occ when should not" + + echo 10000 > $PSAMPLE_DIR/latency_max + psample_capture + + grep -q -e "latency " $CAPTURE_FILE + check_err $? "Sampled packets not reported with latency when should" + + echo 0 > $PSAMPLE_DIR/latency_max + psample_capture + + grep -q -e "latency " $CAPTURE_FILE + check_fail $? "Sampled packets reported with latency when should not" + + log_test "psample metadata" + + echo 0 > $PSAMPLE_DIR/enable +} + +setup_prepare() +{ + modprobe netdevsim &> /dev/null + + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + set -e + + ip netns add testns1 + devlink dev reload $DEVLINK_DEV netns testns1 + + set +e +} + +cleanup() +{ + pre_cleanup + rm -f $CAPTURE_FILE + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/settings b/tools/testing/selftests/drivers/net/netdevsim/settings new file mode 100644 index 000000000000..a62d2fa1275c --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/settings @@ -0,0 +1 @@ +timeout=600 diff --git a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh new file mode 100755 index 000000000000..b411fe66510f --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh @@ -0,0 +1,80 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +set -o pipefail + +n_children() { + n=$(tc qdisc show dev $NDEV | grep '^qdisc' | wc -l) + echo $((n - 1)) +} + +tcq() { + tc qdisc $1 dev $NDEV ${@:2} +} + +n_child_assert() { + n=$(n_children) + if [ $n -ne $1 ]; then + echo "ERROR ($root): ${@:2}, expected $1 have $n" + ((num_errors++)) + else + ((num_passes++)) + fi +} + + +for root in mq mqprio; do + NDEV=$(make_netdev 1 4) + + opts= + [ $root == "mqprio" ] && opts='hw 0 num_tc 1 map 0 0 0 0 queues 1@0' + + tcq add root handle 100: $root $opts + n_child_assert 4 'Init' + + # All defaults + + for n in 3 2 1 2 3 4 1 4; do + ethtool -L $NDEV combined $n + n_child_assert $n "Change queues to $n while down" + done + + ip link set dev $NDEV up + + for n in 3 2 1 2 3 4 1 4; do + ethtool -L $NDEV combined $n + n_child_assert $n "Change queues to $n while up" + done + + # One real one + tcq replace parent 100:4 handle 204: pfifo_fast + n_child_assert 4 "One real queue" + + ethtool -L $NDEV combined 1 + n_child_assert 2 "One real queue, one default" + + ethtool -L $NDEV combined 4 + n_child_assert 4 "One real queue, rest default" + + # Remove real one + tcq del parent 100:4 handle 204: + + # Replace default with pfifo + tcq replace parent 100:1 handle 205: pfifo limit 1000 + n_child_assert 3 "Deleting real one, replacing default one with pfifo" + + ethtool -L $NDEV combined 1 + n_child_assert 1 "Grafted, one" + + cleanup_nsim +done + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh new file mode 100755 index 000000000000..4c859ecdad94 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -0,0 +1,942 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +VNI_GEN=$RANDOM +NSIM_ID=$((RANDOM % 1024)) +NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID +NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID +NSIM_NETDEV= +HAS_ETHTOOL= +STATIC_ENTRIES= +EXIT_STATUS=0 +num_cases=0 +num_errors=0 + +clean_up_devs=( ) + +function err_cnt { + echo "ERROR:" $@ + EXIT_STATUS=1 + ((num_errors++)) + ((num_cases++)) +} + +function pass_cnt { + ((num_cases++)) +} + +function cleanup_tuns { + for dev in "${clean_up_devs[@]}"; do + [ -e /sys/class/net/$dev ] && ip link del dev $dev + done + clean_up_devs=( ) +} + +function cleanup_nsim { + if [ -e $NSIM_DEV_SYS ]; then + echo $NSIM_ID > /sys/bus/netdevsim/del_device + fi +} + +function cleanup { + cleanup_tuns + cleanup_nsim +} + +trap cleanup EXIT + +function new_vxlan { + local dev=$1 + local dstport=$2 + local lower=$3 + local ipver=$4 + local flags=$5 + + local group ipfl + + [ "$ipver" != '6' ] && group=239.1.1.1 || group=fff1::1 + [ "$ipver" != '6' ] || ipfl="-6" + + [[ ! "$flags" =~ "external" ]] && flags="$flags id $((VNI_GEN++))" + + ip $ipfl link add $dev type vxlan \ + group $group \ + dev $lower \ + dstport $dstport \ + $flags + + ip link set dev $dev up + + clean_up_devs=("${clean_up_devs[@]}" $dev) + + check_tables +} + +function new_geneve { + local dev=$1 + local dstport=$2 + local ipver=$3 + local flags=$4 + + local group ipfl + + [ "$ipver" != '6' ] && remote=1.1.1.2 || group=::2 + [ "$ipver" != '6' ] || ipfl="-6" + + [[ ! "$flags" =~ "external" ]] && flags="$flags vni $((VNI_GEN++))" + + ip $ipfl link add $dev type geneve \ + remote $remote \ + dstport $dstport \ + $flags + + ip link set dev $dev up + + clean_up_devs=("${clean_up_devs[@]}" $dev) + + check_tables +} + +function del_dev { + local dev=$1 + + ip link del dev $dev + check_tables +} + +# Helpers for netdevsim port/type encoding +function mke { + local port=$1 + local type=$2 + + echo $((port << 16 | type)) +} + +function pre { + local val=$1 + + echo -e "port: $((val >> 16))\ttype: $((val & 0xffff))" +} + +function pre_ethtool { + local val=$1 + local port=$((val >> 16)) + local type=$((val & 0xffff)) + + case $type in + 1) + type_name="vxlan" + ;; + 2) + type_name="geneve" + ;; + 4) + type_name="vxlan-gpe" + ;; + *) + type_name="bit X" + ;; + esac + + echo "port $port, $type_name" +} + +function check_table { + local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1 + local -n expected=$2 + local last=$3 + + read -a have < $path + + if [ ${#expected[@]} -ne ${#have[@]} ]; then + echo "check_table: BAD NUMBER OF ITEMS" + return 0 + fi + + for i in "${!expected[@]}"; do + if [ -n "$HAS_ETHTOOL" -a ${expected[i]} -ne 0 ]; then + pp_expected=`pre_ethtool ${expected[i]}` + ethtool --show-tunnels $NSIM_NETDEV | grep "$pp_expected" >/dev/null + if [ $? -ne 0 -a $last -ne 0 ]; then + err_cnt "ethtool table $1 on port $port: $pfx - $msg" + echo " check_table: ethtool does not contain '$pp_expected'" + ethtool --show-tunnels $NSIM_NETDEV + return 0 + + fi + fi + + if [ ${expected[i]} != ${have[i]} ]; then + if [ $last -ne 0 ]; then + err_cnt "table $1 on port $port: $pfx - $msg" + echo " check_table: wrong entry $i" + echo " expected: `pre ${expected[i]}`" + echo " have: `pre ${have[i]}`" + return 0 + fi + return 1 + fi + done + + pass_cnt + return 0 +} + +function check_tables { + # Need retries in case we have workqueue making the changes + local retries=10 + + while ! check_table 0 exp0 $((retries == 0)); do + sleep 0.02 + ((retries--)) + done + while ! check_table 1 exp1 $((retries == 0)); do + sleep 0.02 + ((retries--)) + done + + if [ -n "$HAS_ETHTOOL" -a -n "${STATIC_ENTRIES[0]}" ]; then + fail=0 + for i in "${!STATIC_ENTRIES[@]}"; do + pp_expected=`pre_ethtool ${STATIC_ENTRIES[i]}` + cnt=$(ethtool --show-tunnels $NSIM_NETDEV | grep -c "$pp_expected") + if [ $cnt -ne 1 ]; then + err_cnt "ethtool static entry: $pfx - $msg" + echo " check_table: ethtool does not contain '$pp_expected'" + ethtool --show-tunnels $NSIM_NETDEV + fail=1 + fi + done + [ $fail == 0 ] && pass_cnt + fi +} + +function print_table { + local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1 + read -a have < $path + + tree $NSIM_DEV_DFS/ + + echo "Port $port table $1:" + + for i in "${!have[@]}"; do + echo " `pre ${have[i]}`" + done + +} + +function print_tables { + print_table 0 + print_table 1 +} + +function get_netdev_name { + local -n old=$1 + + udevadm settle + new=$(ls /sys/class/net) + + for netdev in $new; do + for check in $old; do + [ $netdev == $check ] && break + done + + if [ $netdev != $check ]; then + echo $netdev + break + fi + done +} + +### +### Code start +### + +# Probe ethtool support +ethtool -h | grep show-tunnels 2>&1 >/dev/null && HAS_ETHTOOL=y + +modprobe netdevsim + +# Basic test +pfx="basic" + +for port in 0 1; do + old_netdevs=$(ls /sys/class/net) + if [ $port -eq 0 ]; then + echo $NSIM_ID > /sys/bus/netdevsim/new_device + else + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_SYS/new_port + fi + NSIM_NETDEV=`get_netdev_name old_netdevs` + ip link set dev $NSIM_NETDEV up + + msg="new NIC device created" + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + check_tables + + msg="VxLAN v4 devices" + exp0=( `mke 4789 1` 0 0 0 ) + new_vxlan vxlan0 4789 $NSIM_NETDEV + new_vxlan vxlan1 4789 $NSIM_NETDEV + + msg="VxLAN v4 devices go down" + exp0=( 0 0 0 0 ) + ip link set dev vxlan1 down + ip link set dev vxlan0 down + check_tables + + msg="VxLAN v6 devices" + exp0=( `mke 4789 1` 0 0 0 ) + new_vxlan vxlanA 4789 $NSIM_NETDEV 6 + + for ifc in vxlan0 vxlan1; do + ip link set dev $ifc up + done + + new_vxlan vxlanB 4789 $NSIM_NETDEV 6 + + msg="another VxLAN v6 devices" + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) + new_vxlan vxlanC 4790 $NSIM_NETDEV 6 + + msg="Geneve device" + exp1=( `mke 6081 2` 0 0 0 ) + new_geneve gnv0 6081 + + msg="NIC device goes down" + ip link set dev $NSIM_NETDEV down + if [ $port -eq 1 ]; then + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + fi + check_tables + msg="NIC device goes up again" + ip link set dev $NSIM_NETDEV up + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) + exp1=( `mke 6081 2` 0 0 0 ) + check_tables + + cleanup_tuns + + msg="tunnels destroyed" + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + check_tables + + modprobe -r geneve + modprobe -r vxlan + modprobe -r udp_tunnel + + check_tables +done + +modprobe -r netdevsim + +# Module tests +pfx="module tests" + +if modinfo netdevsim | grep udp_tunnel >/dev/null; then + err_cnt "netdevsim depends on udp_tunnel" +else + pass_cnt +fi + +modprobe netdevsim + +old_netdevs=$(ls /sys/class/net) +port=0 +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port +echo 0 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` + +msg="create VxLANs" +exp0=( `mke 10000 1` 0 0 0 ) +new_vxlan vxlan0 10000 $NSIM_NETDEV + +exp0=( 0 0 0 0 ) + +modprobe -r netdevsim +modprobe netdevsim + +# Overflow the table + +function overflow_table0 { + local pfx=$1 + + msg="create VxLANs 1/5" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="create VxLANs 2/5" + exp0=( `mke 10000 1` `mke 10001 1` 0 0 ) + new_vxlan vxlan1 10001 $NSIM_NETDEV + + msg="create VxLANs 3/5" + exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` 0 ) + new_vxlan vxlan2 10002 $NSIM_NETDEV + + msg="create VxLANs 4/5" + exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` `mke 10003 1` ) + new_vxlan vxlan3 10003 $NSIM_NETDEV + + msg="create VxLANs 5/5" + new_vxlan vxlan4 10004 $NSIM_NETDEV +} + +function overflow_table1 { + local pfx=$1 + + msg="create GENEVE 1/5" + exp1=( `mke 20000 2` 0 0 0 ) + new_geneve gnv0 20000 + + msg="create GENEVE 2/5" + exp1=( `mke 20000 2` `mke 20001 2` 0 0 ) + new_geneve gnv1 20001 + + msg="create GENEVE 3/5" + exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` 0 ) + new_geneve gnv2 20002 + + msg="create GENEVE 4/5" + exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` `mke 20003 2` ) + new_geneve gnv3 20003 + + msg="create GENEVE 5/5" + new_geneve gnv4 20004 +} + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + fi + + echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` + ip link set dev $NSIM_NETDEV up + + overflow_table0 "overflow NIC table" + overflow_table1 "overflow NIC table" + + msg="replace VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) + del_dev vxlan1 + + msg="vacate VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) + del_dev vxlan2 + + msg="replace GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` ) + del_dev gnv1 + + msg="vacate GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` ) + del_dev gnv2 + + msg="table sharing - share" + exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` ) + new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external" + + msg="table sharing - overflow" + new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external" + msg="table sharing - overflow v6" + new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external" + + exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` ) + del_dev gnv4 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# Sync all +pfx="sync all" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port +echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + fi + + echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` + ip link set dev $NSIM_NETDEV up + + overflow_table0 "overflow NIC table" + overflow_table1 "overflow NIC table" + + msg="replace VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) + del_dev vxlan1 + + msg="vacate VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) + del_dev vxlan2 + + msg="replace GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` ) + del_dev gnv1 + + msg="vacate GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` ) + del_dev gnv2 + + msg="table sharing - share" + exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` ) + new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external" + + msg="table sharing - overflow" + new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external" + msg="table sharing - overflow v6" + new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external" + + exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` ) + del_dev gnv4 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# Destroy full NIC +pfx="destroy full" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + fi + + echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` + ip link set dev $NSIM_NETDEV up + + overflow_table0 "destroy NIC" + overflow_table1 "destroy NIC" + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# IPv4 only +pfx="IPv4 only" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port +echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + fi + + echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` + ip link set dev $NSIM_NETDEV up + + msg="create VxLANs v6" + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="create VxLANs v6" + new_vxlan vxlanA1 10000 $NSIM_NETDEV 6 + + ip link set dev vxlanA0 down + ip link set dev vxlanA0 up + check_tables + + msg="create VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="down VxLANs v4" + exp0=( 0 0 0 0 ) + ip link set dev vxlan0 down + check_tables + + msg="up VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + ip link set dev vxlan0 up + check_tables + + msg="destroy VxLANs v4" + exp0=( 0 0 0 0 ) + del_dev vxlan0 + + msg="recreate VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + del_dev vxlanA0 + del_dev vxlanA1 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# Failures +pfx="error injection" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + fi + + echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` + ip link set dev $NSIM_NETDEV up + + echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error + + msg="1 - create VxLANs v6" + exp0=( 0 0 0 0 ) + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="1 - create VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="1 - remove VxLANs v4" + del_dev vxlan0 + + msg="1 - remove VxLANs v6" + exp0=( 0 0 0 0 ) + del_dev vxlanA0 + + msg="2 - create GENEVE" + exp1=( `mke 20000 2` 0 0 0 ) + new_geneve gnv0 20000 + + msg="2 - destroy GENEVE" + echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error + exp1=( `mke 20000 2` 0 0 0 ) + del_dev gnv0 + + msg="2 - create second GENEVE" + exp1=( 0 `mke 20001 2` 0 0 ) + new_geneve gnv0 20001 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# netdev flags +pfx="netdev flags" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + fi + + echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` + ip link set dev $NSIM_NETDEV up + + msg="create VxLANs v6" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="create VxLANs v4" + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="turn off" + exp0=( 0 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off + check_tables + + msg="turn on" + exp0=( `mke 10000 1` 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on + check_tables + + msg="remove both" + del_dev vxlanA0 + exp0=( 0 0 0 0 ) + del_dev vxlan0 + check_tables + + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off + + msg="create VxLANs v4 - off" + exp0=( 0 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="created off - turn on" + exp0=( `mke 10000 1` 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on + check_tables + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# device initiated reset +pfx="reset notification" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + fi + + echo $port > $NSIM_DEV_SYS/new_port + NSIM_NETDEV=`get_netdev_name old_netdevs` + ip link set dev $NSIM_NETDEV up + + msg="create VxLANs v6" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="create VxLANs v4" + new_vxlan vxlan0 10000 $NSIM_NETDEV + + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset + check_tables + + msg="NIC device goes down" + ip link set dev $NSIM_NETDEV down + if [ $port -eq 1 ]; then + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + fi + check_tables + + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset + check_tables + + msg="NIC device goes up again" + ip link set dev $NSIM_NETDEV up + exp0=( `mke 10000 1` 0 0 0 ) + check_tables + + msg="remove both" + del_dev vxlanA0 + exp0=( 0 0 0 0 ) + del_dev vxlan0 + check_tables + + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset + check_tables + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# shared port tables +pfx="table sharing" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +echo 0 > $NSIM_DEV_DFS/udp_ports_open_only +echo 1 > $NSIM_DEV_DFS/udp_ports_shared + +old_netdevs=$(ls /sys/class/net) +echo 1 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` +old_netdevs=$(ls /sys/class/net) +echo 2 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV2=`get_netdev_name old_netdevs` + +msg="VxLAN v4 devices" +exp0=( `mke 4789 1` 0 0 0 ) +exp1=( 0 0 0 0 ) +new_vxlan vxlan0 4789 $NSIM_NETDEV +new_vxlan vxlan1 4789 $NSIM_NETDEV2 + +msg="VxLAN v4 devices go down" +exp0=( 0 0 0 0 ) +ip link set dev vxlan1 down +ip link set dev vxlan0 down +check_tables + +for ifc in vxlan0 vxlan1; do + ip link set dev $ifc up +done + +msg="VxLAN v6 device" +exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) +new_vxlan vxlanC 4790 $NSIM_NETDEV 6 + +msg="Geneve device" +exp1=( `mke 6081 2` 0 0 0 ) +new_geneve gnv0 6081 + +msg="NIC device goes down" +ip link set dev $NSIM_NETDEV down +check_tables + +msg="NIC device goes up again" +ip link set dev $NSIM_NETDEV up +check_tables + +for i in `seq 2`; do + msg="turn feature off - 1, rep $i" + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off + check_tables + + msg="turn feature off - 2, rep $i" + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload off + check_tables + + msg="turn feature on - 1, rep $i" + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) + exp1=( `mke 6081 2` 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on + check_tables + + msg="turn feature on - 2, rep $i" + ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload on + check_tables +done + +msg="tunnels destroyed 1" +cleanup_tuns +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +overflow_table0 "overflow NIC table" + +msg="re-add a port" + +echo 2 > $NSIM_DEV_SYS/del_port +echo 2 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` +check_tables + +msg="replace VxLAN in overflow table" +exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) +del_dev vxlan1 + +msg="vacate VxLAN in overflow table" +exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) +del_dev vxlan2 + +echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset +check_tables + +msg="tunnels destroyed 2" +cleanup_tuns +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +echo 1 > $NSIM_DEV_SYS/del_port +echo 2 > $NSIM_DEV_SYS/del_port + +cleanup_nsim + +# Static IANA port +pfx="static IANA vxlan" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +echo 1 > $NSIM_DEV_DFS/udp_ports_static_iana_vxlan +STATIC_ENTRIES=( `mke 4789 1` ) + +port=1 +old_netdevs=$(ls /sys/class/net) +echo $port > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` + +msg="check empty" +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +msg="add on static port" +new_vxlan vxlan0 4789 $NSIM_NETDEV +new_vxlan vxlan1 4789 $NSIM_NETDEV + +msg="add on different port" +exp0=( `mke 4790 1` 0 0 0 ) +new_vxlan vxlan2 4790 $NSIM_NETDEV + +cleanup_tuns + +msg="tunnels destroyed" +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +msg="different type" +new_geneve gnv0 4789 + +cleanup_tuns +cleanup_nsim + +# END + +modprobe -r netdevsim + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $num_cases checks" +else + echo "FAILED $num_errors/$num_cases checks" +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netpoll_basic.py b/tools/testing/selftests/drivers/net/netpoll_basic.py new file mode 100755 index 000000000000..408bd54d6779 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netpoll_basic.py @@ -0,0 +1,396 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Author: Breno Leitao <leitao@debian.org> +""" + This test aims to evaluate the netpoll polling mechanism (as in + netpoll_poll_dev()). It presents a complex scenario where the network + attempts to send a packet but fails, prompting it to poll the NIC from within + the netpoll TX side. + + This has been a crucial path in netpoll that was previously untested. Jakub + suggested using a single RX/TX queue, pushing traffic to the NIC, and then + sending netpoll messages (via netconsole) to trigger the poll. + + In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If + so, the test passes, otherwise it will be skipped. This test is very dependent on + the driver and environment, given we are trying to trigger a tricky scenario. +""" + +import errno +import logging +import os +import random +import string +import threading +import time +from typing import Optional + +from lib.py import ( + bpftrace, + CmdExitFailure, + defer, + ethtool, + GenerateTraffic, + ksft_exit, + ksft_pr, + ksft_run, + KsftFailEx, + KsftSkipEx, + NetDrvEpEnv, + KsftXfailEx, +) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole" +NETCONS_REMOTE_PORT: int = 6666 +NETCONS_LOCAL_PORT: int = 1514 + +# Max number of netcons messages to send. Each iteration will setup +# netconsole and send MAX_WRITES messages +ITERATIONS: int = 20 +# Number of writes to /dev/kmsg per iteration +MAX_WRITES: int = 40 +# MAPS contains the information coming from bpftrace it will have only one +# key: "hits", which tells the number of times netpoll_poll_dev() was called +MAPS: dict[str, int] = {} +# Thread to run bpftrace in parallel +BPF_THREAD: Optional[threading.Thread] = None +# Time bpftrace will be running in parallel. +BPFTRACE_TIMEOUT: int = 10 + + +def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]: + """ + Read the ringsize using ethtool. This will be used to restore it after the test + """ + try: + ethtool_result = ethtool(f"-g {interface_name}", json=True)[0] + rxs = ethtool_result["rx"] + txs = ethtool_result["tx"] + except (KeyError, IndexError) as exception: + raise KsftSkipEx( + f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them." + ) from exception + + return rxs, txs + + +def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool: + """Try to the number of RX and TX ringsize.""" + rxs = ring_size[0] + txs = ring_size[1] + + logging.debug("Setting ring size to %d/%d", rxs, txs) + try: + ethtool(f"-G {interface_name} rx {rxs} tx {txs}") + except CmdExitFailure: + # This might fail on real device, retry with a higher value, + # worst case, keep it as it is. + return False + + return True + + +def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]: + """Read the number of RX, TX and combined queues using ethtool""" + + try: + ethtool_result = ethtool(f"-l {interface_name}", json=True)[0] + rxq = ethtool_result.get("rx", -1) + txq = ethtool_result.get("tx", -1) + combined = ethtool_result.get("combined", -1) + + except IndexError as exception: + raise KsftSkipEx( + f"Failed to read queues numbers: {exception}. Not going to mess with them." + ) from exception + + return rxq, txq, combined + + +def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None: + """Set the number of RX, TX and combined queues using ethtool""" + rxq, txq, combined = queues + + cmdline = f"-L {interface_name}" + + if rxq != -1: + cmdline += f" rx {rxq}" + if txq != -1: + cmdline += f" tx {txq}" + if combined != -1: + cmdline += f" combined {combined}" + + logging.debug("calling: ethtool %s", cmdline) + + try: + ethtool(cmdline) + except CmdExitFailure as exception: + raise KsftSkipEx( + f"Failed to configure RX/TX queues: {exception}. Ethtool not available?" + ) from exception + + +def netcons_generate_random_target_name() -> str: + """Generate a random target name starting with 'netcons'""" + random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8)) + return f"netcons_{random_suffix}" + + +def netcons_create_target( + config_data: dict[str, str], + target_name: str, +) -> None: + """Create a netconsole dynamic target against the interfaces""" + logging.debug("Using netconsole name: %s", target_name) + try: + os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True) + logging.debug( + "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name + ) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise KsftFailEx( + f"Failed to create netconsole target directory: {exception}" + ) from exception + + try: + for key, value in config_data.items(): + path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}" + logging.debug("Writing %s to %s", key, path) + with open(path, "w", encoding="utf-8") as file: + # Always convert to string to write to file + file.write(str(value)) + + # Read all configuration values for debugging purposes + for debug_key in config_data.keys(): + with open( + f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}", + "r", + encoding="utf-8", + ) as file: + content = file.read() + logging.debug( + "%s/%s/%s : %s", + NETCONSOLE_CONFIGFS_PATH, + target_name, + debug_key, + content.strip(), + ) + + except Exception as exception: + raise KsftFailEx( + f"Failed to configure netconsole target: {exception}" + ) from exception + + +def netcons_configure_target( + cfg: NetDrvEpEnv, interface_name: str, target_name: str +) -> None: + """Configure netconsole on the interface with the given target name""" + config_data = { + "extended": "1", + "dev_name": interface_name, + "local_port": NETCONS_LOCAL_PORT, + "remote_port": NETCONS_REMOTE_PORT, + "local_ip": cfg.addr, + "remote_ip": cfg.remote_addr, + "remote_mac": "00:00:00:00:00:00", # Not important for this test + "enabled": "1", + } + + netcons_create_target(config_data, target_name) + logging.debug( + "Created netconsole target: %s on interface %s", target_name, interface_name + ) + + +def netcons_delete_target(name: str) -> None: + """Delete a netconsole dynamic target""" + target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}" + try: + if os.path.exists(target_path): + os.rmdir(target_path) + except OSError as exception: + raise KsftFailEx( + f"Failed to delete netconsole target: {exception}" + ) from exception + + +def netcons_load_module() -> None: + """Try to load the netconsole module""" + os.system("modprobe netconsole") + + +def bpftrace_call() -> None: + """Call bpftrace to find how many times netpoll_poll_dev() is called. + Output is saved in the global variable `maps`""" + + # This is going to update the global variable, that will be seen by the + # main function + global MAPS # pylint: disable=W0603 + + # This will be passed to bpftrace as in bpftrace -e "expr" + expr = "kprobe:netpoll_poll_dev { @hits = count(); }" + + MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True) + logging.debug("BPFtrace output: %s", MAPS) + + +def bpftrace_start(): + """Start a thread to call `call_bpf` in a parallel thread""" + global BPF_THREAD # pylint: disable=W0603 + + BPF_THREAD = threading.Thread(target=bpftrace_call) + BPF_THREAD.start() + if not BPF_THREAD.is_alive(): + raise KsftSkipEx("BPFtrace thread is not alive. Skipping test") + + +def bpftrace_stop() -> None: + """Stop the bpftrace thread""" + if BPF_THREAD: + BPF_THREAD.join() + + +def bpftrace_any_hit(join: bool) -> bool: + """Check if netpoll_poll_dev() was called by checking the global variable `maps`""" + if not BPF_THREAD: + raise KsftFailEx("BPFtrace didn't start") + + if BPF_THREAD.is_alive(): + if join: + # Wait for bpftrace to finish + BPF_THREAD.join() + else: + # bpftrace is still running, so, we will not check the result yet + return False + + logging.debug("MAPS coming from bpftrace = %s", MAPS) + if "hits" not in MAPS.keys(): + raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}") + + logging.debug("Got a total of %d hits", MAPS["hits"]) + return MAPS["hits"] > 0 + + +def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: + """Print messages to the console, trying to trigger a netpoll poll""" + # Start bpftrace in parallel, so, it is watching + # netpoll_poll_dev() while we are sending netconsole messages + bpftrace_start() + defer(bpftrace_stop) + + do_netpoll_flush(cfg, ifname, target_name) + + if bpftrace_any_hit(join=True): + ksft_pr("netpoll_poll_dev() was called. Success") + return + + raise KsftXfailEx("netpoll_poll_dev() was not called during the test...") + + +def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: + """Print messages to the console, trying to trigger a netpoll poll""" + netcons_configure_target(cfg, ifname, target_name) + retry = 0 + + for i in range(int(ITERATIONS)): + if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False): + # bpftrace is done, stop sending messages + break + + msg = f"netcons test #{i}" + with open("/dev/kmsg", "w", encoding="utf-8") as kmsg: + for j in range(MAX_WRITES): + try: + kmsg.write(f"{msg}-{j}\n") + except OSError as exception: + # in some cases, kmsg can be busy, so, we will retry + time.sleep(1) + retry += 1 + if retry < 5: + logging.info("Failed to write to kmsg. Retrying") + # Just retry a few times + continue + raise KsftFailEx( + f"Failed to write to kmsg: {exception}" + ) from exception + + netcons_delete_target(target_name) + netcons_configure_target(cfg, ifname, target_name) + # If we sleep here, we will have a better chance of triggering + # This number is based on a few tests I ran while developing this test + time.sleep(0.4) + + +def configure_network(ifname: str) -> None: + """Configure ring size and queue numbers""" + + # Set defined queues to 1 to force congestion + prev_queues = ethtool_get_queues_cnt(ifname) + logging.debug("RX/TX/combined queues: %s", prev_queues) + # Only set the queues to 1 if they exists in the device. I.e, they are > 0 + ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues)) + defer(ethtool_set_queues_cnt, ifname, prev_queues) + + # Try to set the ring size to some low value. + # Do not fail if the hardware do not accepted desired values + prev_ring_size = ethtool_get_ringsize(ifname) + for size in [(1, 1), (128, 128), (256, 256)]: + if ethtool_set_ringsize(ifname, size): + # hardware accepted the desired ringsize + logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size) + break + defer(ethtool_set_ringsize, ifname, prev_ring_size) + + +def test_netpoll(cfg: NetDrvEpEnv) -> None: + """ + Test netpoll by sending traffic to the interface and then sending + netconsole messages to trigger a poll + """ + + ifname = cfg.ifname + configure_network(ifname) + target_name = netcons_generate_random_target_name() + traffic = None + + try: + traffic = GenerateTraffic(cfg) + do_netpoll_flush_monitored(cfg, ifname, target_name) + finally: + if traffic: + traffic.stop() + + # Revert RX/TX queues + netcons_delete_target(target_name) + + +def test_check_dependencies() -> None: + """Check if the dependencies are met""" + if not os.path.exists(NETCONSOLE_CONFIGFS_PATH): + raise KsftSkipEx( + f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set." # pylint: disable=C0301 + ) + + +def main() -> None: + """Main function to run the test""" + netcons_load_module() + test_check_dependencies() + with NetDrvEpEnv(__file__) as cfg: + ksft_run( + [test_netpoll], + args=(cfg,), + ) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh new file mode 100755 index 000000000000..c51c83421c61 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh @@ -0,0 +1,253 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright 2022 NXP + +# The script is mostly generic, with the exception of the +# ethtool per-TC counter names ("rx_green_prio_${tc}") + +WAIT_TIME=1 +NUM_NETIFS=4 +STABLE_MAC_ADDRS=yes +NETIF_CREATE=no +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +require_command dcb + +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h1_vlan_create() +{ + local vid=$1 + + vlan_create $h1 $vid + simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64 + ip link set $h1.$vid type vlan \ + egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \ + ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 +} + +h1_vlan_destroy() +{ + local vid=$1 + + simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64 + vlan_destroy $h1 $vid +} + +h2_vlan_create() +{ + local vid=$1 + + vlan_create $h2 $vid + simple_if_init $h2.$vid $H2_IPV4/24 $H2_IPV6/64 + ip link set $h2.$vid type vlan \ + egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \ + ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 +} + +h2_vlan_destroy() +{ + local vid=$1 + + simple_if_fini $h2.$vid $H2_IPV4/24 $H2_IPV6/64 + vlan_destroy $h2 $vid +} + +vlans_prepare() +{ + h1_vlan_create 100 + h2_vlan_create 100 + + tc qdisc add dev ${h1}.100 clsact + tc filter add dev ${h1}.100 egress protocol ipv4 \ + flower ip_proto icmp action skbedit priority 3 + tc filter add dev ${h1}.100 egress protocol ipv6 \ + flower ip_proto icmpv6 action skbedit priority 3 +} + +vlans_destroy() +{ + tc qdisc del dev ${h1}.100 clsact + + h1_vlan_destroy 100 + h2_vlan_destroy 100 +} + +switch_create() +{ + ip link set ${swp1} up + ip link set ${swp2} up + + # Ports should trust VLAN PCP even with vlan_filtering=0 + ip link add br0 type bridge + ip link set ${swp1} master br0 + ip link set ${swp2} master br0 + ip link set br0 up +} + +switch_destroy() +{ + ip link del br0 +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + switch_destroy + + vrf_cleanup +} + +dscp_cs_to_tos() +{ + local dscp_cs=$1 + + # https://datatracker.ietf.org/doc/html/rfc2474 + # 4.2.2.1 The Class Selector Codepoints + echo $((${dscp_cs} << 5)) +} + +run_test() +{ + local test_name=$1; shift + local if_name=$1; shift + local tc=$1; shift + local tos=$1; shift + local counter_name="rx_green_prio_${tc}" + local ipv4_before + local ipv4_after + local ipv6_before + local ipv6_after + + ipv4_before=$(ethtool_stats_get ${swp1} "${counter_name}") + ping_do ${if_name} $H2_IPV4 "-Q ${tos}" + ipv4_after=$(ethtool_stats_get ${swp1} "${counter_name}") + + if [ $((${ipv4_after} - ${ipv4_before})) -lt ${PING_COUNT} ]; then + RET=1 + else + RET=0 + fi + log_test "IPv4 ${test_name}" + + ipv6_before=$(ethtool_stats_get ${swp1} "${counter_name}") + ping_do ${if_name} $H2_IPV6 "-Q ${tos}" + ipv6_after=$(ethtool_stats_get ${swp1} "${counter_name}") + + if [ $((${ipv6_after} - ${ipv6_before})) -lt ${PING_COUNT} ]; then + RET=1 + else + RET=0 + fi + log_test "IPv6 ${test_name}" +} + +port_default_prio_get() +{ + local if_name=$1 + local prio + + prio="$(dcb -j app show dev ${if_name} default-prio | \ + jq '.default_prio[]')" + if [ -z "${prio}" ]; then + prio=0 + fi + + echo ${prio} +} + +test_port_default() +{ + local orig=$(port_default_prio_get ${swp1}) + local dmac=$(mac_get ${h2}) + + dcb app replace dev ${swp1} default-prio 5 + + run_test "Port-default QoS classification" ${h1} 5 0 + + dcb app replace dev ${swp1} default-prio ${orig} +} + +test_vlan_pcp() +{ + vlans_prepare + + run_test "Trusted VLAN PCP QoS classification" ${h1}.100 3 0 + + vlans_destroy +} + +test_ip_dscp() +{ + local port_default=$(port_default_prio_get ${swp1}) + local tos=$(dscp_cs_to_tos 4) + + dcb app add dev ${swp1} dscp-prio CS4:4 + run_test "Trusted DSCP QoS classification" ${h1} 4 ${tos} + dcb app del dev ${swp1} dscp-prio CS4:4 + + vlans_prepare + run_test "Untrusted DSCP QoS classification follows VLAN PCP" \ + ${h1}.100 3 ${tos} + vlans_destroy + + run_test "Untrusted DSCP QoS classification follows port default" \ + ${h1} ${port_default} ${tos} +} + +trap cleanup EXIT + +ALL_TESTS=" + test_port_default + test_vlan_pcp + test_ip_dscp +" + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh new file mode 100755 index 000000000000..8972f42dfe03 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh @@ -0,0 +1,323 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright 2021-2022 NXP + +# Note: On LS1028A, in lack of enough user ports, this setup requires patching +# the device tree to use the second CPU port as a user port + +WAIT_TIME=1 +NUM_NETIFS=4 +STABLE_MAC_ADDRS=yes +NETIF_CREATE=no +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/tsn_lib.sh + +UDS_ADDRESS_H1="/var/run/ptp4l_h1" +UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1" + +# Tunables +NUM_PKTS=1000 +STREAM_VID=100 +STREAM_PRIO=6 +# Use a conservative cycle of 10 ms to allow the test to still pass when the +# kernel has some extra overhead like lockdep etc +CYCLE_TIME_NS=10000000 +# Create two Gate Control List entries, one OPEN and one CLOSE, of equal +# durations +GATE_DURATION_NS=$((${CYCLE_TIME_NS} / 2)) +# Give 2/3 of the cycle time to user space and 1/3 to the kernel +FUDGE_FACTOR=$((${CYCLE_TIME_NS} / 3)) +# Shift the isochron base time by half the gate time, so that packets are +# always received by swp1 close to the middle of the time slot, to minimize +# inaccuracies due to network sync +SHIFT_TIME_NS=$((${GATE_DURATION_NS} / 2)) + +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +# Chain number exported by the ocelot driver for +# Per-Stream Filtering and Policing filters +PSFP() +{ + echo 30000 +} + +psfp_chain_create() +{ + local if_name=$1 + + tc qdisc add dev $if_name clsact + + tc filter add dev $if_name ingress chain 0 pref 49152 flower \ + skip_sw action goto chain $(PSFP) +} + +psfp_chain_destroy() +{ + local if_name=$1 + + tc qdisc del dev $if_name clsact +} + +psfp_filter_check() +{ + local expected=$1 + local packets="" + local drops="" + local stats="" + + stats=$(tc -j -s filter show dev ${swp1} ingress chain $(PSFP) pref 1) + packets=$(echo ${stats} | jq ".[1].options.actions[].stats.packets") + drops=$(echo ${stats} | jq ".[1].options.actions[].stats.drops") + + if ! [ "${packets}" = "${expected}" ]; then + printf "Expected filter to match on %d packets but matched on %d instead\n" \ + "${expected}" "${packets}" + fi + + echo "Hardware filter reports ${drops} drops" +} + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +switch_create() +{ + local h2_mac_addr=$(mac_get $h2) + + ip link set ${swp1} up + ip link set ${swp2} up + + ip link add br0 type bridge vlan_filtering 1 + ip link set ${swp1} master br0 + ip link set ${swp2} master br0 + ip link set br0 up + + bridge vlan add dev ${swp2} vid ${STREAM_VID} + bridge vlan add dev ${swp1} vid ${STREAM_VID} + # PSFP on Ocelot requires the filter to also be added to the bridge + # FDB, and not be removed + bridge fdb add dev ${swp2} \ + ${h2_mac_addr} vlan ${STREAM_VID} static master + + psfp_chain_create ${swp1} + + tc filter add dev ${swp1} ingress chain $(PSFP) pref 1 \ + protocol 802.1Q flower skip_sw \ + dst_mac ${h2_mac_addr} vlan_id ${STREAM_VID} \ + action gate base-time 0.000000000 \ + sched-entry OPEN ${GATE_DURATION_NS} -1 -1 \ + sched-entry CLOSE ${GATE_DURATION_NS} -1 -1 +} + +switch_destroy() +{ + psfp_chain_destroy ${swp1} + ip link del br0 +} + +txtime_setup() +{ + local if_name=$1 + + tc qdisc add dev ${if_name} clsact + # Classify PTP on TC 7 and isochron on TC 6 + tc filter add dev ${if_name} egress protocol 0x88f7 \ + flower action skbedit priority 7 + tc filter add dev ${if_name} egress protocol 802.1Q \ + flower vlan_ethtype 0xdead action skbedit priority 6 + tc qdisc add dev ${if_name} handle 100: parent root mqprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + hw 1 + # Set up TC 6 for SO_TXTIME. tc-mqprio queues count from 1. + tc qdisc replace dev ${if_name} parent 100:$((${STREAM_PRIO} + 1)) etf \ + clockid CLOCK_TAI offload delta ${FUDGE_FACTOR} +} + +txtime_cleanup() +{ + local if_name=$1 + + tc qdisc del dev ${if_name} root + tc qdisc del dev ${if_name} clsact +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create + + txtime_setup ${h1} + + # Set up swp1 as a master PHC for h1, synchronized to the local + # CLOCK_REALTIME. + phc2sys_start ${UDS_ADDRESS_SWP1} + + # Assumption true for LS1028A: h1 and h2 use the same PHC. So by + # synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized + # to swp1 (and both to CLOCK_REALTIME). + ptp4l_start ${h1} true ${UDS_ADDRESS_H1} + ptp4l_start ${swp1} false ${UDS_ADDRESS_SWP1} + + # Make sure there are no filter matches at the beginning of the test + psfp_filter_check 0 +} + +cleanup() +{ + pre_cleanup + + ptp4l_stop ${swp1} + ptp4l_stop ${h1} + phc2sys_stop + isochron_recv_stop + + txtime_cleanup ${h1} + + h2_destroy + h1_destroy + switch_destroy + + vrf_cleanup +} + +debug_incorrectly_dropped_packets() +{ + local isochron_dat=$1 + local dropped_seqids + local seqid + + echo "Packets incorrectly dropped:" + + dropped_seqids=$(isochron report \ + --input-file "${isochron_dat}" \ + --printf-format "%u RX hw %T\n" \ + --printf-args "qR" | \ + grep 'RX hw 0.000000000' | \ + awk '{print $1}') + + for seqid in ${dropped_seqids}; do + isochron report \ + --input-file "${isochron_dat}" \ + --start ${seqid} --stop ${seqid} \ + --printf-format "seqid %u scheduled for %T, HW TX timestamp %T\n" \ + --printf-args "qST" + done +} + +debug_incorrectly_received_packets() +{ + local isochron_dat=$1 + + echo "Packets incorrectly received:" + + isochron report \ + --input-file "${isochron_dat}" \ + --printf-format "seqid %u scheduled for %T, HW TX timestamp %T, HW RX timestamp %T\n" \ + --printf-args "qSTR" | + grep -v 'HW RX timestamp 0.000000000' +} + +run_test() +{ + local base_time=$1 + local expected=$2 + local test_name=$3 + local debug=$4 + local isochron_dat="$(mktemp)" + local extra_args="" + local received + + isochron_do \ + "${h1}" \ + "${h2}" \ + "${UDS_ADDRESS_H1}" \ + "" \ + "${base_time}" \ + "${CYCLE_TIME_NS}" \ + "${SHIFT_TIME_NS}" \ + "${GATE_DURATION_NS}" \ + "${NUM_PKTS}" \ + "${STREAM_VID}" \ + "${STREAM_PRIO}" \ + "" \ + "${isochron_dat}" + + received=$(isochron_report_num_received "${isochron_dat}") + if [ "${received}" = "${expected}" ]; then + RET=0 + else + RET=1 + echo "Expected isochron to receive ${expected} packets but received ${received}" + fi + + log_test "${test_name}" + + if [ "$RET" = "1" ]; then + ${debug} "${isochron_dat}" + fi + + rm ${isochron_dat} 2> /dev/null +} + +test_gate_in_band() +{ + # Send packets in-band with the OPEN gate entry + run_test 0.000000000 ${NUM_PKTS} "In band" \ + debug_incorrectly_dropped_packets + + psfp_filter_check ${NUM_PKTS} +} + +test_gate_out_of_band() +{ + # Send packets in-band with the CLOSE gate entry + run_test 0.005000000 0 "Out of band" \ + debug_incorrectly_received_packets + + psfp_filter_check $((2 * ${NUM_PKTS})) +} + +trap cleanup EXIT + +ALL_TESTS=" + test_gate_in_band + test_gate_out_of_band +" + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh new file mode 100755 index 000000000000..aff0a59f92d9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -0,0 +1,352 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright 2020 NXP + +WAIT_TIME=1 +NUM_NETIFS=4 +STABLE_MAC_ADDRS=yes +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +require_command tcpdump + +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +# Helpers to map a VCAP IS1 and VCAP IS2 lookup and policy to a chain number +# used by the kernel driver. The numbers are: +# VCAP IS1 lookup 0: 10000 +# VCAP IS1 lookup 1: 11000 +# VCAP IS1 lookup 2: 12000 +# VCAP IS2 lookup 0 policy 0: 20000 +# VCAP IS2 lookup 0 policy 1: 20001 +# VCAP IS2 lookup 0 policy 255: 20255 +# VCAP IS2 lookup 1 policy 0: 21000 +# VCAP IS2 lookup 1 policy 1: 21001 +# VCAP IS2 lookup 1 policy 255: 21255 +IS1() +{ + local lookup=$1 + + echo $((10000 + 1000 * lookup)) +} + +IS2() +{ + local lookup=$1 + local pag=$2 + + echo $((20000 + 1000 * lookup + pag)) +} + +ES0() +{ + echo 0 +} + +# The Ocelot switches have a fixed ingress pipeline composed of: +# +# +----------------------------------------------+ +-----------------------------------------+ +# | VCAP IS1 | | VCAP IS2 | +# | | | | +# | +----------+ +----------+ +----------+ | | +----------+ +----------+ | +# | | Lookup 0 | | Lookup 1 | | Lookup 2 | | --+------> PAG 0: | Lookup 0 | -> | Lookup 1 | | +# | +----------+ -> +----------+ -> +----------+ | | | +----------+ +----------+ | +# | |key&action| |key&action| |key&action| | | | |key&action| |key&action| | +# | |key&action| |key&action| |key&action| | | | | .. | | .. | | +# | | .. | | .. | | .. | | | | +----------+ +----------+ | +# | +----------+ +----------+ +----------+ | | | | +# | selects PAG | | | +----------+ +----------+ | +# +----------------------------------------------+ +------> PAG 1: | Lookup 0 | -> | Lookup 1 | | +# | | +----------+ +----------+ | +# | | |key&action| |key&action| | +# | | | .. | | .. | | +# | | +----------+ +----------+ | +# | | ... | +# | | | +# | | +----------+ +----------+ | +# +----> PAG 254: | Lookup 0 | -> | Lookup 1 | | +# | | +----------+ +----------+ | +# | | |key&action| |key&action| | +# | | | .. | | .. | | +# | | +----------+ +----------+ | +# | | | +# | | +----------+ +----------+ | +# +----> PAG 255: | Lookup 0 | -> | Lookup 1 | | +# | +----------+ +----------+ | +# | |key&action| |key&action| | +# | | .. | | .. | | +# | +----------+ +----------+ | +# +-----------------------------------------+ +# +# Both the VCAP IS1 (Ingress Stage 1) and IS2 (Ingress Stage 2) are indexed +# (looked up) multiple times: IS1 3 times, and IS2 2 times. Each filter +# (key and action pair) can be configured to only match during the first, or +# second, etc, lookup. +# +# During one TCAM lookup, the filter processing stops at the first entry that +# matches, then the pipeline jumps to the next lookup. +# The driver maps each individual lookup of each individual ingress TCAM to a +# separate chain number. For correct rule offloading, it is mandatory that each +# filter installed in one TCAM is terminated by a non-optional GOTO action to +# the next lookup from the fixed pipeline. +# +# A chain can only be used if there is a GOTO action correctly set up from the +# prior lookup in the processing pipeline. Setting up all chains is not +# mandatory. + +# NOTE: VCAP IS1 currently uses only S1_NORMAL half keys and VCAP IS2 +# dynamically chooses between MAC_ETYPE, ARP, IP4_TCP_UDP, IP4_OTHER, which are +# all half keys as well. + +create_tcam_skeleton() +{ + local eth=$1 + + tc qdisc add dev $eth clsact + + # VCAP IS1 is the Ingress Classification TCAM and can offload the + # following actions: + # - skbedit priority + # - vlan pop + # - vlan modify + # - goto (only in lookup 2, the last IS1 lookup) + tc filter add dev $eth ingress chain 0 pref 49152 flower \ + skip_sw action goto chain $(IS1 0) + tc filter add dev $eth ingress chain $(IS1 0) pref 49152 \ + flower skip_sw action goto chain $(IS1 1) + tc filter add dev $eth ingress chain $(IS1 1) pref 49152 \ + flower skip_sw action goto chain $(IS1 2) + tc filter add dev $eth ingress chain $(IS1 2) pref 49152 \ + flower skip_sw action goto chain $(IS2 0 0) + + # VCAP IS2 is the Security Enforcement ingress TCAM and can offload the + # following actions: + # - trap + # - drop + # - police + # The two VCAP IS2 lookups can be segmented into up to 256 groups of + # rules, called Policies. A Policy is selected through the Policy + # Association Group (PAG) action of VCAP IS1 (which is the + # GOTO offload). + tc filter add dev $eth ingress chain $(IS2 0 0) pref 49152 \ + flower skip_sw action goto chain $(IS2 1 0) +} + +setup_prepare() +{ + ip link set $swp1 up + ip link set $swp2 up + ip link set $h2 up + ip link set $h1 up + + create_tcam_skeleton $swp1 + + ip link add br0 type bridge + ip link set $swp1 master br0 + ip link set $swp2 master br0 + ip link set br0 up + + ip link add link $h1 name $h1.100 type vlan id 100 + ip link set $h1.100 up + + ip link add link $h1 name $h1.200 type vlan id 200 + ip link set $h1.200 up + + tc filter add dev $swp1 ingress chain $(IS1 1) pref 1 \ + protocol 802.1Q flower skip_sw vlan_id 100 \ + action vlan pop \ + action goto chain $(IS1 2) + + tc filter add dev $swp1 egress chain $(ES0) pref 1 \ + flower skip_sw indev $swp2 \ + action vlan push protocol 802.1Q id 100 + + tc filter add dev $swp1 ingress chain $(IS1 0) pref 2 \ + protocol ipv4 flower skip_sw src_ip 10.1.1.2 \ + action skbedit priority 7 \ + action goto chain $(IS1 1) + + tc filter add dev $swp1 ingress chain $(IS2 0 0) pref 1 \ + protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \ + action police rate 50mbit burst 64k conform-exceed drop/pipe \ + action goto chain $(IS2 1 0) +} + +cleanup() +{ + ip link del $h1.200 + ip link del $h1.100 + tc qdisc del dev $swp1 clsact + ip link del br0 +} + +test_vlan_pop() +{ + local h1_mac=$(mac_get $h1) + local h2_mac=$(mac_get $h2) + + RET=0 + + tcpdump_start $h2 + + # Work around Mausezahn VLAN builder bug + # (https://github.com/netsniff-ng/netsniff-ng/issues/225) by using + # an 8021q upper + $MZ $h1.100 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip + + sleep 1 + + tcpdump_stop $h2 + + tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, ethertype IPv4" + check_err "$?" "untagged reception" + + tcpdump_cleanup $h2 + + log_test "VLAN pop" +} + +test_vlan_push() +{ + local h1_mac=$(mac_get $h1) + local h2_mac=$(mac_get $h2) + + RET=0 + + tcpdump_start $h1.100 + + $MZ $h2 -q -c 1 -p 64 -a $h2_mac -b $h1_mac -t ip + + sleep 1 + + tcpdump_stop $h1.100 + + tcpdump_show $h1.100 | grep -q "$h2_mac > $h1_mac" + check_err "$?" "tagged reception" + + tcpdump_cleanup $h1.100 + + log_test "VLAN push" +} + +test_vlan_ingress_modify() +{ + local h1_mac=$(mac_get $h1) + local h2_mac=$(mac_get $h2) + + RET=0 + + ip link set br0 type bridge vlan_filtering 1 + bridge vlan add dev $swp1 vid 200 + bridge vlan add dev $swp1 vid 300 + bridge vlan add dev $swp2 vid 300 + + tc filter add dev $swp1 ingress chain $(IS1 2) pref 3 \ + protocol 802.1Q flower skip_sw vlan_id 200 src_mac $h1_mac \ + action vlan modify id 300 \ + action goto chain $(IS2 0 0) + + tcpdump_start $h2 + + $MZ $h1.200 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip + + sleep 1 + + tcpdump_stop $h2 + + tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, .* vlan 300" + check_err "$?" "tagged reception" + + tcpdump_cleanup $h2 + + tc filter del dev $swp1 ingress chain $(IS1 2) pref 3 + + bridge vlan del dev $swp1 vid 200 + bridge vlan del dev $swp1 vid 300 + bridge vlan del dev $swp2 vid 300 + ip link set br0 type bridge vlan_filtering 0 + + log_test "Ingress VLAN modification" +} + +test_vlan_egress_modify() +{ + local h1_mac=$(mac_get $h1) + local h2_mac=$(mac_get $h2) + + RET=0 + + tc qdisc add dev $swp2 clsact + + ip link set br0 type bridge vlan_filtering 1 + bridge vlan add dev $swp1 vid 200 + bridge vlan add dev $swp2 vid 200 + + tc filter add dev $swp2 egress chain $(ES0) pref 3 \ + protocol 802.1Q flower skip_sw vlan_id 200 vlan_prio 0 \ + action vlan modify id 300 priority 7 + + tcpdump_start $h2 + + $MZ $h1.200 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip + + sleep 1 + + tcpdump_stop $h2 + + tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, .* vlan 300" + check_err "$?" "tagged reception" + + tcpdump_cleanup $h2 + + tc filter del dev $swp2 egress chain $(ES0) pref 3 + tc qdisc del dev $swp2 clsact + + bridge vlan del dev $swp1 vid 200 + bridge vlan del dev $swp2 vid 200 + ip link set br0 type bridge vlan_filtering 0 + + log_test "Egress VLAN modification" +} + +test_skbedit_priority() +{ + local h1_mac=$(mac_get $h1) + local h2_mac=$(mac_get $h2) + local num_pkts=100 + + before=$(ethtool_stats_get $swp1 'rx_green_prio_7') + + $MZ $h1 -q -c $num_pkts -p 64 -a $h1_mac -b $h2_mac -t ip -A 10.1.1.2 + + after=$(ethtool_stats_get $swp1 'rx_green_prio_7') + + if [ $((after - before)) = $num_pkts ]; then + RET=0 + else + RET=1 + fi + + log_test "Frame prioritization" +} + +trap cleanup EXIT + +ALL_TESTS=" + test_vlan_pop + test_vlan_push + test_vlan_ingress_modify + test_vlan_egress_modify + test_skbedit_priority +" + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py new file mode 100755 index 000000000000..da3623c5e8a9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ping.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import os +import random, string, time +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, KsftSkipEx, KsftFailEx +from lib.py import EthtoolFamily, NetDrvEpEnv +from lib.py import bkg, cmd, wait_port_listen, rand_port +from lib.py import defer, ethtool, ip + +no_sleep=False + +def _test_v4(cfg) -> None: + if not cfg.addr_v["4"]: + return + + cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"]) + cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["4"]) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) + +def _test_v6(cfg) -> None: + if not cfg.addr_v["6"]: + return + + cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"]) + cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["6"]) + cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote) + +def _test_tcp(cfg) -> None: + cfg.require_cmd("socat", local=False, remote=True) + + port = rand_port() + listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT" + + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536)) + with bkg(listen_cmd, exit_wait=True) as nc: + wait_port_listen(port) + + cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}", + shell=True, host=cfg.remote) + ksft_eq(nc.stdout.strip(), test_string) + + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536)) + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc: + wait_port_listen(port, host=cfg.remote) + + cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) + ksft_eq(nc.stdout.strip(), test_string) + +def _schedule_checksum_reset(cfg, netnl) -> None: + features = ethtool(f"-k {cfg.ifname}", json=True) + setting = "" + for side in ["tx", "rx"]: + f = features[0][side + "-checksumming"] + if not f["fixed"]: + setting += " " + side + setting += " " + ("on" if f["requested"] or f["active"] else "off") + defer(ethtool, f" -K {cfg.ifname} " + setting) + +def _set_offload_checksum(cfg, netnl, on) -> None: + try: + ethtool(f" -K {cfg.ifname} rx {on} tx {on} ") + except: + return + +def _set_xdp_generic_sb_on(cfg) -> None: + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) + defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off") + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_generic_mb_on(cfg) -> None: + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) + ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off") + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_native_sb_on(cfg) -> None: + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") + xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] + if xdp_info['xdp']['mode'] != 1: + """ + If the interface doesn't support native-mode, it falls back to generic mode. + The mode value 1 is native and 2 is generic. + So it raises an exception if mode is not 1(native mode). + """ + raise KsftSkipEx('device does not support native-XDP') + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_native_mb_on(cfg) -> None: + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) + try: + cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") + except Exception as e: + raise KsftSkipEx('device does not support native-multi-buffer XDP') + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_offload_on(cfg) -> None: + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) + try: + cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True) + except Exception as e: + raise KsftSkipEx('device does not support offloaded XDP') + defer(ip, f"link set dev {cfg.ifname} xdpoffload off") + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) + + if no_sleep != True: + time.sleep(10) + +def get_interface_info(cfg) -> None: + global no_sleep + + if cfg.remote_ifname == "": + raise KsftFailEx('Can not get remote interface') + local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] + if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim": + no_sleep=True + if 'linkinfo' in local_info and local_info['linkinfo']['info_kind'] == "veth": + no_sleep=True + +def set_interface_init(cfg) -> None: + cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) + +def test_default_v4(cfg, netnl) -> None: + cfg.require_ipver("4") + + _schedule_checksum_reset(cfg, netnl) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_tcp(cfg) + +def test_default_v6(cfg, netnl) -> None: + cfg.require_ipver("6") + + _schedule_checksum_reset(cfg, netnl) + _set_offload_checksum(cfg, netnl, "off") + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_generic_sb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) + _set_xdp_generic_sb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_generic_mb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) + _set_xdp_generic_mb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_native_sb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) + _set_xdp_native_sb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_native_mb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) + _set_xdp_native_mb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_offload(cfg, netnl) -> None: + _set_xdp_offload_on(cfg) + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + get_interface_info(cfg) + set_interface_init(cfg) + ksft_run([test_default_v4, + test_default_v6, + test_xdp_generic_sb, + test_xdp_generic_mb, + test_xdp_native_sb, + test_xdp_native_mb, + test_xdp_offload], + args=(cfg, EthtoolFamily())) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py new file mode 100755 index 000000000000..06559ef49b9a --- /dev/null +++ b/tools/testing/selftests/drivers/net/psp.py @@ -0,0 +1,640 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Test suite for PSP capable drivers.""" + +import errno +import fcntl +import socket +import struct +import termios +import time + +from lib.py import defer +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_true, ksft_eq, ksft_ne, ksft_gt, ksft_raises +from lib.py import ksft_not_none +from lib.py import KsftSkipEx +from lib.py import NetDrvEpEnv, PSPFamily, NlError +from lib.py import bkg, rand_port, wait_port_listen + + +def _get_outq(s): + one = b'\0' * 4 + outq = fcntl.ioctl(s.fileno(), termios.TIOCOUTQ, one) + return struct.unpack("I", outq)[0] + + +def _send_with_ack(cfg, msg): + cfg.comm_sock.send(msg) + response = cfg.comm_sock.recv(4) + if response != b'ack\0': + raise RuntimeError("Unexpected server response", response) + + +def _remote_read_len(cfg): + cfg.comm_sock.send(b'read len\0') + return int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8')) + + +def _make_clr_conn(cfg, ipver=None): + _send_with_ack(cfg, b'conn clr\0') + remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr + s = socket.create_connection((remote_addr, cfg.comm_port), ) + return s + + +def _make_psp_conn(cfg, version=0, ipver=None): + _send_with_ack(cfg, b'conn psp\0' + struct.pack('BB', version, version)) + remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr + s = socket.create_connection((remote_addr, cfg.comm_port), ) + return s + + +def _close_conn(cfg, s): + _send_with_ack(cfg, b'data close\0') + s.close() + + +def _close_psp_conn(cfg, s): + _close_conn(cfg, s) + + +def _spi_xchg(s, rx): + s.send(struct.pack('I', rx['spi']) + rx['key']) + tx = s.recv(4 + len(rx['key'])) + return { + 'spi': struct.unpack('I', tx[:4])[0], + 'key': tx[4:] + } + + +def _send_careful(cfg, s, rounds): + data = b'0123456789' * 200 + for i in range(rounds): + n = 0 + for _ in range(10): # allow 10 retries + try: + n += s.send(data[n:], socket.MSG_DONTWAIT) + if n == len(data): + break + except BlockingIOError: + time.sleep(0.05) + else: + rlen = _remote_read_len(cfg) + outq = _get_outq(s) + report = f'sent: {i * len(data) + n} remote len: {rlen} outq: {outq}' + raise RuntimeError(report) + + return len(data) * rounds + + +def _check_data_rx(cfg, exp_len): + read_len = -1 + for _ in range(30): + cfg.comm_sock.send(b'read len\0') + read_len = int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8')) + if read_len == exp_len: + break + time.sleep(0.01) + ksft_eq(read_len, exp_len) + + +def _check_data_outq(s, exp_len, force_wait=False): + outq = 0 + for _ in range(10): + outq = _get_outq(s) + if not force_wait and outq == exp_len: + break + time.sleep(0.01) + ksft_eq(outq, exp_len) + + +def _get_stat(cfg, key): + return cfg.pspnl.get_stats({'dev-id': cfg.psp_dev_id})[key] + +# +# Test case boiler plate +# + +def _init_psp_dev(cfg): + if not hasattr(cfg, 'psp_dev_id'): + # Figure out which local device we are testing against + for dev in cfg.pspnl.dev_get({}, dump=True): + if dev['ifindex'] == cfg.ifindex: + cfg.psp_info = dev + cfg.psp_dev_id = cfg.psp_info['id'] + break + else: + raise KsftSkipEx("No PSP devices found") + + # Enable PSP if necessary + cap = cfg.psp_info['psp-versions-cap'] + ena = cfg.psp_info['psp-versions-ena'] + if cap != ena: + cfg.pspnl.dev_set({'id': cfg.psp_dev_id, 'psp-versions-ena': cap}) + defer(cfg.pspnl.dev_set, {'id': cfg.psp_dev_id, + 'psp-versions-ena': ena }) + +# +# Test cases +# + +def dev_list_devices(cfg): + """ Dump all devices """ + _init_psp_dev(cfg) + + devices = cfg.pspnl.dev_get({}, dump=True) + + found = False + for dev in devices: + found |= dev['id'] == cfg.psp_dev_id + ksft_true(found) + + +def dev_get_device(cfg): + """ Get the device we intend to use """ + _init_psp_dev(cfg) + + dev = cfg.pspnl.dev_get({'id': cfg.psp_dev_id}) + ksft_eq(dev['id'], cfg.psp_dev_id) + + +def dev_get_device_bad(cfg): + """ Test getting device which doesn't exist """ + raised = False + try: + cfg.pspnl.dev_get({'id': 1234567}) + except NlError as e: + ksft_eq(e.nl_msg.error, -errno.ENODEV) + raised = True + ksft_true(raised) + + +def dev_rotate(cfg): + """ Test key rotation """ + _init_psp_dev(cfg) + + prev_rotations = _get_stat(cfg, 'key-rotations') + + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + ksft_eq(rot['id'], cfg.psp_dev_id) + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + ksft_eq(rot['id'], cfg.psp_dev_id) + + cur_rotations = _get_stat(cfg, 'key-rotations') + ksft_eq(cur_rotations, prev_rotations + 2) + + +def dev_rotate_spi(cfg): + """ Test key rotation and SPI check """ + _init_psp_dev(cfg) + + top_a = top_b = 0 + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc_a = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + top_a = assoc_a['rx-key']['spi'] >> 31 + s.close() + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + ksft_eq(rot['id'], cfg.psp_dev_id) + assoc_b = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + top_b = assoc_b['rx-key']['spi'] >> 31 + s.close() + ksft_ne(top_a, top_b) + + +def assoc_basic(cfg): + """ Test creating associations """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + ksft_gt(assoc['rx-key']['spi'], 0) + ksft_eq(len(assoc['rx-key']['key']), 16) + + assoc = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + ksft_eq(len(assoc), 0) + s.close() + + +def assoc_bad_dev(cfg): + """ Test creating associations with bad device ID """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + ksft_eq(cm.exception.nl_msg.error, -errno.ENODEV) + + +def assoc_sk_only_conn(cfg): + """ Test creating associations based on socket """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + _close_conn(cfg, s) + + +def assoc_sk_only_mismatch(cfg): + """ Test creating associations based on socket (dev mismatch) """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_sk_only_mismatch_tx(cfg): + """ Test creating associations based on socket (dev mismatch) """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + with ksft_raises(NlError) as cm: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": assoc['rx-key'], + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_sk_only_unconn(cfg): + """ Test creating associations based on socket (unconnected, should fail) """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['miss-type'], "dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_version_mismatch(cfg): + """ Test creating associations where Rx and Tx PSP versions do not match """ + _init_psp_dev(cfg) + + versions = list(cfg.psp_info['psp-versions-cap']) + if len(versions) < 2: + raise KsftSkipEx("Not enough PSP versions supported by the device for the test") + + # Translate versions to integers + versions = [cfg.pspnl.consts["version"].entries[v].value for v in versions] + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + rx = cfg.pspnl.rx_assoc({"version": versions[0], + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + + for version in versions[1:]: + with ksft_raises(NlError) as cm: + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": rx['rx-key'], + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_twice(cfg): + """ Test reusing Tx assoc for two sockets """ + _init_psp_dev(cfg) + + def rx_assoc_check(s): + assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + ksft_gt(assoc['rx-key']['spi'], 0) + ksft_eq(len(assoc['rx-key']['key']), 16) + + return assoc + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc = rx_assoc_check(s) + tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + ksft_eq(len(tx), 0) + + # Use the same Tx assoc second time + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s2: + rx_assoc_check(s2) + tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s2.fileno()}) + ksft_eq(len(tx), 0) + + s.close() + + +def _data_basic_send(cfg, version, ipver): + """ Test basic data send """ + _init_psp_dev(cfg) + + # Version 0 is required by spec, don't let it skip + if version: + name = cfg.pspnl.consts["version"].entries_by_val[version].name + if name not in cfg.psp_info['psp-versions-cap']: + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": version, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP) + raise KsftSkipEx("PSP version not supported", name) + + s = _make_psp_conn(cfg, version, ipver) + + rx_assoc = cfg.pspnl.rx_assoc({"version": version, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _close_psp_conn(cfg, s) + + +def __bad_xfer_do(cfg, s, tx, version='hdr0-aes-gcm-128'): + # Make sure we accept the ACK for the SPI before we seal with the bad assoc + _check_data_outq(s, 0) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 20) + _check_data_outq(s, data_len, force_wait=True) + _check_data_rx(cfg, 0) + _close_psp_conn(cfg, s) + + +def data_send_bad_key(cfg): + """ Test send data with bad key """ + _init_psp_dev(cfg) + + s = _make_psp_conn(cfg) + + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + tx['key'] = (tx['key'][0] ^ 0xff).to_bytes(1, 'little') + tx['key'][1:] + __bad_xfer_do(cfg, s, tx) + + +def data_send_disconnect(cfg): + """ Test socket close after sending data """ + _init_psp_dev(cfg) + + with _make_psp_conn(cfg) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + tx = _spi_xchg(s, assoc['rx-key']) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + + s.shutdown(socket.SHUT_RDWR) + s.close() + + +def _data_mss_adjust(cfg, ipver): + _init_psp_dev(cfg) + + # First figure out what the MSS would be without any adjustments + s = _make_clr_conn(cfg, ipver) + s.send(b"0123456789abcdef" * 1024) + _check_data_rx(cfg, 16 * 1024) + mss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + _close_conn(cfg, s) + + s = _make_psp_conn(cfg, 0, ipver) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + rxmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, rxmss) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, txmss + 40) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _check_data_outq(s, 0) + + txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, txmss + 40) + finally: + _close_psp_conn(cfg, s) + + +def data_stale_key(cfg): + """ Test send on a double-rotated key """ + _init_psp_dev(cfg) + + prev_stale = _get_stat(cfg, 'stale-events') + s = _make_psp_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _check_data_outq(s, 0) + + cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + + cur_stale = _get_stat(cfg, 'stale-events') + ksft_gt(cur_stale, prev_stale) + + s.send(b'0123456789' * 200) + _check_data_outq(s, 2000, force_wait=True) + finally: + _close_psp_conn(cfg, s) + + +def __nsim_psp_rereg(cfg): + # The PSP dev ID will change, remember what was there before + before = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)]) + + cfg._ns.nsims[0].dfs_write('psp_rereg', '1') + + after = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)]) + + new_devs = list(after - before) + ksft_eq(len(new_devs), 1) + cfg.psp_dev_id = list(after - before)[0] + + +def removal_device_rx(cfg): + """ Test removing a netdev / PSD with active Rx assoc """ + + # We could technically devlink reload real devices, too + # but that kills the control socket. So test this on + # netdevsim only for now + cfg.require_nsim() + + s = _make_clr_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_not_none(rx_assoc) + + __nsim_psp_rereg(cfg) + finally: + _close_conn(cfg, s) + + +def removal_device_bi(cfg): + """ Test removing a netdev / PSD with active Rx/Tx assoc """ + + # We could technically devlink reload real devices, too + # but that kills the control socket. So test this on + # netdevsim only for now + cfg.require_nsim() + + s = _make_clr_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": rx_assoc['rx-key'], + "sock-fd": s.fileno()}) + __nsim_psp_rereg(cfg) + finally: + _close_conn(cfg, s) + + +def psp_ip_ver_test_builder(name, test_func, psp_ver, ipver): + """Build test cases for each combo of PSP version and IP version""" + def test_case(cfg): + cfg.require_ipver(ipver) + test_case.__name__ = f"{name}_v{psp_ver}_ip{ipver}" + test_func(cfg, psp_ver, ipver) + return test_case + + +def ipver_test_builder(name, test_func, ipver): + """Build test cases for each IP version""" + def test_case(cfg): + cfg.require_ipver(ipver) + test_case.__name__ = f"{name}_ip{ipver}" + test_func(cfg, ipver) + return test_case + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__) as cfg: + cfg.pspnl = PSPFamily() + + # Set up responder and communication sock + responder = cfg.remote.deploy("psp_responder") + + cfg.comm_port = rand_port() + srv = None + try: + with bkg(responder + f" -p {cfg.comm_port}", host=cfg.remote, + exit_wait=True) as srv: + wait_port_listen(cfg.comm_port, host=cfg.remote) + + cfg.comm_sock = socket.create_connection((cfg.remote_addr, + cfg.comm_port), + timeout=1) + + cases = [ + psp_ip_ver_test_builder( + "data_basic_send", _data_basic_send, version, ipver + ) + for version in range(0, 4) + for ipver in ("4", "6") + ] + cases += [ + ipver_test_builder("data_mss_adjust", _data_mss_adjust, ipver) + for ipver in ("4", "6") + ] + + ksft_run(cases=cases, globs=globals(), + case_pfx={"dev_", "data_", "assoc_", "removal_"}, + args=(cfg, )) + + cfg.comm_sock.send(b"exit\0") + cfg.comm_sock.close() + finally: + if srv and (srv.stdout or srv.stderr): + ksft_pr("") + ksft_pr(f"Responder logs ({srv.ret}):") + if srv and srv.stdout: + ksft_pr("STDOUT:\n# " + srv.stdout.strip().replace("\n", "\n# ")) + if srv and srv.stderr: + ksft_pr("STDERR:\n# " + srv.stderr.strip().replace("\n", "\n# ")) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/psp_responder.c b/tools/testing/selftests/drivers/net/psp_responder.c new file mode 100644 index 000000000000..f309e0d73cbf --- /dev/null +++ b/tools/testing/selftests/drivers/net/psp_responder.c @@ -0,0 +1,483 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <string.h> +#include <sys/poll.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <netinet/in.h> +#include <unistd.h> + +#include <ynl.h> + +#include "psp-user.h" + +#define dbg(msg...) \ +do { \ + if (opts->verbose) \ + fprintf(stderr, "DEBUG: " msg); \ +} while (0) + +static bool should_quit; + +struct opts { + int port; + int devid; + bool verbose; +}; + +enum accept_cfg { + ACCEPT_CFG_NONE = 0, + ACCEPT_CFG_CLEAR, + ACCEPT_CFG_PSP, +}; + +static struct { + unsigned char tx; + unsigned char rx; +} psp_vers; + +static int conn_setup_psp(struct ynl_sock *ys, struct opts *opts, int data_sock) +{ + struct psp_rx_assoc_rsp *rsp; + struct psp_rx_assoc_req *req; + struct psp_tx_assoc_rsp *tsp; + struct psp_tx_assoc_req *teq; + char info[300]; + int key_len; + ssize_t sz; + __u32 spi; + + dbg("create PSP connection\n"); + + // Rx assoc alloc + req = psp_rx_assoc_req_alloc(); + + psp_rx_assoc_req_set_sock_fd(req, data_sock); + psp_rx_assoc_req_set_version(req, psp_vers.rx); + + rsp = psp_rx_assoc(ys, req); + psp_rx_assoc_req_free(req); + + if (!rsp) { + perror("ERROR: failed to Rx assoc"); + return -1; + } + + // SPI exchange + key_len = rsp->rx_key._len.key; + memcpy(info, &rsp->rx_key.spi, sizeof(spi)); + memcpy(&info[sizeof(spi)], rsp->rx_key.key, key_len); + sz = sizeof(spi) + key_len; + + send(data_sock, info, sz, MSG_WAITALL); + psp_rx_assoc_rsp_free(rsp); + + sz = recv(data_sock, info, sz, MSG_WAITALL); + if (sz < 0) { + perror("ERROR: failed to read PSP key from sock"); + return -1; + } + memcpy(&spi, info, sizeof(spi)); + + // Setup Tx assoc + teq = psp_tx_assoc_req_alloc(); + + psp_tx_assoc_req_set_sock_fd(teq, data_sock); + psp_tx_assoc_req_set_version(teq, psp_vers.tx); + psp_tx_assoc_req_set_tx_key_spi(teq, spi); + psp_tx_assoc_req_set_tx_key_key(teq, &info[sizeof(spi)], key_len); + + tsp = psp_tx_assoc(ys, teq); + psp_tx_assoc_req_free(teq); + if (!tsp) { + perror("ERROR: failed to Tx assoc"); + return -1; + } + psp_tx_assoc_rsp_free(tsp); + + return 0; +} + +static void send_ack(int sock) +{ + send(sock, "ack", 4, MSG_WAITALL); +} + +static void send_err(int sock) +{ + send(sock, "err", 4, MSG_WAITALL); +} + +static void send_str(int sock, int value) +{ + char buf[128]; + int ret; + + ret = snprintf(buf, sizeof(buf), "%d", value); + send(sock, buf, ret + 1, MSG_WAITALL); +} + +static void +run_session(struct ynl_sock *ys, struct opts *opts, + int server_sock, int comm_sock) +{ + enum accept_cfg accept_cfg = ACCEPT_CFG_NONE; + struct pollfd pfds[3]; + size_t data_read = 0; + int data_sock = -1; + + while (true) { + bool race_close = false; + int nfds; + + memset(pfds, 0, sizeof(pfds)); + + pfds[0].fd = server_sock; + pfds[0].events = POLLIN; + + pfds[1].fd = comm_sock; + pfds[1].events = POLLIN; + + nfds = 2; + if (data_sock >= 0) { + pfds[2].fd = data_sock; + pfds[2].events = POLLIN; + nfds++; + } + + dbg(" ...\n"); + if (poll(pfds, nfds, -1) < 0) { + perror("poll"); + break; + } + + /* data sock */ + if (pfds[2].revents & POLLIN) { + char buf[8192]; + ssize_t n; + + n = recv(data_sock, buf, sizeof(buf), 0); + if (n <= 0) { + if (n < 0) + perror("data read"); + close(data_sock); + data_sock = -1; + dbg("data sock closed\n"); + } else { + data_read += n; + dbg("data read %zd\n", data_read); + } + } + + /* comm sock */ + if (pfds[1].revents & POLLIN) { + static char buf[4096]; + static ssize_t off; + bool consumed; + ssize_t n; + + n = recv(comm_sock, &buf[off], sizeof(buf) - off, 0); + if (n <= 0) { + if (n < 0) + perror("comm read"); + return; + } + + off += n; + n = off; + +#define __consume(sz) \ + ({ \ + if (n == (sz)) { \ + off = 0; \ + } else { \ + off -= (sz); \ + memmove(buf, &buf[(sz)], off); \ + } \ + }) + +#define cmd(_name) \ + ({ \ + ssize_t sz = sizeof(_name); \ + bool match = n >= sz && !memcmp(buf, _name, sz); \ + \ + if (match) { \ + dbg("command: " _name "\n"); \ + __consume(sz); \ + } \ + consumed |= match; \ + match; \ + }) + + do { + consumed = false; + + if (cmd("read len")) + send_str(comm_sock, data_read); + + if (cmd("data echo")) { + if (data_sock >= 0) + send(data_sock, "echo", 5, + MSG_WAITALL); + else + fprintf(stderr, "WARN: echo but no data sock\n"); + send_ack(comm_sock); + } + if (cmd("data close")) { + if (data_sock >= 0) { + close(data_sock); + data_sock = -1; + send_ack(comm_sock); + } else { + race_close = true; + } + } + if (cmd("conn psp")) { + if (accept_cfg != ACCEPT_CFG_NONE) + fprintf(stderr, "WARN: old conn config still set!\n"); + accept_cfg = ACCEPT_CFG_PSP; + send_ack(comm_sock); + /* next two bytes are versions */ + if (off >= 2) { + memcpy(&psp_vers, buf, 2); + __consume(2); + } else { + fprintf(stderr, "WARN: short conn psp command!\n"); + } + } + if (cmd("conn clr")) { + if (accept_cfg != ACCEPT_CFG_NONE) + fprintf(stderr, "WARN: old conn config still set!\n"); + accept_cfg = ACCEPT_CFG_CLEAR; + send_ack(comm_sock); + } + if (cmd("exit")) + should_quit = true; +#undef cmd + + if (!consumed) { + fprintf(stderr, "WARN: unknown cmd: [%zd] %s\n", + off, buf); + } + } while (consumed && off); + } + + /* server sock */ + if (pfds[0].revents & POLLIN) { + if (data_sock >= 0) { + fprintf(stderr, "WARN: new data sock but old one still here\n"); + close(data_sock); + data_sock = -1; + } + data_sock = accept(server_sock, NULL, NULL); + if (data_sock < 0) { + perror("accept"); + continue; + } + data_read = 0; + + if (accept_cfg == ACCEPT_CFG_CLEAR) { + dbg("new data sock: clear\n"); + /* nothing to do */ + } else if (accept_cfg == ACCEPT_CFG_PSP) { + dbg("new data sock: psp\n"); + conn_setup_psp(ys, opts, data_sock); + } else { + fprintf(stderr, "WARN: new data sock but no config\n"); + } + accept_cfg = ACCEPT_CFG_NONE; + } + + if (race_close) { + if (data_sock >= 0) { + /* indeed, ordering problem, handle the close */ + close(data_sock); + data_sock = -1; + send_ack(comm_sock); + } else { + fprintf(stderr, "WARN: close but no data sock\n"); + send_err(comm_sock); + } + } + } + dbg("session ending\n"); +} + +static int spawn_server(struct opts *opts) +{ + struct sockaddr_in6 addr; + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd < 0) { + perror("can't open socket"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + + addr.sin6_family = AF_INET6; + addr.sin6_addr = in6addr_any; + addr.sin6_port = htons(opts->port); + + if (bind(fd, (struct sockaddr *)&addr, sizeof(addr))) { + perror("can't bind socket"); + return -1; + } + + if (listen(fd, 5)) { + perror("can't listen"); + return -1; + } + + return fd; +} + +static int run_responder(struct ynl_sock *ys, struct opts *opts) +{ + int server_sock, comm; + + server_sock = spawn_server(opts); + if (server_sock < 0) + return 4; + + while (!should_quit) { + comm = accept(server_sock, NULL, NULL); + if (comm < 0) { + perror("accept failed"); + } else { + run_session(ys, opts, server_sock, comm); + close(comm); + } + } + + return 0; +} + +static void usage(const char *name, const char *miss) +{ + if (miss) + fprintf(stderr, "Missing argument: %s\n", miss); + + fprintf(stderr, "Usage: %s -p port [-v] [-d psp-dev-id]\n", name); + exit(EXIT_FAILURE); +} + +static void parse_cmd_opts(int argc, char **argv, struct opts *opts) +{ + int opt; + + while ((opt = getopt(argc, argv, "vp:d:")) != -1) { + switch (opt) { + case 'v': + opts->verbose = 1; + break; + case 'p': + opts->port = atoi(optarg); + break; + case 'd': + opts->devid = atoi(optarg); + break; + default: + usage(argv[0], NULL); + } + } +} + +static int psp_dev_set_ena(struct ynl_sock *ys, __u32 dev_id, __u32 versions) +{ + struct psp_dev_set_req *sreq; + struct psp_dev_set_rsp *srsp; + + fprintf(stderr, "Set PSP enable on device %d to 0x%x\n", + dev_id, versions); + + sreq = psp_dev_set_req_alloc(); + + psp_dev_set_req_set_id(sreq, dev_id); + psp_dev_set_req_set_psp_versions_ena(sreq, versions); + + srsp = psp_dev_set(ys, sreq); + psp_dev_set_req_free(sreq); + if (!srsp) + return 10; + + psp_dev_set_rsp_free(srsp); + return 0; +} + +int main(int argc, char **argv) +{ + struct psp_dev_get_list *dev_list; + bool devid_found = false; + __u32 ver_ena, ver_cap; + struct opts opts = {}; + struct ynl_error yerr; + struct ynl_sock *ys; + int first_id = 0; + int ret; + + parse_cmd_opts(argc, argv, &opts); + if (!opts.port) + usage(argv[0], "port"); // exits + + ys = ynl_sock_create(&ynl_psp_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return 1; + } + + dev_list = psp_dev_get_dump(ys); + if (ynl_dump_empty(dev_list)) { + if (ys->err.code) + goto err_close; + fprintf(stderr, "No PSP devices\n"); + goto err_close_silent; + } + + ynl_dump_foreach(dev_list, d) { + if (opts.devid) { + devid_found = true; + ver_ena = d->psp_versions_ena; + ver_cap = d->psp_versions_cap; + } else if (!first_id) { + first_id = d->id; + ver_ena = d->psp_versions_ena; + ver_cap = d->psp_versions_cap; + } else { + fprintf(stderr, "Multiple PSP devices found\n"); + goto err_close_silent; + } + } + psp_dev_get_list_free(dev_list); + + if (opts.devid && !devid_found) { + fprintf(stderr, "PSP device %d requested on cmdline, not found\n", + opts.devid); + goto err_close_silent; + } else if (!opts.devid) { + opts.devid = first_id; + } + + if (ver_ena != ver_cap) { + ret = psp_dev_set_ena(ys, opts.devid, ver_cap); + if (ret) + goto err_close; + } + + ret = run_responder(ys, &opts); + + if (ver_ena != ver_cap && psp_dev_set_ena(ys, opts.devid, ver_ena)) + fprintf(stderr, "WARN: failed to set the PSP versions back\n"); + + ynl_sock_destroy(ys); + + return ret; + +err_close: + fprintf(stderr, "YNL: %s\n", ys->err.msg); +err_close_silent: + ynl_sock_destroy(ys); + return 2; +} diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py new file mode 100755 index 000000000000..236005290a33 --- /dev/null +++ b/tools/testing/selftests/drivers/net/queues.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_disruptive, ksft_exit, ksft_run +from lib.py import ksft_eq, ksft_not_in, ksft_raises, KsftSkipEx, KsftFailEx +from lib.py import EthtoolFamily, NetdevFamily, NlError +from lib.py import NetDrvEnv +from lib.py import bkg, cmd, defer, ip +import errno +import glob +import os +import socket +import struct + +def sys_get_queues(ifname, qtype='rx') -> int: + folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*') + return len(folders) + + +def nl_get_queues(cfg, nl, qtype='rx'): + queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) + if queues: + return len([q for q in queues if q['type'] == qtype]) + return None + + +def check_xsk(cfg, nl, xdp_queue_id=0) -> None: + # Probe for support + xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False) + if xdp.ret == 255: + raise KsftSkipEx('AF_XDP unsupported') + elif xdp.ret > 0: + raise KsftFailEx('unable to create AF_XDP socket') + + with bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}', + ksft_wait=3): + + rx = tx = False + + queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) + if not queues: + raise KsftSkipEx("Netlink reports no queues") + + for q in queues: + if q['id'] == 0: + if q['type'] == 'rx': + rx = True + if q['type'] == 'tx': + tx = True + + ksft_eq(q.get('xsk', None), {}, + comment="xsk attr on queue we configured") + else: + ksft_not_in('xsk', q, + comment="xsk attr on queue we didn't configure") + + ksft_eq(rx, True) + ksft_eq(tx, True) + + +def get_queues(cfg, nl) -> None: + snl = NetdevFamily(recv_size=4096) + + for qtype in ['rx', 'tx']: + queues = nl_get_queues(cfg, snl, qtype) + if not queues: + raise KsftSkipEx('queue-get not supported by device') + + expected = sys_get_queues(cfg.dev['ifname'], qtype) + ksft_eq(queues, expected) + + +def addremove_queues(cfg, nl) -> None: + queues = nl_get_queues(cfg, nl) + if not queues: + raise KsftSkipEx('queue-get not supported by device') + + curr_queues = sys_get_queues(cfg.dev['ifname']) + if curr_queues == 1: + raise KsftSkipEx('cannot decrement queue: already at 1') + + netnl = EthtoolFamily() + channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + rx_type = 'rx' + if channels.get('combined-count', 0) > 0: + rx_type = 'combined' + + expected = curr_queues - 1 + cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10) + queues = nl_get_queues(cfg, nl) + ksft_eq(queues, expected) + + expected = curr_queues + cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10) + queues = nl_get_queues(cfg, nl) + ksft_eq(queues, expected) + + +@ksft_disruptive +def check_down(cfg, nl) -> None: + # Check the NAPI IDs before interface goes down and hides them + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + + ip(f"link set dev {cfg.dev['ifname']} down") + defer(ip, f"link set dev {cfg.dev['ifname']} up") + + with ksft_raises(NlError) as cm: + nl.queue_get({'ifindex': cfg.ifindex, 'id': 0, 'type': 'rx'}) + ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT) + + if napis: + with ksft_raises(NlError) as cm: + nl.napi_get({'id': napis[0]['id']}) + ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT) + + +def main() -> None: + with NetDrvEnv(__file__, queue_count=100) as cfg: + ksft_run([get_queues, addremove_queues, check_down, check_xsk], + args=(cfg, NetdevFamily())) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/ring_reconfig.py b/tools/testing/selftests/drivers/net/ring_reconfig.py new file mode 100755 index 000000000000..f9530a8b0856 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ring_reconfig.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Test channel and ring size configuration via ethtool (-L / -G). +""" + +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_eq +from lib.py import NetDrvEpEnv, EthtoolFamily, GenerateTraffic +from lib.py import defer, NlError + + +def channels(cfg) -> None: + """ + Twiddle channel counts in various combinations of parameters. + We're only looking for driver adhering to the requested config + if the config is accepted and crashes. + """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + chans = cfg.eth.channels_get(ehdr) + + all_keys = ["rx", "tx", "combined"] + mixes = [{"combined"}, {"rx", "tx"}, {"rx", "combined"}, {"tx", "combined"}, + {"rx", "tx", "combined"},] + + # Get the set of keys that device actually supports + restore = {} + supported = set() + for key in all_keys: + if key + "-max" in chans: + supported.add(key) + restore |= {key + "-count": chans[key + "-count"]} + + defer(cfg.eth.channels_set, ehdr | restore) + + def test_config(config): + try: + cfg.eth.channels_set(ehdr | config) + get = cfg.eth.channels_get(ehdr) + for k, v in config.items(): + ksft_eq(get.get(k, 0), v) + except NlError as e: + failed.append(mix) + ksft_pr("Can't set", config, e) + else: + ksft_pr("Okay", config) + + failed = [] + for mix in mixes: + if not mix.issubset(supported): + continue + + # Set all the values in the mix to 1, other supported to 0 + config = {} + for key in all_keys: + config[key + "-count"] = 1 if key in mix else 0 + test_config(config) + + for mix in mixes: + if not mix.issubset(supported): + continue + if mix in failed: + continue + + # Set all the values in the mix to max, other supported to 0 + config = {} + for key in all_keys: + config[key + "-count"] = chans[key + '-max'] if key in mix else 0 + test_config(config) + + +def _configure_min_ring_cnt(cfg) -> None: + """ Try to configure a single Rx/Tx ring. """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + chans = cfg.eth.channels_get(ehdr) + + all_keys = ["rx-count", "tx-count", "combined-count"] + restore = {} + config = {} + for key in all_keys: + if key in chans: + restore[key] = chans[key] + config[key] = 0 + + if chans.get('combined-count', 0) > 1: + config['combined-count'] = 1 + elif chans.get('rx-count', 0) > 1 and chans.get('tx-count', 0) > 1: + config['tx-count'] = 1 + config['rx-count'] = 1 + else: + # looks like we're already on 1 channel + return + + cfg.eth.channels_set(ehdr | config) + defer(cfg.eth.channels_set, ehdr | restore) + + +def ringparam(cfg) -> None: + """ + Tweak the ringparam configuration. Try to run some traffic over min + ring size to make sure it actually functions. + """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + rings = cfg.eth.rings_get(ehdr) + + restore = {} + maxes = {} + params = set() + for key in rings.keys(): + if 'max' in key: + param = key[:-4] + maxes[param] = rings[key] + params.add(param) + restore[param] = rings[param] + + defer(cfg.eth.rings_set, ehdr | restore) + + # Speed up the reconfig by configuring just one ring + _configure_min_ring_cnt(cfg) + + # Try to reach min on all settings + for param in params: + val = rings[param] + while True: + try: + cfg.eth.rings_set({'header':{'dev-index': cfg.ifindex}, + param: val // 2}) + if val == 0: + break + val //= 2 + except NlError: + break + + get = cfg.eth.rings_get(ehdr) + ksft_eq(get[param], val) + + ksft_pr(f"Reached min for '{param}' at {val} (max {rings[param]})") + + GenerateTraffic(cfg).wait_pkts_and_stop(10000) + + # Try max across all params, if the driver supports large rings + # this may OOM so we ignore errors + try: + ksft_pr("Applying max settings") + config = {p: maxes[p] for p in params} + cfg.eth.rings_set(ehdr | config) + except NlError as e: + ksft_pr("Can't set max params", config, e) + else: + GenerateTraffic(cfg).wait_pkts_and_stop(10000) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__) as cfg: + cfg.eth = EthtoolFamily() + + ksft_run([channels, + ringparam], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/shaper.py b/tools/testing/selftests/drivers/net/shaper.py new file mode 100755 index 000000000000..11310f19bfa0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/shaper.py @@ -0,0 +1,461 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_true, KsftSkipEx +from lib.py import EthtoolFamily, NetshaperFamily +from lib.py import NetDrvEnv +from lib.py import NlError +from lib.py import cmd + +def get_shapers(cfg, nl_shaper) -> None: + try: + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + + # Default configuration: no shapers configured. + ksft_eq(len(shapers), 0) + +def get_caps(cfg, nl_shaper) -> None: + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex}, dump=True) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + + # Each device implementing shaper support must support some + # features in at least a scope. + ksft_true(len(caps)> 0) + +def set_qshapers(cfg, nl_shaper) -> None: + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'queue'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-bw-max' in caps or not 'support-metric-bps' in caps: + raise KsftSkipEx("device does not support queue scope shapers with bw_max and metric bps") + + cfg.queues = True; + netnl = EthtoolFamily() + channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if channels['combined-count'] == 0: + cfg.rx_type = 'rx' + cfg.nr_queues = channels['rx-count'] + else: + cfg.rx_type = 'combined' + cfg.nr_queues = channels['combined-count'] + if cfg.nr_queues < 3: + raise KsftSkipEx(f"device does not support enough queues min 3 found {cfg.nr_queues}") + + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 10000}) + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}, + 'metric': 'bps', + 'bw-max': 20000}) + + # Querying a specific shaper not yet configured must fail. + raised = False + try: + shaper_q0 = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 0}}) + except (NlError): + raised = True + ksft_eq(raised, True) + + shaper_q1 = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + ksft_eq(shaper_q1, {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 10000}) + + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 10000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 2}, + 'metric': 'bps', + 'bw-max': 20000}]) + +def del_qshapers(cfg, nl_shaper) -> None: + if not cfg.queues: + raise KsftSkipEx("queue shapers not supported by device, skipping delete") + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}}) + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 0) + +def set_nshapers(cfg, nl_shaper) -> None: + # Check required features. + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'netdev'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-bw-max' in caps or not 'support-metric-bps' in caps: + raise KsftSkipEx("device does not support nested netdev scope shapers with weight") + + cfg.netdev = True; + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev', 'id': 0}, + 'bw-max': 100000}) + + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev'}, + 'metric': 'bps', + 'bw-max': 100000}]) + +def del_nshapers(cfg, nl_shaper) -> None: + if not cfg.netdev: + raise KsftSkipEx("netdev shaper not supported by device, skipping delete") + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev'}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 0) + +def basic_groups(cfg, nl_shaper) -> None: + if not cfg.netdev: + raise KsftSkipEx("netdev shaper not supported by the device") + if cfg.nr_queues < 3: + raise KsftSkipEx(f"netdev does not have enough queues min 3 reported {cfg.nr_queues}") + + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'queue'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-weight' in caps: + raise KsftSkipEx("device does not support queue scope shapers with weight") + + node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 1}, + 'weight': 1}, + {'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}], + 'handle': {'scope':'netdev'}, + 'metric': 'bps', + 'bw-max': 10000}) + ksft_eq(node_handle, {'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev'}}) + + shaper = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + ksft_eq(shaper, {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'weight': 1 }) + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}}) + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + + # Deleting all the leaves shaper does not affect the node one + # when the latter has 'netdev' scope. + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 1) + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev'}}) + +def qgroups(cfg, nl_shaper) -> None: + if cfg.nr_queues < 4: + raise KsftSkipEx(f"netdev does not have enough queues min 4 reported {cfg.nr_queues}") + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'node'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-bw-max' in caps or not 'support-metric-bps' in caps: + raise KsftSkipEx("device does not support node scope shapers with bw_max and metric bps") + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'queue'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-nesting' in caps or not 'support-weight' in caps or not 'support-metric-bps' in caps: + raise KsftSkipEx("device does not support nested queue scope shapers with weight") + + cfg.groups = True; + node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}], + 'handle': {'scope':'node'}, + 'metric': 'bps', + 'bw-max': 10000}) + node_id = node_handle['handle']['id'] + + shaper = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + ksft_eq(shaper, {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}) + shaper = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': node_id}}) + ksft_eq(shaper, {'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': node_id}, + 'parent': {'scope': 'netdev'}, + 'metric': 'bps', + 'bw-max': 10000}) + + # Grouping to a specified, not existing node scope shaper must fail + raised = False + try: + nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 3}, + 'weight': 3}], + 'handle': {'scope':'node', 'id': node_id + 1}, + 'metric': 'bps', + 'bw-max': 10000}) + + except (NlError): + raised = True + ksft_eq(raised, True) + + # Add to an existing node + node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 3}, + 'weight': 4}], + 'handle': {'scope':'node', 'id': node_id}}) + ksft_eq(node_handle, {'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': node_id}}) + + shaper = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 3}}) + ksft_eq(shaper, {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 3}, + 'weight': 4}) + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}}) + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + + # Deleting a non empty node will move the leaves downstream. + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': node_id}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 3}, + 'weight': 4}]) + + # Finish and verify the complete cleanup. + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 3}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 0) + +def delegation(cfg, nl_shaper) -> None: + if not cfg.groups: + raise KsftSkipEx("device does not support node scope") + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'node'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("node scope shapers not supported by the device") + raise + if not 'support-nesting' in caps: + raise KsftSkipEx("device does not support node scope shapers nesting") + + node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}, + {'handle': {'scope': 'queue', 'id': 3}, + 'weight': 1}], + 'handle': {'scope':'node'}, + 'metric': 'bps', + 'bw-max': 10000}) + node_id = node_handle['handle']['id'] + + # Create the nested node and validate the hierarchy + nested_node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}], + 'handle': {'scope':'node'}, + 'metric': 'bps', + 'bw-max': 5000}) + nested_node_id = nested_node_handle['handle']['id'] + ksft_true(nested_node_id != node_id) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': nested_node_id}, + 'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': nested_node_id}, + 'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 3}, + 'weight': 1}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'node', 'id': node_id}, + 'metric': 'bps', + 'bw-max': 10000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'node', 'id': nested_node_id}, + 'metric': 'bps', + 'bw-max': 5000}]) + + # Deleting a non empty node will move the leaves downstream. + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': nested_node_id}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 3}, + 'weight': 1}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'node', 'id': node_id}, + 'metric': 'bps', + 'bw-max': 10000}]) + + # Final cleanup. + for i in range(1, 4): + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': i}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 0) + +def queue_update(cfg, nl_shaper) -> None: + if cfg.nr_queues < 4: + raise KsftSkipEx(f"netdev does not have enough queues min 4 reported {cfg.nr_queues}") + if not cfg.queues: + raise KsftSkipEx("device does not support queue scope") + + for i in range(3): + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': i}, + 'metric': 'bps', + 'bw-max': (i + 1) * 1000}) + # Delete a channel, with no shapers configured on top of the related + # queue: no changes expected + cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} 3", timeout=10) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 0}, + 'metric': 'bps', + 'bw-max': 1000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 2000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 2}, + 'metric': 'bps', + 'bw-max': 3000}]) + + # Delete a channel, with a shaper configured on top of the related + # queue: the shaper must be deleted, too + cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} 2", timeout=10) + + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 0}, + 'metric': 'bps', + 'bw-max': 1000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 2000}]) + + # Restore the original channels number, no expected changes + cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} {cfg.nr_queues}", timeout=10) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 0}, + 'metric': 'bps', + 'bw-max': 1000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 2000}]) + + # Final cleanup. + for i in range(0, 2): + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': i}}) + +def main() -> None: + with NetDrvEnv(__file__, queue_count=4) as cfg: + cfg.queues = False + cfg.netdev = False + cfg.groups = False + cfg.nr_queues = 0 + ksft_run([get_shapers, + get_caps, + set_qshapers, + del_qshapers, + set_nshapers, + del_nshapers, + basic_groups, + qgroups, + delegation, + queue_update], args=(cfg, NetshaperFamily())) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py new file mode 100755 index 000000000000..b08e4d48b15c --- /dev/null +++ b/tools/testing/selftests/drivers/net/stats.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests related to standard netdevice statistics. +""" + +import errno +import subprocess +import time +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import ksft_disruptive +from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError +from lib.py import NetDrvEnv +from lib.py import cmd, ip, defer + +ethnl = EthtoolFamily() +netfam = NetdevFamily() +rtnl = RtnlFamily() + + +def check_pause(cfg) -> None: + """ + Check that drivers which support Pause config also report standard + pause stats. + """ + + try: + ethnl.pause_get({"header": {"dev-index": cfg.ifindex}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("pause not supported by the device") from e + raise + + data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex, + "flags": {'stats'}}}) + ksft_true(data['stats'], "driver does not report stats") + + +def check_fec(cfg) -> None: + """ + Check that drivers which support FEC config also report standard + FEC stats. + """ + + try: + ethnl.fec_get({"header": {"dev-index": cfg.ifindex}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("FEC not supported by the device") from e + raise + + data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex, + "flags": {'stats'}}}) + ksft_true(data['stats'], "driver does not report stats") + + +def check_fec_hist(cfg) -> None: + """ + Check that drivers which support FEC histogram statistics report + reasonable values. + """ + + try: + data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex, + "flags": {'stats'}}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("FEC not supported by the device") from e + raise + if 'stats' not in data: + raise KsftSkipEx("FEC stats not supported by the device") + if 'hist' not in data['stats']: + raise KsftSkipEx("FEC histogram not supported by the device") + + hist = data['stats']['hist'] + for fec_bin in hist: + for key in ['bin-low', 'bin-high', 'bin-val']: + ksft_in(key, fec_bin, + "Drivers should always report FEC bin range and value") + ksft_ge(fec_bin['bin-high'], fec_bin['bin-low'], + "FEC bin range should be valid") + if 'bin-val-per-lane' in fec_bin: + ksft_eq(sum(fec_bin['bin-val-per-lane']), fec_bin['bin-val'], + "FEC bin value should be equal to sum of per-plane values") + + +def pkt_byte_sum(cfg) -> None: + """ + Check that qstat and interface stats match in value. + """ + + def get_qstat(test): + stats = netfam.qstats_get({}, dump=True) + if stats: + for qs in stats: + if qs["ifindex"]== test.ifindex: + return qs + return None + + qstat = get_qstat(cfg) + if qstat is None: + raise KsftSkipEx("qstats not supported by the device") + + for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']: + ksft_in(key, qstat, "Drivers should always report basic keys") + + # Compare stats, rtnl stats and qstats must match, + # but the interface may be up, so do a series of dumps + # each time the more "recent" stats must be higher or same. + def stat_cmp(rstat, qstat): + for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']: + if rstat[key] != qstat[key]: + return rstat[key] - qstat[key] + return 0 + + for _ in range(10): + rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + if stat_cmp(rtstat, qstat) < 0: + raise KsftFailEx("RTNL stats are lower, fetched later") + qstat = get_qstat(cfg) + if stat_cmp(rtstat, qstat) > 0: + raise KsftFailEx("Qstats are lower, fetched later") + + +def qstat_by_ifindex(cfg) -> None: + """ Qstats Netlink API tests - querying by ifindex. """ + + # Construct a map ifindex -> [dump, by-index, dump] + ifindexes = {} + stats = netfam.qstats_get({}, dump=True) + for entry in stats: + ifindexes[entry['ifindex']] = [entry, None, None] + + for ifindex in ifindexes: + entry = netfam.qstats_get({"ifindex": ifindex}, dump=True) + ksft_eq(len(entry), 1) + ifindexes[entry[0]['ifindex']][1] = entry[0] + + stats = netfam.qstats_get({}, dump=True) + for entry in stats: + ifindexes[entry['ifindex']][2] = entry + + if len(ifindexes) == 0: + raise KsftSkipEx("No ifindex supports qstats") + + # Now make sure the stats match/make sense + for ifindex, triple in ifindexes.items(): + all_keys = triple[0].keys() | triple[1].keys() | triple[2].keys() + + for key in all_keys: + ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key) + ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key) + + # Sanity check the dumps + queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True) + # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}} + parsed = {} + for entry in queues: + ifindex = entry["ifindex"] + if ifindex not in parsed: + parsed[ifindex] = {"rx":[], "tx": []} + parsed[ifindex][entry["queue-type"]].append(entry['queue-id']) + # Now, validate + for ifindex, queues in parsed.items(): + for qtype in ['rx', 'tx']: + ksft_eq(len(queues[qtype]), len(set(queues[qtype])), + comment="repeated queue keys") + ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1, + comment="missing queue keys") + + # Test invalid dumps + # 0 is invalid + with ksft_raises(NlError) as cm: + netfam.qstats_get({"ifindex": 0}, dump=True) + ksft_eq(cm.exception.nl_msg.error, -34) + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') + + # loopback has no stats + with ksft_raises(NlError) as cm: + netfam.qstats_get({"ifindex": 1}, dump=True) + ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP) + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') + + # Try to get stats for lowest unused ifindex but not 0 + devs = rtnl.getlink({}, dump=True) + all_ifindexes = set(dev["ifi-index"] for dev in devs) + lowest = 2 + while lowest in all_ifindexes: + lowest += 1 + + with ksft_raises(NlError) as cm: + netfam.qstats_get({"ifindex": lowest}, dump=True) + ksft_eq(cm.exception.nl_msg.error, -19) + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') + + +@ksft_disruptive +def check_down(cfg) -> None: + """ Test statistics (interface and qstat) are not impacted by ifdown """ + + try: + qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("qstats not supported by the device") from e + raise + + ip(f"link set dev {cfg.dev['ifname']} down") + defer(ip, f"link set dev {cfg.dev['ifname']} up") + + qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + for k in qstat: + ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down") + + # exercise per-queue API to make sure that "device down" state + # is handled correctly and doesn't crash + netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True) + + +def __run_inf_loop(body): + body = body.strip() + if body[-1] != ';': + body += ';' + + return subprocess.Popen(f"while true; do {body} done", shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + +def __stats_increase_sanely(old, new) -> None: + for k in old.keys(): + ksft_ge(new[k], old[k]) + ksft_lt(new[k] - old[k], 1 << 31, comment="likely wrapping error") + + +def procfs_hammer(cfg) -> None: + """ + Reading stats via procfs only holds the RCU lock, which is not an exclusive + lock, make sure drivers can handle parallel reads of stats. + """ + one = __run_inf_loop("cat /proc/net/dev") + defer(one.kill) + two = __run_inf_loop("cat /proc/net/dev") + defer(two.kill) + + time.sleep(1) + # Make sure the processes are running + ksft_is(one.poll(), None) + ksft_is(two.poll(), None) + + rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + time.sleep(2) + rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + __stats_increase_sanely(rtstat1, rtstat2) + # defers will kill the loops + + +@ksft_disruptive +def procfs_downup_hammer(cfg) -> None: + """ + Reading stats via procfs only holds the RCU lock, drivers often try + to sleep when reading the stats, or don't protect against races. + """ + # Set a large number of queues, + # we'll flip between min(max_queues, 64) and 1 + channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if channels['combined-count'] == 0: + rx_type = 'rx' + else: + rx_type = 'combined' + cur_queue_cnt = channels[f'{rx_type}-count'] + max_queue_cnt = min(channels[f'{rx_type}-max'], 64) + + cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}") + defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}") + + # Real test stats + stats = __run_inf_loop("cat /proc/net/dev") + defer(stats.kill) + + ipset = f"ip link set dev {cfg.ifname}" + defer(ip, f"link set dev {cfg.ifname} up") + # The "echo -n 1" lets us count iterations below + updown = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \ + f"ethtool -L {cfg.ifname} {rx_type} 1; " + \ + f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \ + "echo -n 1" + updown = __run_inf_loop(updown) + kill_updown = defer(updown.kill) + + time.sleep(1) + # Make sure the processes are running + ksft_is(stats.poll(), None) + ksft_is(updown.poll(), None) + + rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + # We're looking for crashes, give it extra time + time.sleep(9) + rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + __stats_increase_sanely(rtstat1, rtstat2) + + kill_updown.exec() + stdout, _ = updown.communicate(timeout=5) + ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8'))) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, queue_count=100) as cfg: + ksft_run([check_pause, check_fec, check_fec_hist, pkt_byte_sum, + qstat_by_ifindex, check_down, procfs_hammer, + procfs_downup_hammer], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile new file mode 100644 index 000000000000..1340b3df9c31 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for net selftests + +TEST_PROGS := \ + dev_addr_lists.sh \ + options.sh \ + propagation.sh \ +# end of TEST_PROGS + +TEST_INCLUDES := \ + ../bonding/lag_lib.sh \ + ../../../net/forwarding/lib.sh \ + ../../../net/in_netns.sh \ + ../../../net/lib.sh \ + ../../../net/lib/sh/defer.sh \ +# end of TEST_INCLUDES + +include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config new file mode 100644 index 000000000000..558e1d0cf565 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/config @@ -0,0 +1,7 @@ +CONFIG_DUMMY=y +CONFIG_IPV6=y +CONFIG_MACVLAN=y +CONFIG_NETDEVSIM=m +CONFIG_NET_TEAM=y +CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y +CONFIG_NET_TEAM_MODE_LOADBALANCE=y diff --git a/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh new file mode 100755 index 000000000000..b1ec7755b783 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test team device handling of addr lists (dev->uc, mc) +# + +ALL_TESTS=" + team_cleanup +" + +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +source "$lib_dir"/../bonding/lag_lib.sh + + +destroy() +{ + local ifnames=(dummy1 dummy2 team0 mv0) + local ifname + + for ifname in "${ifnames[@]}"; do + ip link del "$ifname" &>/dev/null + done +} + +cleanup() +{ + pre_cleanup + + destroy +} + + +team_cleanup() +{ + RET=0 + + test_LAG_cleanup "team" "lacp" +} + + +require_command teamd + +trap cleanup EXIT + +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/team/options.sh b/tools/testing/selftests/drivers/net/team/options.sh new file mode 100755 index 000000000000..44888f32b513 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/options.sh @@ -0,0 +1,188 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# These tests verify basic set and get functionality of the team +# driver options over netlink. + +# Run in private netns. +test_dir="$(dirname "$0")" +if [[ $# -eq 0 ]]; then + "${test_dir}"/../../../net/in_netns.sh "$0" __subprocess + exit $? +fi + +ALL_TESTS=" + team_test_options +" + +source "${test_dir}/../../../net/lib.sh" + +TEAM_PORT="team0" +MEMBER_PORT="dummy0" + +setup() +{ + ip link add name "${MEMBER_PORT}" type dummy + ip link add name "${TEAM_PORT}" type team +} + +get_and_check_value() +{ + local option_name="$1" + local expected_value="$2" + local port_flag="$3" + + local value_from_get + + if ! value_from_get=$(teamnl "${TEAM_PORT}" getoption "${option_name}" \ + "${port_flag}"); then + echo "Could not get option '${option_name}'" >&2 + return 1 + fi + + if [[ "${value_from_get}" != "${expected_value}" ]]; then + echo "Incorrect value for option '${option_name}'" >&2 + echo "get (${value_from_get}) != set (${expected_value})" >&2 + return 1 + fi +} + +set_and_check_get() +{ + local option_name="$1" + local option_value="$2" + local port_flag="$3" + + local value_from_get + + if ! teamnl "${TEAM_PORT}" setoption "${option_name}" \ + "${option_value}" "${port_flag}"; then + echo "'setoption ${option_name} ${option_value}' failed" >&2 + return 1 + fi + + get_and_check_value "${option_name}" "${option_value}" "${port_flag}" + return $? +} + +# Get a "port flag" to pass to the `teamnl` command. +# E.g. $1="dummy0" -> "port=dummy0", +# $1="" -> "" +get_port_flag() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + echo "--port=${port_name}" + fi +} + +attach_port_if_specified() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + ip link set dev "${port_name}" master "${TEAM_PORT}" + return $? + fi +} + +detach_port_if_specified() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + ip link set dev "${port_name}" nomaster + return $? + fi +} + +# Test that an option's get value matches its set value. +# Globals: +# RET - Used by testing infra like `check_err`. +# EXIT_STATUS - Used by `log_test` for whole script exit value. +# Arguments: +# option_name - The name of the option. +# value_1 - The first value to try setting. +# value_2 - The second value to try setting. +# port_name - The (optional) name of the attached port. +team_test_option() +{ + local option_name="$1" + local value_1="$2" + local value_2="$3" + local possible_values="$2 $3 $2" + local port_name="$4" + local port_flag + + RET=0 + + echo "Setting '${option_name}' to '${value_1}' and '${value_2}'" + + attach_port_if_specified "${port_name}" + check_err $? "Couldn't attach ${port_name} to master" + port_flag=$(get_port_flag "${port_name}") + + # Set and get both possible values. + for value in ${possible_values}; do + set_and_check_get "${option_name}" "${value}" "${port_flag}" + check_err $? "Failed to set '${option_name}' to '${value}'" + done + + detach_port_if_specified "${port_name}" + check_err $? "Couldn't detach ${port_name} from its master" + + log_test "Set + Get '${option_name}' test" +} + +# Test that getting a non-existant option fails. +# Globals: +# RET - Used by testing infra like `check_err`. +# EXIT_STATUS - Used by `log_test` for whole script exit value. +# Arguments: +# option_name - The name of the option. +# port_name - The (optional) name of the attached port. +team_test_get_option_fails() +{ + local option_name="$1" + local port_name="$2" + local port_flag + + RET=0 + + attach_port_if_specified "${port_name}" + check_err $? "Couldn't attach ${port_name} to master" + port_flag=$(get_port_flag "${port_name}") + + # Just confirm that getting the value fails. + teamnl "${TEAM_PORT}" getoption "${option_name}" "${port_flag}" + check_fail $? "Shouldn't be able to get option '${option_name}'" + + detach_port_if_specified "${port_name}" + + log_test "Get '${option_name}' fails" +} + +team_test_options() +{ + # Wrong option name behavior. + team_test_get_option_fails fake_option1 + team_test_get_option_fails fake_option2 "${MEMBER_PORT}" + + # Correct set and get behavior. + team_test_option mode activebackup loadbalance + team_test_option notify_peers_count 0 5 + team_test_option notify_peers_interval 0 5 + team_test_option mcast_rejoin_count 0 5 + team_test_option mcast_rejoin_interval 0 5 + team_test_option enabled true false "${MEMBER_PORT}" + team_test_option user_linkup true false "${MEMBER_PORT}" + team_test_option user_linkup_enabled true false "${MEMBER_PORT}" + team_test_option priority 10 20 "${MEMBER_PORT}" + team_test_option queue_id 0 1 "${MEMBER_PORT}" +} + +require_command teamnl +setup +tests_run +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/team/propagation.sh b/tools/testing/selftests/drivers/net/team/propagation.sh new file mode 100755 index 000000000000..4bea75b79878 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/propagation.sh @@ -0,0 +1,80 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e + +NSIM_LRO_ID=$((256 + RANDOM % 256)) +NSIM_LRO_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_LRO_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device + +cleanup() +{ + set +e + ip link del dummyteam &>/dev/null + ip link del team0 &>/dev/null + echo $NSIM_LRO_ID > $NSIM_DEV_SYS_DEL + modprobe -r netdevsim +} + +# Trigger LRO propagation to the lower. +# https://lore.kernel.org/netdev/aBvOpkIoxcr9PfDg@mini-arch/ +team_lro() +{ + # using netdevsim because it supports NETIF_F_LRO + NSIM_LRO_NAME=$(find $NSIM_LRO_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_LRO_SYS/net -exec basename {} \;) + + ip link add name team0 type team + ip link set $NSIM_LRO_NAME down + ip link set dev $NSIM_LRO_NAME master team0 + ip link set team0 up + ethtool -K team0 large-receive-offload off + + ip link del team0 +} + +# Trigger promisc propagation to the lower during IFLA_MASTER. +# https://lore.kernel.org/netdev/20250506032328.3003050-1-sdf@fomichev.me/ +team_promisc() +{ + ip link add name dummyteam type dummy + ip link add name team0 type team + ip link set dummyteam down + ip link set team0 promisc on + ip link set dev dummyteam master team0 + ip link set team0 up + + ip link del team0 + ip link del dummyteam +} + +# Trigger promisc propagation to the lower via netif_change_flags (aka +# ndo_change_rx_flags). +# https://lore.kernel.org/netdev/20250514220319.3505158-1-stfomichev@gmail.com/ +team_change_flags() +{ + ip link add name dummyteam type dummy + ip link add name team0 type team + ip link set dummyteam down + ip link set dev dummyteam master team0 + ip link set team0 up + ip link set team0 promisc on + + # Make sure we can add more L2 addresses without any issues. + ip link add link team0 address 00:00:00:00:00:01 team0.1 type macvlan + ip link set team0.1 up + + ip link del team0.1 + ip link del team0 + ip link del dummyteam +} + +trap cleanup EXIT +modprobe netdevsim || : +echo $NSIM_LRO_ID > $NSIM_DEV_SYS_NEW +udevadm settle +team_lro +team_promisc +team_change_flags diff --git a/tools/testing/selftests/drivers/net/virtio_net/Makefile b/tools/testing/selftests/drivers/net/virtio_net/Makefile new file mode 100644 index 000000000000..868ece3fea1f --- /dev/null +++ b/tools/testing/selftests/drivers/net/virtio_net/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +TEST_PROGS = basic_features.sh + +TEST_FILES = virtio_net_common.sh + +TEST_INCLUDES = \ + ../../../net/forwarding/lib.sh \ + ../../../net/lib.sh \ +# end of TEST_INCLUDES + +include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh b/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh new file mode 100755 index 000000000000..cf8cf816ed48 --- /dev/null +++ b/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh @@ -0,0 +1,131 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# See virtio_net_common.sh comments for more details about assumed setup + +ALL_TESTS=" + initial_ping_test + f_mac_test +" + +source virtio_net_common.sh + +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +h1=${NETIFS[p1]} +h2=${NETIFS[p2]} + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +initial_ping_test() +{ + setup_cleanup + setup_prepare + ping_test $h1 $H2_IPV4 " simple" +} + +f_mac_test() +{ + RET=0 + local test_name="mac feature filtered" + + virtio_feature_present $h1 $VIRTIO_NET_F_MAC + if [ $? -ne 0 ]; then + log_test_skip "$test_name" "Device $h1 is missing feature $VIRTIO_NET_F_MAC." + return 0 + fi + virtio_feature_present $h1 $VIRTIO_NET_F_MAC + if [ $? -ne 0 ]; then + log_test_skip "$test_name" "Device $h2 is missing feature $VIRTIO_NET_F_MAC." + return 0 + fi + + setup_cleanup + setup_prepare + + grep -q 0 /sys/class/net/$h1/addr_assign_type + check_err $? "Permanent address assign type for $h1 is not set" + grep -q 0 /sys/class/net/$h2/addr_assign_type + check_err $? "Permanent address assign type for $h2 is not set" + + setup_cleanup + virtio_filter_feature_add $h1 $VIRTIO_NET_F_MAC + virtio_filter_feature_add $h2 $VIRTIO_NET_F_MAC + setup_prepare + + grep -q 0 /sys/class/net/$h1/addr_assign_type + check_fail $? "Permanent address assign type for $h1 is set when F_MAC feature is filtered" + grep -q 0 /sys/class/net/$h2/addr_assign_type + check_fail $? "Permanent address assign type for $h2 is set when F_MAC feature is filtered" + + ping_do $h1 $H2_IPV4 + check_err $? "Ping failed" + + log_test "$test_name" +} + +setup_prepare() +{ + virtio_device_rebind $h1 + virtio_device_rebind $h2 + wait_for_dev $h1 + wait_for_dev $h2 + + vrf_prepare + + h1_create + h2_create +} + +setup_cleanup() +{ + h2_destroy + h1_destroy + + vrf_cleanup + + virtio_filter_features_clear $h1 + virtio_filter_features_clear $h2 + virtio_device_rebind $h1 + virtio_device_rebind $h2 + wait_for_dev $h1 + wait_for_dev $h2 +} + +cleanup() +{ + pre_cleanup + setup_cleanup +} + +check_driver $h1 "virtio_net" +check_driver $h2 "virtio_net" +check_virtio_debugfs $h1 +check_virtio_debugfs $h2 + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/virtio_net/config b/tools/testing/selftests/drivers/net/virtio_net/config new file mode 100644 index 000000000000..bcf7555eaffe --- /dev/null +++ b/tools/testing/selftests/drivers/net/virtio_net/config @@ -0,0 +1,8 @@ +CONFIG_BPF_SYSCALL=y +CONFIG_CGROUP_BPF=y +CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=m +CONFIG_VIRTIO_DEBUG=y +CONFIG_VIRTIO_NET=y diff --git a/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh b/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh new file mode 100644 index 000000000000..57bd8055e2e5 --- /dev/null +++ b/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This assumes running on a host with two virtio interfaces connected +# back to back. Example script to do such wire-up of tap devices would +# look like this: +# +# ======================================================================================================= +# #!/bin/bash +# +# DEV1="$1" +# DEV2="$2" +# +# sudo tc qdisc add dev $DEV1 clsact +# sudo tc qdisc add dev $DEV2 clsact +# sudo tc filter add dev $DEV1 ingress protocol all pref 1 matchall action mirred egress redirect dev $DEV2 +# sudo tc filter add dev $DEV2 ingress protocol all pref 1 matchall action mirred egress redirect dev $DEV1 +# sudo ip link set $DEV1 up +# sudo ip link set $DEV2 up +# ======================================================================================================= + +REQUIRE_MZ="no" +NETIF_CREATE="no" +NETIF_FIND_DRIVER="virtio_net" +NUM_NETIFS=2 + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +VIRTIO_NET_F_MAC=5 + +virtio_device_get() +{ + local dev=$1; shift + local device_path="/sys/class/net/$dev/device/" + + basename `realpath $device_path` +} + +virtio_device_rebind() +{ + local dev=$1; shift + local device=`virtio_device_get $dev` + + echo "$device" > /sys/bus/virtio/drivers/virtio_net/unbind + echo "$device" > /sys/bus/virtio/drivers/virtio_net/bind +} + +virtio_debugfs_get() +{ + local dev=$1; shift + local device=`virtio_device_get $dev` + + echo /sys/kernel/debug/virtio/$device/ +} + +check_virtio_debugfs() +{ + local dev=$1; shift + local debugfs=`virtio_debugfs_get $dev` + + if [ ! -f "$debugfs/device_features" ] || + [ ! -f "$debugfs/filter_feature_add" ] || + [ ! -f "$debugfs/filter_feature_del" ] || + [ ! -f "$debugfs/filter_features" ] || + [ ! -f "$debugfs/filter_features_clear" ]; then + echo "SKIP: not possible to access debugfs for $dev" + exit $ksft_skip + fi +} + +virtio_feature_present() +{ + local dev=$1; shift + local feature=$1; shift + local debugfs=`virtio_debugfs_get $dev` + + cat $debugfs/device_features |grep "^$feature$" &> /dev/null + return $? +} + +virtio_filter_features_clear() +{ + local dev=$1; shift + local debugfs=`virtio_debugfs_get $dev` + + echo "1" > $debugfs/filter_features_clear +} + +virtio_filter_feature_add() +{ + local dev=$1; shift + local feature=$1; shift + local debugfs=`virtio_debugfs_get $dev` + + echo "$feature" > $debugfs/filter_feature_add +} diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py new file mode 100755 index 000000000000..e54df158dfe9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -0,0 +1,779 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +This file contains tests to verify native XDP support in network drivers. +The tests utilize the BPF program `xdp_native.bpf.o` from the `selftests.net.lib` +directory, with each test focusing on a specific aspect of XDP functionality. +""" +import random +import string +from dataclasses import dataclass +from enum import Enum + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_ne, ksft_pr +from lib.py import KsftNamedVariant, ksft_variants +from lib.py import KsftFailEx, NetDrvEpEnv +from lib.py import EthtoolFamily, NetdevFamily, NlError +from lib.py import bkg, cmd, rand_port, wait_port_listen +from lib.py import ip, bpftool, defer + + +class TestConfig(Enum): + """Enum for XDP configuration options.""" + MODE = 0 # Configures the BPF program for a specific test + PORT = 1 # Port configuration to communicate with the remote host + ADJST_OFFSET = 2 # Tail/Head adjustment offset for extension/shrinking + ADJST_TAG = 3 # Adjustment tag to annotate the start and end of extension + + +class XDPAction(Enum): + """Enum for XDP actions.""" + PASS = 0 # Pass the packet up to the stack + DROP = 1 # Drop the packet + TX = 2 # Route the packet to the remote host + TAIL_ADJST = 3 # Adjust the tail of the packet + HEAD_ADJST = 4 # Adjust the head of the packet + + +class XDPStats(Enum): + """Enum for XDP statistics.""" + RX = 0 # Count of valid packets received for testing + PASS = 1 # Count of packets passed up to the stack + DROP = 2 # Count of packets dropped + TX = 3 # Count of incoming packets routed to the remote host + ABORT = 4 # Count of packets that were aborted + + +@dataclass +class BPFProgInfo: + """Data class to store information about a BPF program.""" + name: str # Name of the BPF program + file: str # BPF program object file + xdp_sec: str = "xdp" # XDP section name (e.g., "xdp" or "xdp.frags") + mtu: int = 1500 # Maximum Transmission Unit, default is 1500 + + +def _exchg_udp(cfg, port, test_string): + """ + Exchanges UDP packets between a local and remote host using the socat tool. + + Args: + cfg: Configuration object containing network settings. + port: Port number to use for the UDP communication. + test_string: String that the remote host will send. + + Returns: + The string received by the test host. + """ + cfg.require_cmd("socat", remote=True) + + rx_udp_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" + tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + + with bkg(rx_udp_cmd, exit_wait=True) as nc: + wait_port_listen(port, proto="udp") + cmd(tx_udp_cmd, host=cfg.remote, shell=True) + + return nc.stdout.strip() + + +def _test_udp(cfg, port, size=256): + """ + Tests UDP packet exchange between a local and remote host. + + Args: + cfg: Configuration object containing network settings. + port: Port number to use for the UDP communication. + size: The length of the test string to be exchanged, default is 256 characters. + + Returns: + bool: True if the received string matches the sent string, False otherwise. + """ + test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(size)) + recvd_str = _exchg_udp(cfg, port, test_str) + + return recvd_str == test_str + + +def _load_xdp_prog(cfg, bpf_info): + """ + Loads an XDP program onto a network interface. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + + Returns: + dict: A dictionary containing the XDP program ID, name, and associated map IDs. + """ + abs_path = cfg.net_lib_dir / bpf_info.file + prog_info = {} + + cmd(f"ip link set dev {cfg.remote_ifname} mtu {bpf_info.mtu}", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) + + cmd( + f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdpdrv obj {abs_path} sec {bpf_info.xdp_sec}", + shell=True + ) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpdrv off") + + xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0] + prog_info["id"] = xdp_info["xdp"]["prog"]["id"] + prog_info["name"] = xdp_info["xdp"]["prog"]["name"] + prog_id = prog_info["id"] + + map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"] + prog_info["maps"] = {} + for map_id in map_ids: + name = bpftool(f"map show id {map_id}", json=True)["name"] + prog_info["maps"][name] = map_id + + return prog_info + + +def format_hex_bytes(value): + """ + Helper function that converts an integer into a formatted hexadecimal byte string. + + Args: + value: An integer representing the number to be converted. + + Returns: + A string representing hexadecimal equivalent of value, with bytes separated by spaces. + """ + hex_str = value.to_bytes(4, byteorder='little', signed=True) + return ' '.join(f'{byte:02x}' for byte in hex_str) + + +def _set_xdp_map(map_name, key, value): + """ + Updates an XDP map with a given key-value pair using bpftool. + + Args: + map_name: The name of the XDP map to update. + key: The key to update in the map, formatted as a hexadecimal string. + value: The value to associate with the key, formatted as a hexadecimal string. + """ + key_formatted = format_hex_bytes(key) + value_formatted = format_hex_bytes(value) + bpftool( + f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}" + ) + + +def _get_stats(xdp_map_id): + """ + Retrieves and formats statistics from an XDP map. + + Args: + xdp_map_id: The ID of the XDP map from which to retrieve statistics. + + Returns: + A dictionary containing formatted packet statistics for various XDP actions. + The keys are based on the XDPStats Enum values. + + Raises: + KsftFailEx: If the stats retrieval fails. + """ + stats_dump = bpftool(f"map dump id {xdp_map_id}", json=True) + if not stats_dump: + raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}") + + stats_formatted = {} + for key in range(0, 5): + val = stats_dump[key]["formatted"]["value"] + if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value: + stats_formatted[XDPStats.RX.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.PASS.value: + stats_formatted[XDPStats.PASS.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value: + stats_formatted[XDPStats.DROP.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value: + stats_formatted[XDPStats.TX.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.ABORT.value: + stats_formatted[XDPStats.ABORT.value] = val + + return stats_formatted + + +def _test_pass(cfg, bpf_info, msg_sz): + """ + Tests the XDP_PASS action by exchanging UDP packets. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + msg_sz: Size of the test message to send. + """ + + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + ksft_eq(_test_udp(cfg, port, msg_sz), True, "UDP packet exchange failed") + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero") + ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.PASS.value], "RX and PASS stats mismatch") + + +def test_xdp_native_pass_sb(cfg): + """ + Tests the XDP_PASS action for single buffer case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + _test_pass(cfg, bpf_info, 256) + + +def test_xdp_native_pass_mb(cfg): + """ + Tests the XDP_PASS action for a multi-buff size. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + _test_pass(cfg, bpf_info, 8000) + + +def _test_drop(cfg, bpf_info, msg_sz): + """ + Tests the XDP_DROP action by exchanging UDP packets. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + msg_sz: Size of the test message to send. + """ + + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + ksft_eq(_test_udp(cfg, port, msg_sz), False, "UDP packet exchange should fail") + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should be zero") + ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.DROP.value], "RX and DROP stats mismatch") + + +def test_xdp_native_drop_sb(cfg): + """ + Tests the XDP_DROP action for a signle-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + _test_drop(cfg, bpf_info, 256) + + +def test_xdp_native_drop_mb(cfg): + """ + Tests the XDP_DROP action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + _test_drop(cfg, bpf_info, 8000) + + +def _test_xdp_native_tx(cfg, bpf_info, payload_lens): + """ + Tests the XDP_TX action. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing the BPF program metadata. + payload_lens: Array of packet lengths to send. + """ + cfg.require_cmd("socat", remote=True) + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + expected_pkts = 0 + for payload_len in payload_lens: + test_string = "".join( + random.choice(string.ascii_lowercase) for _ in range(payload_len) + ) + + rx_udp = f"socat -{cfg.addr_ipver} -T 2 " + \ + f"-u UDP-RECV:{port},reuseport STDOUT" + + # Writing zero bytes to stdin gets ignored by socat, + # but with the shut-null flag socat generates a zero sized packet + # when the socket is closed. + tx_cmd_suffix = ",shut-null" if payload_len == 0 else "" + tx_udp = f"echo -n {test_string} | socat -t 2 " + \ + f"-u STDIN UDP:{cfg.baddr}:{port}{tx_cmd_suffix}" + + with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: + wait_port_listen(port, proto="udp", host=cfg.remote) + cmd(tx_udp, host=cfg.remote, shell=True) + + ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed") + + expected_pkts += 1 + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + ksft_eq(stats[XDPStats.RX.value], expected_pkts, "RX stats mismatch") + ksft_eq(stats[XDPStats.TX.value], expected_pkts, "TX stats mismatch") + + +def test_xdp_native_tx_sb(cfg): + """ + Tests the XDP_TX action for a single-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + # Ensure there's enough room for an ETH / IP / UDP header + pkt_hdr_len = 42 if cfg.addr_ipver == "4" else 62 + + _test_xdp_native_tx(cfg, bpf_info, [0, 1500 // 2, 1500 - pkt_hdr_len]) + + +def test_xdp_native_tx_mb(cfg): + """ + Tests the XDP_TX action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", + "xdp.frags", 9000) + # The first packet ensures we exercise the fragmented code path. + # And the subsequent 0-sized packet ensures the driver + # reinitializes xdp_buff correctly. + _test_xdp_native_tx(cfg, bpf_info, [8000, 0]) + + +def _validate_res(res, offset_lst, pkt_sz_lst): + """ + Validates the result of a test. + + Args: + res: The result of the test, which should be a dictionary with a "status" key. + + Raises: + KsftFailEx: If the test fails to pass any combination of offset and packet size. + """ + if "status" not in res: + raise KsftFailEx("Missing 'status' key in result dictionary") + + # Validate that not a single case was successful + if res["status"] == "fail": + if res["offset"] == offset_lst[0] and res["pkt_sz"] == pkt_sz_lst[0]: + raise KsftFailEx(f"{res['reason']}") + + # Get the previous offset and packet size to report the successful run + tmp_idx = offset_lst.index(res["offset"]) + prev_offset = offset_lst[tmp_idx - 1] + if tmp_idx == 0: + tmp_idx = pkt_sz_lst.index(res["pkt_sz"]) + prev_pkt_sz = pkt_sz_lst[tmp_idx - 1] + else: + prev_pkt_sz = res["pkt_sz"] + + # Use these values for error reporting + ksft_pr( + f"Failed run: pkt_sz {res['pkt_sz']}, offset {res['offset']}. " + f"Last successful run: pkt_sz {prev_pkt_sz}, offset {prev_offset}. " + f"Reason: {res['reason']}" + ) + + +def _check_for_failures(recvd_str, stats): + """ + Checks for common failures while adjusting headroom or tailroom. + + Args: + recvd_str: The string received from the remote host after sending a test string. + stats: A dictionary containing formatted packet statistics for various XDP actions. + + Returns: + str: A string describing the failure reason if a failure is detected, otherwise None. + """ + + # Any adjustment failure result in an abort hence, we track this counter + if stats[XDPStats.ABORT.value] != 0: + return "Adjustment failed" + + # Since we are using aggregate stats for a single test across all offsets and packet sizes + # we can't use RX stats only to track data exchange failure without taking a previous + # snapshot. An easier way is to simply check for non-zero length of received string. + if len(recvd_str) == 0: + return "Data exchange failed" + + # Check for RX and PASS stats mismatch. Ideally, they should be equal for a successful run + if stats[XDPStats.RX.value] != stats[XDPStats.PASS.value]: + return "RX stats mismatch" + + return None + + +def _test_xdp_native_tail_adjst(cfg, pkt_sz_lst, offset_lst): + """ + Tests the XDP tail adjustment functionality. + + This function loads the appropriate XDP program based on the provided + program name and configures the XDP map for tail adjustment. It then + validates the tail adjustment by sending and receiving UDP packets + with specified packet sizes and offsets. + + Args: + cfg: Configuration object containing network settings. + prog: Name of the XDP program to load. + pkt_sz_lst: List of packet sizes to test. + offset_lst: List of offsets to validate support for tail adjustment. + + Returns: + dict: A dictionary with test status and failure details if applicable. + """ + port = rand_port() + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + prog_info = _load_xdp_prog(cfg, bpf_info) + + # Configure the XDP map for tail adjustment + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + for offset in offset_lst: + tag = format(random.randint(65, 90), "02x") + + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) + if offset > 0: + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) + + for pkt_sz in pkt_sz_lst: + test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz)) + recvd_str = _exchg_udp(cfg, port, test_str) + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + failure = _check_for_failures(recvd_str, stats) + if failure is not None: + return { + "status": "fail", + "reason": failure, + "offset": offset, + "pkt_sz": pkt_sz, + } + + # Validate data content based on offset direction + expected_data = None + if offset > 0: + expected_data = test_str + (offset * chr(int(tag, 16))) + else: + expected_data = test_str[0:pkt_sz + offset] + + if recvd_str != expected_data: + return { + "status": "fail", + "reason": "Data mismatch", + "offset": offset, + "pkt_sz": pkt_sz, + } + + return {"status": "pass"} + + +def test_xdp_native_adjst_tail_grow_data(cfg): + """ + Tests the XDP tail adjustment by growing packet data. + + Args: + cfg: Configuration object containing network settings. + """ + pkt_sz_lst = [512, 1024, 2048] + offset_lst = [1, 16, 32, 64, 128, 256] + res = _test_xdp_native_tail_adjst( + cfg, + pkt_sz_lst, + offset_lst, + ) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def test_xdp_native_adjst_tail_shrnk_data(cfg): + """ + Tests the XDP tail adjustment by shrinking packet data. + + Args: + cfg: Configuration object containing network settings. + """ + pkt_sz_lst = [512, 1024, 2048] + offset_lst = [-16, -32, -64, -128, -256] + res = _test_xdp_native_tail_adjst( + cfg, + pkt_sz_lst, + offset_lst, + ) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def get_hds_thresh(cfg): + """ + Retrieves the header data split (HDS) threshold for a network interface. + + Args: + cfg: Configuration object containing network settings. + + Returns: + The HDS threshold value. If the threshold is not supported or an error occurs, + a default value of 1500 is returned. + """ + ethnl = cfg.ethnl + hds_thresh = 1500 + + try: + rings = ethnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if 'hds-thresh' not in rings: + ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}') + return hds_thresh + hds_thresh = rings['hds-thresh'] + except NlError as e: + ksft_pr(f"Failed to get rings: {e}. Using default: {hds_thresh}") + + return hds_thresh + + +def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst): + """ + Tests the XDP head adjustment action for a multi-buffer case. + + Args: + cfg: Configuration object containing network settings. + ethnl: Network namespace or link object (not used in this function). + + This function sets up the packet size and offset lists, then performs + the head adjustment test by sending and receiving UDP packets. + """ + cfg.require_cmd("socat", remote=True) + + prog_info = _load_xdp_prog(cfg, BPFProgInfo(prog, "xdp_native.bpf.o", "xdp.frags", 9000)) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + hds_thresh = get_hds_thresh(cfg) + for offset in offset_lst: + for pkt_sz in pkt_sz_lst: + # The "head" buffer must contain at least the Ethernet header + # after we eat into it. We send large-enough packets, but if HDS + # is enabled head will only contain headers. Don't try to eat + # more than 28 bytes (UDPv4 + eth hdr left: (14 + 20 + 8) - 14) + l2_cut_off = 28 if cfg.addr_ipver == 4 else 48 + if pkt_sz > hds_thresh and offset > l2_cut_off: + ksft_pr( + f"Failed run: pkt_sz ({pkt_sz}) > HDS threshold ({hds_thresh}) and " + f"offset {offset} > {l2_cut_off}" + ) + return {"status": "pass"} + + test_str = ''.join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz)) + tag = format(random.randint(65, 90), '02x') + + _set_xdp_map("map_xdp_setup", + TestConfig.ADJST_OFFSET.value, + offset) + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) + + recvd_str = _exchg_udp(cfg, port, test_str) + + # Check for failures around adjustment and data exchange + failure = _check_for_failures(recvd_str, _get_stats(prog_info['maps']['map_xdp_stats'])) + if failure is not None: + return { + "status": "fail", + "reason": failure, + "offset": offset, + "pkt_sz": pkt_sz + } + + # Validate data content based on offset direction + expected_data = None + if offset < 0: + expected_data = chr(int(tag, 16)) * (0 - offset) + test_str + else: + expected_data = test_str[offset:] + + if recvd_str != expected_data: + return { + "status": "fail", + "reason": "Data mismatch", + "offset": offset, + "pkt_sz": pkt_sz + } + + return {"status": "pass"} + + +def test_xdp_native_adjst_head_grow_data(cfg): + """ + Tests the XDP headroom growth support. + + Args: + cfg: Configuration object containing network settings. + + This function sets up the packet size and offset lists, then calls the + _test_xdp_native_head_adjst_mb function to perform the actual test. The + test is passed if the headroom is successfully extended for given packet + sizes and offsets. + """ + pkt_sz_lst = [512, 1024, 2048] + + # Negative values result in headroom shrinking, resulting in growing of payload + offset_lst = [-16, -32, -64, -128, -256] + res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def test_xdp_native_adjst_head_shrnk_data(cfg): + """ + Tests the XDP headroom shrinking support. + + Args: + cfg: Configuration object containing network settings. + + This function sets up the packet size and offset lists, then calls the + _test_xdp_native_head_adjst_mb function to perform the actual test. The + test is passed if the headroom is successfully shrunk for given packet + sizes and offsets. + """ + pkt_sz_lst = [512, 1024, 2048] + + # Positive values result in headroom growing, resulting in shrinking of payload + offset_lst = [16, 32, 64, 128, 256] + res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +@ksft_variants([ + KsftNamedVariant("pass", XDPAction.PASS), + KsftNamedVariant("drop", XDPAction.DROP), + KsftNamedVariant("tx", XDPAction.TX), +]) +def test_xdp_native_qstats(cfg, act): + """ + Send 1000 messages. Expect XDP action specified in @act. + Make sure the packets were counted to interface level qstats + (Rx, and Tx if act is TX). + """ + + cfg.require_cmd("socat") + + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, act.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + # Discard the input, but we need a listener to avoid ICMP errors + rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport " + \ + "/dev/null" + # Listener runs on "remote" in case of XDP_TX + rx_host = cfg.remote if act == XDPAction.TX else None + # We want to spew 1000 packets quickly, bash seems to do a good enough job + # Each reopening of the socket gives us a differenot local port (for RSS) + tx_udp = "for _ in `seq 20`; do " \ + f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \ + "for i in `seq 50`; do echo a >&5; done; " \ + "exec 5>&-; done" + + cfg.wait_hw_stats_settle() + # Qstats have more clearly defined semantics than rtnetlink. + # XDP is the "first layer of the stack" so XDP packets should be counted + # as received and sent as if the decision was made in the routing layer. + before = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + with bkg(rx_udp, host=rx_host, exit_wait=True): + wait_port_listen(port, proto="udp", host=rx_host) + cmd(tx_udp, host=cfg.remote, shell=True) + + cfg.wait_hw_stats_settle() + after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + expected_pkts = 1000 + ksft_ge(after['rx-packets'] - before['rx-packets'], expected_pkts) + if act == XDPAction.TX: + ksft_ge(after['tx-packets'] - before['tx-packets'], expected_pkts) + + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + ksft_eq(stats[XDPStats.RX.value], expected_pkts, "XDP RX stats mismatch") + if act == XDPAction.TX: + ksft_eq(stats[XDPStats.TX.value], expected_pkts, "XDP TX stats mismatch") + + # Flip the ring count back and forth to make sure the stats from XDP rings + # don't get lost. + chans = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if chans.get('combined-count', 0) > 1: + cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex}, + 'combined-count': 1}) + cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex}, + 'combined-count': chans['combined-count']}) + before = after + after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + ksft_ge(after['rx-packets'], before['rx-packets']) + if act == XDPAction.TX: + ksft_ge(after['tx-packets'], before['tx-packets']) + + +def main(): + """ + Main function to execute the XDP tests. + + This function runs a series of tests to validate the XDP support for + both the single and multi-buffer. It uses the NetDrvEpEnv context + manager to manage the network driver environment and the ksft_run + function to execute the tests. + """ + with NetDrvEpEnv(__file__) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + ksft_run( + [ + test_xdp_native_pass_sb, + test_xdp_native_pass_mb, + test_xdp_native_drop_sb, + test_xdp_native_drop_mb, + test_xdp_native_tx_sb, + test_xdp_native_tx_mb, + test_xdp_native_adjst_tail_grow_data, + test_xdp_native_adjst_tail_shrnk_data, + test_xdp_native_adjst_head_grow_data, + test_xdp_native_adjst_head_shrnk_data, + test_xdp_native_qstats, + ], + args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/ntsync/.gitignore b/tools/testing/selftests/drivers/ntsync/.gitignore new file mode 100644 index 000000000000..848573a3d3ea --- /dev/null +++ b/tools/testing/selftests/drivers/ntsync/.gitignore @@ -0,0 +1 @@ +ntsync diff --git a/tools/testing/selftests/drivers/ntsync/Makefile b/tools/testing/selftests/drivers/ntsync/Makefile new file mode 100644 index 000000000000..dbf2b055c0b2 --- /dev/null +++ b/tools/testing/selftests/drivers/ntsync/Makefile @@ -0,0 +1,7 @@ +# SPDX-LICENSE-IDENTIFIER: GPL-2.0-only +TEST_GEN_PROGS := ntsync + +CFLAGS += $(KHDR_INCLUDES) +LDLIBS += -lpthread + +include ../../lib.mk diff --git a/tools/testing/selftests/drivers/ntsync/config b/tools/testing/selftests/drivers/ntsync/config new file mode 100644 index 000000000000..60539c826d06 --- /dev/null +++ b/tools/testing/selftests/drivers/ntsync/config @@ -0,0 +1 @@ +CONFIG_WINESYNC=y diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c new file mode 100644 index 000000000000..e6a37214aa46 --- /dev/null +++ b/tools/testing/selftests/drivers/ntsync/ntsync.c @@ -0,0 +1,1343 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Various unit tests for the "ntsync" synchronization primitive driver. + * + * Copyright (C) 2021-2022 Elizabeth Figura <zfigura@codeweavers.com> + */ + +#define _GNU_SOURCE +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <pthread.h> +#include <linux/ntsync.h> +#include "kselftest_harness.h" + +static int read_sem_state(int sem, __u32 *count, __u32 *max) +{ + struct ntsync_sem_args args; + int ret; + + memset(&args, 0xcc, sizeof(args)); + ret = ioctl(sem, NTSYNC_IOC_SEM_READ, &args); + *count = args.count; + *max = args.max; + return ret; +} + +#define check_sem_state(sem, count, max) \ + ({ \ + __u32 __count, __max; \ + int ret = read_sem_state((sem), &__count, &__max); \ + EXPECT_EQ(0, ret); \ + EXPECT_EQ((count), __count); \ + EXPECT_EQ((max), __max); \ + }) + +static int release_sem(int sem, __u32 *count) +{ + return ioctl(sem, NTSYNC_IOC_SEM_RELEASE, count); +} + +static int read_mutex_state(int mutex, __u32 *count, __u32 *owner) +{ + struct ntsync_mutex_args args; + int ret; + + memset(&args, 0xcc, sizeof(args)); + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &args); + *count = args.count; + *owner = args.owner; + return ret; +} + +#define check_mutex_state(mutex, count, owner) \ + ({ \ + __u32 __count, __owner; \ + int ret = read_mutex_state((mutex), &__count, &__owner); \ + EXPECT_EQ(0, ret); \ + EXPECT_EQ((count), __count); \ + EXPECT_EQ((owner), __owner); \ + }) + +static int unlock_mutex(int mutex, __u32 owner, __u32 *count) +{ + struct ntsync_mutex_args args; + int ret; + + args.owner = owner; + args.count = 0xdeadbeef; + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_UNLOCK, &args); + *count = args.count; + return ret; +} + +static int read_event_state(int event, __u32 *signaled, __u32 *manual) +{ + struct ntsync_event_args args; + int ret; + + memset(&args, 0xcc, sizeof(args)); + ret = ioctl(event, NTSYNC_IOC_EVENT_READ, &args); + *signaled = args.signaled; + *manual = args.manual; + return ret; +} + +#define check_event_state(event, signaled, manual) \ + ({ \ + __u32 __signaled, __manual; \ + int ret = read_event_state((event), &__signaled, &__manual); \ + EXPECT_EQ(0, ret); \ + EXPECT_EQ((signaled), __signaled); \ + EXPECT_EQ((manual), __manual); \ + }) + +static int wait_objs(int fd, unsigned long request, __u32 count, + const int *objs, __u32 owner, int alert, __u32 *index) +{ + struct ntsync_wait_args args = {0}; + struct timespec timeout; + int ret; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + args.timeout = timeout.tv_sec * 1000000000 + timeout.tv_nsec; + args.count = count; + args.objs = (uintptr_t)objs; + args.owner = owner; + args.index = 0xdeadbeef; + args.alert = alert; + ret = ioctl(fd, request, &args); + *index = args.index; + return ret; +} + +static int wait_any(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index) +{ + return wait_objs(fd, NTSYNC_IOC_WAIT_ANY, count, objs, owner, 0, index); +} + +static int wait_all(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index) +{ + return wait_objs(fd, NTSYNC_IOC_WAIT_ALL, count, objs, owner, 0, index); +} + +static int wait_any_alert(int fd, __u32 count, const int *objs, + __u32 owner, int alert, __u32 *index) +{ + return wait_objs(fd, NTSYNC_IOC_WAIT_ANY, + count, objs, owner, alert, index); +} + +static int wait_all_alert(int fd, __u32 count, const int *objs, + __u32 owner, int alert, __u32 *index) +{ + return wait_objs(fd, NTSYNC_IOC_WAIT_ALL, + count, objs, owner, alert, index); +} + +TEST(semaphore_state) +{ + struct ntsync_sem_args sem_args; + struct timespec timeout; + __u32 count, index; + int fd, ret, sem; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 3; + sem_args.max = 2; + sem = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(-1, sem); + EXPECT_EQ(EINVAL, errno); + + sem_args.count = 2; + sem_args.max = 2; + sem = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, sem); + check_sem_state(sem, 2, 2); + + count = 0; + ret = release_sem(sem, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + check_sem_state(sem, 2, 2); + + count = 1; + ret = release_sem(sem, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOVERFLOW, errno); + check_sem_state(sem, 2, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(sem, 1, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(sem, 0, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + count = 3; + ret = release_sem(sem, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOVERFLOW, errno); + check_sem_state(sem, 0, 2); + + count = 2; + ret = release_sem(sem, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(sem, 2, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + + count = 1; + ret = release_sem(sem, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(sem, 1, 2); + + count = ~0u; + ret = release_sem(sem, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOVERFLOW, errno); + check_sem_state(sem, 1, 2); + + close(sem); + + close(fd); +} + +TEST(mutex_state) +{ + struct ntsync_mutex_args mutex_args; + __u32 owner, count, index; + struct timespec timeout; + int fd, ret, mutex; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + mutex_args.owner = 123; + mutex_args.count = 0; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(-1, mutex); + EXPECT_EQ(EINVAL, errno); + + mutex_args.owner = 0; + mutex_args.count = 2; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(-1, mutex); + EXPECT_EQ(EINVAL, errno); + + mutex_args.owner = 123; + mutex_args.count = 2; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, mutex); + check_mutex_state(mutex, 2, 123); + + ret = unlock_mutex(mutex, 0, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = unlock_mutex(mutex, 456, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + check_mutex_state(mutex, 2, 123); + + ret = unlock_mutex(mutex, 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + check_mutex_state(mutex, 1, 123); + + ret = unlock_mutex(mutex, 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, count); + check_mutex_state(mutex, 0, 0); + + ret = unlock_mutex(mutex, 123, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + + ret = wait_any(fd, 1, &mutex, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 1, 456); + + ret = wait_any(fd, 1, &mutex, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 2, 456); + + ret = unlock_mutex(mutex, 456, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + check_mutex_state(mutex, 1, 456); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + owner = 0; + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + owner = 123; + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + check_mutex_state(mutex, 1, 456); + + owner = 456; + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(0, ret); + + memset(&mutex_args, 0xcc, sizeof(mutex_args)); + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, mutex_args.count); + EXPECT_EQ(0, mutex_args.owner); + + memset(&mutex_args, 0xcc, sizeof(mutex_args)); + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, mutex_args.count); + EXPECT_EQ(0, mutex_args.owner); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 1, 123); + + owner = 123; + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(0, ret); + + memset(&mutex_args, 0xcc, sizeof(mutex_args)); + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, mutex_args.count); + EXPECT_EQ(0, mutex_args.owner); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 1, 123); + + close(mutex); + + mutex_args.owner = 0; + mutex_args.count = 0; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, mutex); + check_mutex_state(mutex, 0, 0); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 1, 123); + + close(mutex); + + mutex_args.owner = 123; + mutex_args.count = ~0u; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, mutex); + check_mutex_state(mutex, ~0u, 123); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + close(mutex); + + close(fd); +} + +TEST(manual_event_state) +{ + struct ntsync_event_args event_args; + __u32 index, signaled; + int fd, event, ret; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + event_args.manual = 1; + event_args.signaled = 0; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + check_event_state(event, 0, 1); + + signaled = 0xdeadbeef; + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 1, 1); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 1, 1); + + ret = wait_any(fd, 1, &event, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_event_state(event, 1, 1); + + signaled = 0xdeadbeef; + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 0, 1); + + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 0, 1); + + ret = wait_any(fd, 1, &event, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + + ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 0, 1); + + ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 0, 1); + + close(event); + + close(fd); +} + +TEST(auto_event_state) +{ + struct ntsync_event_args event_args; + __u32 index, signaled; + int fd, event, ret; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + event_args.manual = 0; + event_args.signaled = 1; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + check_event_state(event, 1, 0); + + signaled = 0xdeadbeef; + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 1, 0); + + ret = wait_any(fd, 1, &event, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_event_state(event, 0, 0); + + signaled = 0xdeadbeef; + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 0, 0); + + ret = wait_any(fd, 1, &event, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + + ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 0, 0); + + ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 0, 0); + + close(event); + + close(fd); +} + +TEST(test_wait_any) +{ + int objs[NTSYNC_MAX_WAIT_COUNT + 1], fd, ret; + struct ntsync_mutex_args mutex_args = {0}; + struct ntsync_sem_args sem_args = {0}; + __u32 owner, index, count, i; + struct timespec timeout; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 2; + sem_args.max = 3; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + mutex_args.owner = 0; + mutex_args.count = 0; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, objs[1]); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 0, 0); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 0, 0); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 1, 123); + + count = 1; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 1, 123); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 2, 123); + + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + owner = 123; + ret = ioctl(objs[1], NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(0, ret); + + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(1, index); + + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + + close(objs[1]); + + /* test waiting on the same object twice */ + + count = 2; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + + objs[1] = objs[0]; + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 1, 3); + + ret = wait_any(fd, 0, NULL, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + for (i = 1; i < NTSYNC_MAX_WAIT_COUNT + 1; ++i) + objs[i] = objs[0]; + + ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT + 1, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = wait_any(fd, -1, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + close(objs[0]); + + close(fd); +} + +TEST(test_wait_all) +{ + struct ntsync_event_args event_args = {0}; + struct ntsync_mutex_args mutex_args = {0}; + struct ntsync_sem_args sem_args = {0}; + __u32 owner, index, count; + int objs[2], fd, ret; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 2; + sem_args.max = 3; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + mutex_args.owner = 0; + mutex_args.count = 0; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, objs[1]); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 1, 123); + + ret = wait_all(fd, 2, objs, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 1, 123); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 2, 123); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 2, 123); + + count = 3; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 2, 3); + check_mutex_state(objs[1], 3, 123); + + owner = 123; + ret = ioctl(objs[1], NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(0, ret); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 1, 123); + + close(objs[1]); + + event_args.manual = true; + event_args.signaled = true; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, objs[1]); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 0, 3); + check_event_state(objs[1], 1, 1); + + close(objs[1]); + + /* test waiting on the same object twice */ + objs[1] = objs[0]; + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + close(objs[0]); + + close(fd); +} + +struct wake_args { + int fd; + int obj; +}; + +struct wait_args { + int fd; + unsigned long request; + struct ntsync_wait_args *args; + int ret; + int err; +}; + +static void *wait_thread(void *arg) +{ + struct wait_args *args = arg; + + args->ret = ioctl(args->fd, args->request, args->args); + args->err = errno; + return NULL; +} + +static __u64 get_abs_timeout(unsigned int ms) +{ + struct timespec timeout; + clock_gettime(CLOCK_MONOTONIC, &timeout); + return (timeout.tv_sec * 1000000000) + timeout.tv_nsec + (ms * 1000000); +} + +static int wait_for_thread(pthread_t thread, unsigned int ms) +{ + struct timespec timeout; + + clock_gettime(CLOCK_REALTIME, &timeout); + timeout.tv_nsec += ms * 1000000; + timeout.tv_sec += (timeout.tv_nsec / 1000000000); + timeout.tv_nsec %= 1000000000; + return pthread_timedjoin_np(thread, NULL, &timeout); +} + +TEST(wake_any) +{ + struct ntsync_event_args event_args = {0}; + struct ntsync_mutex_args mutex_args = {0}; + struct ntsync_wait_args wait_args = {0}; + struct ntsync_sem_args sem_args = {0}; + struct wait_args thread_args; + __u32 count, index, signaled; + int objs[2], fd, ret; + pthread_t thread; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 0; + sem_args.max = 3; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + mutex_args.owner = 123; + mutex_args.count = 1; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, objs[1]); + + /* test waking the semaphore */ + + wait_args.timeout = get_abs_timeout(1000); + wait_args.objs = (uintptr_t)objs; + wait_args.count = 2; + wait_args.owner = 456; + wait_args.index = 0xdeadbeef; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = NTSYNC_IOC_WAIT_ANY; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + count = 1; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(objs[0], 0, 3); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(0, wait_args.index); + + /* test waking the mutex */ + + /* first grab it again for owner 123 */ + ret = wait_any(fd, 1, &objs[1], 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + wait_args.timeout = get_abs_timeout(1000); + wait_args.owner = 456; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = unlock_mutex(objs[1], 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + + ret = pthread_tryjoin_np(thread, NULL); + EXPECT_EQ(EBUSY, ret); + + ret = unlock_mutex(objs[1], 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, mutex_args.count); + check_mutex_state(objs[1], 1, 456); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + close(objs[1]); + + /* test waking events */ + + event_args.manual = false; + event_args.signaled = false; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, objs[1]); + + wait_args.timeout = get_abs_timeout(1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(objs[1], 0, 0); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + wait_args.timeout = get_abs_timeout(1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(objs[1], 0, 0); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + close(objs[1]); + + event_args.manual = true; + event_args.signaled = false; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, objs[1]); + + wait_args.timeout = get_abs_timeout(1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(objs[1], 1, 1); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + + wait_args.timeout = get_abs_timeout(1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(objs[1], 0, 1); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + /* delete an object while it's being waited on */ + + wait_args.timeout = get_abs_timeout(200); + wait_args.owner = 123; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + close(objs[0]); + close(objs[1]); + + ret = wait_for_thread(thread, 200); + EXPECT_EQ(0, ret); + EXPECT_EQ(-1, thread_args.ret); + EXPECT_EQ(ETIMEDOUT, thread_args.err); + + close(fd); +} + +TEST(wake_all) +{ + struct ntsync_event_args manual_event_args = {0}; + struct ntsync_event_args auto_event_args = {0}; + struct ntsync_mutex_args mutex_args = {0}; + struct ntsync_wait_args wait_args = {0}; + struct ntsync_sem_args sem_args = {0}; + struct wait_args thread_args; + __u32 count, index, signaled; + int objs[4], fd, ret; + pthread_t thread; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 0; + sem_args.max = 3; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + mutex_args.owner = 123; + mutex_args.count = 1; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, objs[1]); + + manual_event_args.manual = true; + manual_event_args.signaled = true; + objs[2] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &manual_event_args); + EXPECT_LE(0, objs[2]); + + auto_event_args.manual = false; + auto_event_args.signaled = true; + objs[3] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &auto_event_args); + EXPECT_EQ(0, objs[3]); + + wait_args.timeout = get_abs_timeout(1000); + wait_args.objs = (uintptr_t)objs; + wait_args.count = 4; + wait_args.owner = 456; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = NTSYNC_IOC_WAIT_ALL; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + count = 1; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + + ret = pthread_tryjoin_np(thread, NULL); + EXPECT_EQ(EBUSY, ret); + + check_sem_state(objs[0], 1, 3); + + ret = wait_any(fd, 1, &objs[0], 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = unlock_mutex(objs[1], 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, count); + + ret = pthread_tryjoin_np(thread, NULL); + EXPECT_EQ(EBUSY, ret); + + check_mutex_state(objs[1], 0, 0); + + ret = ioctl(objs[2], NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + + count = 2; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(objs[0], 2, 3); + + ret = ioctl(objs[3], NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + + ret = ioctl(objs[2], NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + + ret = ioctl(objs[3], NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 1, 456); + check_event_state(objs[2], 1, 1); + check_event_state(objs[3], 0, 0); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + + /* delete an object while it's being waited on */ + + wait_args.timeout = get_abs_timeout(200); + wait_args.owner = 123; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + close(objs[0]); + close(objs[1]); + close(objs[2]); + close(objs[3]); + + ret = wait_for_thread(thread, 200); + EXPECT_EQ(0, ret); + EXPECT_EQ(-1, thread_args.ret); + EXPECT_EQ(ETIMEDOUT, thread_args.err); + + close(fd); +} + +TEST(alert_any) +{ + struct ntsync_event_args event_args = {0}; + struct ntsync_wait_args wait_args = {0}; + struct ntsync_sem_args sem_args = {0}; + __u32 index, count, signaled; + struct wait_args thread_args; + int objs[2], event, fd, ret; + pthread_t thread; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 0; + sem_args.max = 2; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + sem_args.count = 1; + sem_args.max = 2; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[1]); + + event_args.manual = true; + event_args.signaled = true; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + ret = wait_any_alert(fd, 0, NULL, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + + ret = wait_any_alert(fd, 0, NULL, 123, event, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + /* test wakeup via alert */ + + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + + wait_args.timeout = get_abs_timeout(1000); + wait_args.objs = (uintptr_t)objs; + wait_args.count = 2; + wait_args.owner = 123; + wait_args.index = 0xdeadbeef; + wait_args.alert = event; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = NTSYNC_IOC_WAIT_ANY; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(2, wait_args.index); + + close(event); + + /* test with an auto-reset event */ + + event_args.manual = false; + event_args.signaled = true; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + count = 1; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + close(event); + + close(objs[0]); + close(objs[1]); + + close(fd); +} + +TEST(alert_all) +{ + struct ntsync_event_args event_args = {0}; + struct ntsync_wait_args wait_args = {0}; + struct ntsync_sem_args sem_args = {0}; + struct wait_args thread_args; + __u32 index, count, signaled; + int objs[2], event, fd, ret; + pthread_t thread; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 2; + sem_args.max = 2; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + sem_args.count = 1; + sem_args.max = 2; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[1]); + + event_args.manual = true; + event_args.signaled = true; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + /* test wakeup via alert */ + + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + + wait_args.timeout = get_abs_timeout(1000); + wait_args.objs = (uintptr_t)objs; + wait_args.count = 2; + wait_args.owner = 123; + wait_args.index = 0xdeadbeef; + wait_args.alert = event; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = NTSYNC_IOC_WAIT_ALL; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(2, wait_args.index); + + close(event); + + /* test with an auto-reset event */ + + event_args.manual = false; + event_args.signaled = true; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + count = 2; + ret = release_sem(objs[1], &count); + EXPECT_EQ(0, ret); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + close(event); + + close(objs[0]); + close(objs[1]); + + close(fd); +} + +#define STRESS_LOOPS 10000 +#define STRESS_THREADS 4 + +static unsigned int stress_counter; +static int stress_device, stress_start_event, stress_mutex; + +static void *stress_thread(void *arg) +{ + struct ntsync_wait_args wait_args = {0}; + __u32 index, count, i; + int ret; + + wait_args.timeout = UINT64_MAX; + wait_args.count = 1; + wait_args.objs = (uintptr_t)&stress_start_event; + wait_args.owner = gettid(); + wait_args.index = 0xdeadbeef; + + ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args); + + wait_args.objs = (uintptr_t)&stress_mutex; + + for (i = 0; i < STRESS_LOOPS; ++i) { + ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args); + + ++stress_counter; + + unlock_mutex(stress_mutex, wait_args.owner, &count); + } + + return NULL; +} + +TEST(stress_wait) +{ + struct ntsync_event_args event_args; + struct ntsync_mutex_args mutex_args; + pthread_t threads[STRESS_THREADS]; + __u32 signaled, i; + int ret; + + stress_device = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, stress_device); + + mutex_args.owner = 0; + mutex_args.count = 0; + stress_mutex = ioctl(stress_device, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, stress_mutex); + + event_args.manual = 1; + event_args.signaled = 0; + stress_start_event = ioctl(stress_device, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, stress_start_event); + + for (i = 0; i < STRESS_THREADS; ++i) + pthread_create(&threads[i], NULL, stress_thread, NULL); + + ret = ioctl(stress_start_event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + + for (i = 0; i < STRESS_THREADS; ++i) { + ret = pthread_join(threads[i], NULL); + EXPECT_EQ(0, ret); + } + + EXPECT_EQ(STRESS_LOOPS * STRESS_THREADS, stress_counter); + + close(stress_start_event); + close(stress_mutex); + close(stress_device); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile b/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile new file mode 100644 index 000000000000..03d0449d307c --- /dev/null +++ b/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for ifs(In Field Scan) selftests + +TEST_PROGS := test_ifs.sh + +include ../../../../../lib.mk diff --git a/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh b/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh new file mode 100755 index 000000000000..8b68964b29f4 --- /dev/null +++ b/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh @@ -0,0 +1,494 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test the functionality of the Intel IFS(In Field Scan) driver. +# + +# Matched with kselftest framework: tools/testing/selftests/kselftest.h +readonly KSFT_PASS=0 +readonly KSFT_FAIL=1 +readonly KSFT_XFAIL=2 +readonly KSFT_SKIP=4 + +readonly CPU_SYSFS="/sys/devices/system/cpu" +readonly CPU_OFFLINE_SYSFS="${CPU_SYSFS}/offline" +readonly IMG_PATH="/lib/firmware/intel/ifs_0" +readonly IFS_SCAN_MODE="0" +readonly IFS_ARRAY_BIST_SCAN_MODE="1" +readonly IFS_PATH="/sys/devices/virtual/misc/intel_ifs" +readonly IFS_SCAN_SYSFS_PATH="${IFS_PATH}_${IFS_SCAN_MODE}" +readonly IFS_ARRAY_BIST_SYSFS_PATH="${IFS_PATH}_${IFS_ARRAY_BIST_SCAN_MODE}" +readonly RUN_TEST="run_test" +readonly STATUS="status" +readonly DETAILS="details" +readonly STATUS_PASS="pass" +readonly PASS="PASS" +readonly FAIL="FAIL" +readonly INFO="INFO" +readonly XFAIL="XFAIL" +readonly SKIP="SKIP" +readonly IFS_NAME="intel_ifs" +readonly ALL="all" +readonly SIBLINGS="siblings" + +# Matches arch/x86/include/asm/intel-family.h and +# drivers/platform/x86/intel/ifs/core.c requirement as follows +readonly SAPPHIRERAPIDS_X="8f" +readonly EMERALDRAPIDS_X="cf" + +readonly INTEL_FAM6="06" + +LOOP_TIMES=3 +FML="" +MODEL="" +STEPPING="" +CPU_FMS="" +TRUE="true" +FALSE="false" +RESULT=$KSFT_PASS +IMAGE_NAME="" +INTERVAL_TIME=1 +OFFLINE_CPUS="" +# For IFS cleanup tags +ORIGIN_IFS_LOADED="" +IFS_IMAGE_NEED_RESTORE=$FALSE +IFS_LOG="/tmp/ifs_logs.$$" +RANDOM_CPU="" +DEFAULT_IMG_ID="" + +append_log() +{ + echo -e "$1" | tee -a "$IFS_LOG" +} + +online_offline_cpu_list() +{ + local on_off=$1 + local target_cpus=$2 + local cpu="" + local cpu_start="" + local cpu_end="" + local i="" + + if [[ -n "$target_cpus" ]]; then + for cpu in $(echo "$target_cpus" | tr ',' ' '); do + if [[ "$cpu" == *"-"* ]]; then + cpu_start="" + cpu_end="" + i="" + cpu_start=$(echo "$cpu" | cut -d "-" -f 1) + cpu_end=$(echo "$cpu" | cut -d "-" -f 2) + for((i=cpu_start;i<=cpu_end;i++)); do + append_log "[$INFO] echo $on_off > \ +${CPU_SYSFS}/cpu${i}/online" + echo "$on_off" > "$CPU_SYSFS"/cpu"$i"/online + done + else + set_target_cpu "$on_off" "$cpu" + fi + done + fi +} + +ifs_scan_result_summary() +{ + local failed_info pass_num skip_num fail_num + + if [[ -e "$IFS_LOG" ]]; then + failed_info=$(grep ^"\[${FAIL}\]" "$IFS_LOG") + fail_num=$(grep -c ^"\[${FAIL}\]" "$IFS_LOG") + skip_num=$(grep -c ^"\[${SKIP}\]" "$IFS_LOG") + pass_num=$(grep -c ^"\[${PASS}\]" "$IFS_LOG") + + if [[ "$fail_num" -ne 0 ]]; then + RESULT=$KSFT_FAIL + echo "[$INFO] IFS test failure summary:" + echo "$failed_info" + elif [[ "$skip_num" -ne 0 ]]; then + RESULT=$KSFT_SKIP + fi + echo "[$INFO] IFS test pass:$pass_num, skip:$skip_num, fail:$fail_num" + else + echo "[$INFO] No file $IFS_LOG for IFS scan summary" + fi +} + +ifs_cleanup() +{ + echo "[$INFO] Restore environment after IFS test" + + # Restore ifs origin image if origin image backup step is needed + [[ "$IFS_IMAGE_NEED_RESTORE" == "$TRUE" ]] && { + mv -f "$IMG_PATH"/"$IMAGE_NAME"_origin "$IMG_PATH"/"$IMAGE_NAME" + } + + # Restore the CPUs to the state before testing + [[ -z "$OFFLINE_CPUS" ]] || online_offline_cpu_list "0" "$OFFLINE_CPUS" + + lsmod | grep -q "$IFS_NAME" && [[ "$ORIGIN_IFS_LOADED" == "$FALSE" ]] && { + echo "[$INFO] modprobe -r $IFS_NAME" + modprobe -r "$IFS_NAME" + } + + ifs_scan_result_summary + [[ -e "$IFS_LOG" ]] && rm -rf "$IFS_LOG" + + echo "[RESULT] IFS test exit with $RESULT" + exit "$RESULT" +} + +do_cmd() +{ + local cmd=$* + local ret="" + + append_log "[$INFO] $cmd" + eval "$cmd" + ret=$? + if [[ $ret -ne 0 ]]; then + append_log "[$FAIL] $cmd failed. Return code is $ret" + RESULT=$KSFT_XFAIL + ifs_cleanup + fi +} + +test_exit() +{ + local info=$1 + RESULT=$2 + + declare -A EXIT_MAP + EXIT_MAP[$KSFT_PASS]=$PASS + EXIT_MAP[$KSFT_FAIL]=$FAIL + EXIT_MAP[$KSFT_XFAIL]=$XFAIL + EXIT_MAP[$KSFT_SKIP]=$SKIP + + append_log "[${EXIT_MAP[$RESULT]}] $info" + ifs_cleanup +} + +online_all_cpus() +{ + local off_cpus="" + + OFFLINE_CPUS=$(cat "$CPU_OFFLINE_SYSFS") + online_offline_cpu_list "1" "$OFFLINE_CPUS" + + off_cpus=$(cat "$CPU_OFFLINE_SYSFS") + if [[ -z "$off_cpus" ]]; then + append_log "[$INFO] All CPUs are online." + else + append_log "[$XFAIL] There is offline cpu:$off_cpus after online all cpu!" + RESULT=$KSFT_XFAIL + ifs_cleanup + fi +} + +get_cpu_fms() +{ + FML=$(grep -m 1 "family" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}') + MODEL=$(grep -m 1 "model" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}') + STEPPING=$(grep -m 1 "stepping" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}') + CPU_FMS="${FML}-${MODEL}-${STEPPING}" +} + +check_cpu_ifs_support_interval_time() +{ + get_cpu_fms + + if [[ "$FML" != "$INTEL_FAM6" ]]; then + test_exit "CPU family:$FML does not support IFS" "$KSFT_SKIP" + fi + + # Ucode has time interval requirement for IFS scan on same CPU as follows: + case $MODEL in + "$SAPPHIRERAPIDS_X") + INTERVAL_TIME=180; + ;; + "$EMERALDRAPIDS_X") + INTERVAL_TIME=30; + ;; + *) + # Set default interval time for other platforms + INTERVAL_TIME=1; + append_log "[$INFO] CPU FML:$FML model:0x$MODEL, default: 1s interval time" + ;; + esac +} + +check_ifs_loaded() +{ + local ifs_info="" + + ifs_info=$(lsmod | grep "$IFS_NAME") + if [[ -z "$ifs_info" ]]; then + append_log "[$INFO] modprobe $IFS_NAME" + modprobe "$IFS_NAME" || { + test_exit "Check if CONFIG_INTEL_IFS is set to m or \ +platform doesn't support ifs" "$KSFT_SKIP" + } + ifs_info=$(lsmod | grep "$IFS_NAME") + [[ -n "$ifs_info" ]] || test_exit "No ifs module listed by lsmod" "$KSFT_FAIL" + fi +} + +test_ifs_scan_entry() +{ + local ifs_info="" + + ifs_info=$(lsmod | grep "$IFS_NAME") + + if [[ -z "$ifs_info" ]]; then + ORIGIN_IFS_LOADED="$FALSE" + check_ifs_loaded + else + ORIGIN_IFS_LOADED="$TRUE" + append_log "[$INFO] Module $IFS_NAME is already loaded" + fi + + if [[ -d "$IFS_SCAN_SYSFS_PATH" ]]; then + append_log "[$PASS] IFS sysfs $IFS_SCAN_SYSFS_PATH entry is created\n" + else + test_exit "No sysfs entry in $IFS_SCAN_SYSFS_PATH" "$KSFT_FAIL" + fi +} + +load_image() +{ + local image_id=$1 + local image_info="" + local ret="" + + check_ifs_loaded + if [[ -e "${IMG_PATH}/${IMAGE_NAME}" ]]; then + append_log "[$INFO] echo 0x$image_id > ${IFS_SCAN_SYSFS_PATH}/current_batch" + echo "0x$image_id" > "$IFS_SCAN_SYSFS_PATH"/current_batch 2>/dev/null + ret=$? + [[ "$ret" -eq 0 ]] || { + append_log "[$FAIL] Load ifs image $image_id failed with ret:$ret\n" + return "$ret" + } + image_info=$(cat ${IFS_SCAN_SYSFS_PATH}/current_batch) + if [[ "$image_info" == 0x"$image_id" ]]; then + append_log "[$PASS] load IFS current_batch:$image_info" + else + append_log "[$FAIL] current_batch:$image_info is not expected:$image_id" + return "$KSFT_FAIL" + fi + else + append_log "[$FAIL] No IFS image file ${IMG_PATH}/${IMAGE_NAME}"\ + return "$KSFT_FAIL" + fi + return 0 +} + +test_load_origin_ifs_image() +{ + local image_id=$1 + + IMAGE_NAME="${CPU_FMS}-${image_id}.scan" + + load_image "$image_id" || return $? + return 0 +} + +test_load_bad_ifs_image() +{ + local image_id=$1 + + IMAGE_NAME="${CPU_FMS}-${image_id}.scan" + + do_cmd "mv -f ${IMG_PATH}/${IMAGE_NAME} ${IMG_PATH}/${IMAGE_NAME}_origin" + + # Set IFS_IMAGE_NEED_RESTORE to true before corrupt the origin ifs image file + IFS_IMAGE_NEED_RESTORE=$TRUE + do_cmd "dd if=/dev/urandom of=${IMG_PATH}/${IMAGE_NAME} bs=1K count=6 2>/dev/null" + + # Use the specified judgment for negative testing + append_log "[$INFO] echo 0x$image_id > ${IFS_SCAN_SYSFS_PATH}/current_batch" + echo "0x$image_id" > "$IFS_SCAN_SYSFS_PATH"/current_batch 2>/dev/null + ret=$? + if [[ "$ret" -ne 0 ]]; then + append_log "[$PASS] Load invalid ifs image failed with ret:$ret not 0 as expected" + else + append_log "[$FAIL] Load invalid ifs image ret:$ret unexpectedly" + fi + + do_cmd "mv -f ${IMG_PATH}/${IMAGE_NAME}_origin ${IMG_PATH}/${IMAGE_NAME}" + IFS_IMAGE_NEED_RESTORE=$FALSE +} + +test_bad_and_origin_ifs_image() +{ + local image_id=$1 + + append_log "[$INFO] Test loading bad and then loading original IFS image:" + test_load_origin_ifs_image "$image_id" || return $? + test_load_bad_ifs_image "$image_id" + # Load origin image again and make sure it's worked + test_load_origin_ifs_image "$image_id" || return $? + append_log "[$INFO] Loading invalid IFS image and then loading initial image passed.\n" +} + +ifs_test_cpu() +{ + local ifs_mode=$1 + local cpu_num=$2 + local image_id status details ret result result_info + + echo "$cpu_num" > "$IFS_PATH"_"$ifs_mode"/"$RUN_TEST" + ret=$? + + status=$(cat "${IFS_PATH}_${ifs_mode}/${STATUS}") + details=$(cat "${IFS_PATH}_${ifs_mode}/${DETAILS}") + + if [[ "$ret" -eq 0 && "$status" == "$STATUS_PASS" ]]; then + result="$PASS" + else + result="$FAIL" + fi + + cpu_num=$(cat "${CPU_SYSFS}/cpu${cpu_num}/topology/thread_siblings_list") + + # There is no image file for IFS ARRAY BIST scan + if [[ -e "${IFS_PATH}_${ifs_mode}/current_batch" ]]; then + image_id=$(cat "${IFS_PATH}_${ifs_mode}/current_batch") + result_info=$(printf "[%s] ifs_%1d cpu(s):%s, current_batch:0x%02x, \ +ret:%2d, status:%s, details:0x%016x" \ + "$result" "$ifs_mode" "$cpu_num" "$image_id" "$ret" \ + "$status" "$details") + else + result_info=$(printf "[%s] ifs_%1d cpu(s):%s, ret:%2d, status:%s, details:0x%016x" \ + "$result" "$ifs_mode" "$cpu_num" "$ret" "$status" "$details") + fi + + append_log "$result_info" +} + +ifs_test_cpus() +{ + local cpus_type=$1 + local ifs_mode=$2 + local image_id=$3 + local cpu_max_num="" + local cpu_num="" + + case "$cpus_type" in + "$ALL") + cpu_max_num=$(($(nproc) - 1)) + cpus=$(seq 0 $cpu_max_num) + ;; + "$SIBLINGS") + cpus=$(cat ${CPU_SYSFS}/cpu*/topology/thread_siblings_list \ + | sed -e 's/,.*//' \ + | sed -e 's/-.*//' \ + | sort -n \ + | uniq) + ;; + *) + test_exit "Invalid cpus_type:$cpus_type" "$KSFT_XFAIL" + ;; + esac + + for cpu_num in $cpus; do + ifs_test_cpu "$ifs_mode" "$cpu_num" + done + + if [[ -z "$image_id" ]]; then + append_log "[$INFO] ifs_$ifs_mode test $cpus_type cpus completed\n" + else + append_log "[$INFO] ifs_$ifs_mode $cpus_type cpus with $CPU_FMS-$image_id.scan \ +completed\n" + fi +} + +test_ifs_same_cpu_loop() +{ + local ifs_mode=$1 + local cpu_num=$2 + local loop_times=$3 + + append_log "[$INFO] Test ifs mode $ifs_mode on CPU:$cpu_num for $loop_times rounds:" + [[ "$ifs_mode" == "$IFS_SCAN_MODE" ]] && { + load_image "$DEFAULT_IMG_ID" || return $? + } + for (( i=1; i<=loop_times; i++ )); do + append_log "[$INFO] Loop iteration: $i in total of $loop_times" + # Only IFS scan needs the interval time + if [[ "$ifs_mode" == "$IFS_SCAN_MODE" ]]; then + do_cmd "sleep $INTERVAL_TIME" + elif [[ "$ifs_mode" == "$IFS_ARRAY_BIST_SCAN_MODE" ]]; then + true + else + test_exit "Invalid ifs_mode:$ifs_mode" "$KSFT_XFAIL" + fi + + ifs_test_cpu "$ifs_mode" "$cpu_num" + done + append_log "[$INFO] $loop_times rounds of ifs_$ifs_mode test on CPU:$cpu_num completed.\n" +} + +test_ifs_scan_available_imgs() +{ + local image_ids="" + local image_id="" + + append_log "[$INFO] Test ifs scan with available images:" + image_ids=$(find "$IMG_PATH" -maxdepth 1 -name "${CPU_FMS}-[0-9a-fA-F][0-9a-fA-F].scan" \ + 2>/dev/null \ + | sort \ + | awk -F "-" '{print $NF}' \ + | cut -d "." -f 1) + + for image_id in $image_ids; do + load_image "$image_id" || return $? + + ifs_test_cpus "$SIBLINGS" "$IFS_SCAN_MODE" "$image_id" + # IFS scan requires time interval for the scan on the same CPU + do_cmd "sleep $INTERVAL_TIME" + done +} + +prepare_ifs_test_env() +{ + local max_cpu="" + + check_cpu_ifs_support_interval_time + + online_all_cpus + max_cpu=$(($(nproc) - 1)) + RANDOM_CPU=$(shuf -i 0-$max_cpu -n 1) + + DEFAULT_IMG_ID=$(find $IMG_PATH -maxdepth 1 -name "${CPU_FMS}-[0-9a-fA-F][0-9a-fA-F].scan" \ + 2>/dev/null \ + | sort \ + | head -n 1 \ + | awk -F "-" '{print $NF}' \ + | cut -d "." -f 1) +} + +test_ifs() +{ + prepare_ifs_test_env + + test_ifs_scan_entry + + if [[ -z "$DEFAULT_IMG_ID" ]]; then + append_log "[$SKIP] No proper ${IMG_PATH}/${CPU_FMS}-*.scan, skip ifs_0 scan" + else + test_bad_and_origin_ifs_image "$DEFAULT_IMG_ID" + test_ifs_scan_available_imgs + test_ifs_same_cpu_loop "$IFS_SCAN_MODE" "$RANDOM_CPU" "$LOOP_TIMES" + fi + + if [[ -d "$IFS_ARRAY_BIST_SYSFS_PATH" ]]; then + ifs_test_cpus "$SIBLINGS" "$IFS_ARRAY_BIST_SCAN_MODE" + test_ifs_same_cpu_loop "$IFS_ARRAY_BIST_SCAN_MODE" "$RANDOM_CPU" "$LOOP_TIMES" + else + append_log "[$SKIP] No $IFS_ARRAY_BIST_SYSFS_PATH, skip IFS ARRAY BIST scan" + fi +} + +trap ifs_cleanup SIGTERM SIGINT +test_ifs +ifs_cleanup diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/Makefile b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile new file mode 100644 index 000000000000..755d164384c4 --- /dev/null +++ b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile @@ -0,0 +1,20 @@ +include ../../../../../build/Build.include + +UNAME_M := $(shell uname -m) + +ifneq ($(UNAME_M),s390x) +nothing: +.PHONY: all clean run_tests install +.SILENT: +else + +TEST_GEN_PROGS := test_uvdevice + +top_srcdir ?= ../../../../../.. +LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include + +CFLAGS += -Wall -Werror -static $(KHDR_INCLUDES) -I$(LINUX_TOOL_ARCH_INCLUDE) + +include ../../../lib.mk + +endif diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/config b/tools/testing/selftests/drivers/s390x/uvdevice/config new file mode 100644 index 000000000000..f28a04b99eff --- /dev/null +++ b/tools/testing/selftests/drivers/s390x/uvdevice/config @@ -0,0 +1 @@ +CONFIG_S390_UV_UAPI=y diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c new file mode 100644 index 000000000000..14df9aa07308 --- /dev/null +++ b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c @@ -0,0 +1,270 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * selftest for the Ultravisor UAPI device + * + * Copyright IBM Corp. 2022 + * Author(s): Steffen Eiden <seiden@linux.ibm.com> + */ + +#include <stdint.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/ioctl.h> +#include <sys/mman.h> + +#include <asm/uvdevice.h> + +#include "kselftest_harness.h" + +#define UV_PATH "/dev/uv" +#define BUFFER_SIZE 0x200 +FIXTURE(uvio_fixture) { + int uv_fd; + struct uvio_ioctl_cb uvio_ioctl; + uint8_t buffer[BUFFER_SIZE]; + __u64 fault_page; +}; + +FIXTURE_VARIANT(uvio_fixture) { + unsigned long ioctl_cmd; + uint32_t arg_size; +}; + +FIXTURE_VARIANT_ADD(uvio_fixture, att) { + .ioctl_cmd = UVIO_IOCTL_ATT, + .arg_size = sizeof(struct uvio_attest), +}; + +FIXTURE_SETUP(uvio_fixture) +{ + self->uv_fd = open(UV_PATH, O_ACCMODE); + + self->uvio_ioctl.argument_addr = (__u64)self->buffer; + self->uvio_ioctl.argument_len = variant->arg_size; + self->fault_page = + (__u64)mmap(NULL, (size_t)getpagesize(), PROT_NONE, MAP_ANONYMOUS, -1, 0); +} + +FIXTURE_TEARDOWN(uvio_fixture) +{ + if (self->uv_fd) + close(self->uv_fd); + munmap((void *)self->fault_page, (size_t)getpagesize()); +} + +TEST_F(uvio_fixture, fault_ioctl_arg) +{ + int rc, errno_cache; + + rc = ioctl(self->uv_fd, variant->ioctl_cmd, NULL); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EFAULT); + + rc = ioctl(self->uv_fd, variant->ioctl_cmd, self->fault_page); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EFAULT); +} + +TEST_F(uvio_fixture, fault_uvio_arg) +{ + int rc, errno_cache; + + self->uvio_ioctl.argument_addr = 0; + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EFAULT); + + self->uvio_ioctl.argument_addr = self->fault_page; + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EFAULT); +} + +/* + * Test to verify that IOCTLs with invalid values in the ioctl_control block + * are rejected. + */ +TEST_F(uvio_fixture, inval_ioctl_cb) +{ + int rc, errno_cache; + + self->uvio_ioctl.argument_len = 0; + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + + self->uvio_ioctl.argument_len = (uint32_t)-1; + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + self->uvio_ioctl.argument_len = variant->arg_size; + + self->uvio_ioctl.flags = (uint32_t)-1; + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + self->uvio_ioctl.flags = 0; + + memset(self->uvio_ioctl.reserved14, 0xff, sizeof(self->uvio_ioctl.reserved14)); + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + + memset(&self->uvio_ioctl, 0x11, sizeof(self->uvio_ioctl)); + rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl); + ASSERT_EQ(rc, -1); +} + +TEST_F(uvio_fixture, inval_ioctl_cmd) +{ + int rc, errno_cache; + uint8_t nr = _IOC_NR(variant->ioctl_cmd); + unsigned long cmds[] = { + _IOWR('a', nr, struct uvio_ioctl_cb), + _IOWR(UVIO_TYPE_UVC, nr, int), + _IO(UVIO_TYPE_UVC, nr), + _IOR(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb), + _IOW(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb), + }; + + for (size_t i = 0; i < ARRAY_SIZE(cmds); i++) { + rc = ioctl(self->uv_fd, cmds[i], &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, ENOTTY); + } +} + +struct test_attest_buffer { + uint8_t arcb[0x180]; + uint8_t meas[64]; + uint8_t add[32]; +}; + +FIXTURE(attest_fixture) { + int uv_fd; + struct uvio_ioctl_cb uvio_ioctl; + struct uvio_attest uvio_attest; + struct test_attest_buffer attest_buffer; + __u64 fault_page; +}; + +FIXTURE_SETUP(attest_fixture) +{ + self->uv_fd = open(UV_PATH, O_ACCMODE); + + self->uvio_ioctl.argument_addr = (__u64)&self->uvio_attest; + self->uvio_ioctl.argument_len = sizeof(self->uvio_attest); + + self->uvio_attest.arcb_addr = (__u64)&self->attest_buffer.arcb; + self->uvio_attest.arcb_len = sizeof(self->attest_buffer.arcb); + + self->uvio_attest.meas_addr = (__u64)&self->attest_buffer.meas; + self->uvio_attest.meas_len = sizeof(self->attest_buffer.meas); + + self->uvio_attest.add_data_addr = (__u64)&self->attest_buffer.add; + self->uvio_attest.add_data_len = sizeof(self->attest_buffer.add); + self->fault_page = + (__u64)mmap(NULL, (size_t)getpagesize(), PROT_NONE, MAP_ANONYMOUS, -1, 0); +} + +FIXTURE_TEARDOWN(attest_fixture) +{ + if (self->uv_fd) + close(self->uv_fd); + munmap((void *)self->fault_page, (size_t)getpagesize()); +} + +static void att_inval_sizes_test(uint32_t *size, uint32_t max_size, bool test_zero, + struct __test_metadata *_metadata, + FIXTURE_DATA(attest_fixture) *self) +{ + int rc, errno_cache; + uint32_t tmp = *size; + + if (test_zero) { + *size = 0; + rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + } + *size = max_size + 1; + rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + *size = tmp; +} + +/* + * Test to verify that attestation IOCTLs with invalid values in the UVIO + * attestation control block are rejected. + */ +TEST_F(attest_fixture, att_inval_request) +{ + int rc, errno_cache; + + att_inval_sizes_test(&self->uvio_attest.add_data_len, UVIO_ATT_ADDITIONAL_MAX_LEN, + false, _metadata, self); + att_inval_sizes_test(&self->uvio_attest.meas_len, UVIO_ATT_MEASUREMENT_MAX_LEN, + true, _metadata, self); + att_inval_sizes_test(&self->uvio_attest.arcb_len, UVIO_ATT_ARCB_MAX_LEN, + true, _metadata, self); + + self->uvio_attest.reserved136 = (uint16_t)-1; + rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EINVAL); + + memset(&self->uvio_attest, 0x11, sizeof(self->uvio_attest)); + rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl); + ASSERT_EQ(rc, -1); +} + +static void att_inval_addr_test(__u64 *addr, struct __test_metadata *_metadata, + FIXTURE_DATA(attest_fixture) *self) +{ + int rc, errno_cache; + __u64 tmp = *addr; + + *addr = 0; + rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EFAULT); + *addr = self->fault_page; + rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl); + errno_cache = errno; + ASSERT_EQ(rc, -1); + ASSERT_EQ(errno_cache, EFAULT); + *addr = tmp; +} + +TEST_F(attest_fixture, att_inval_addr) +{ + att_inval_addr_test(&self->uvio_attest.arcb_addr, _metadata, self); + att_inval_addr_test(&self->uvio_attest.add_data_addr, _metadata, self); + att_inval_addr_test(&self->uvio_attest.meas_addr, _metadata, self); +} + +int main(int argc, char **argv) +{ + int fd = open(UV_PATH, O_ACCMODE); + + if (fd < 0) + ksft_exit_skip("No uv-device or cannot access " UV_PATH "\n" + "Enable CONFIG_S390_UV_UAPI and check the access rights on " + UV_PATH ".\n"); + close(fd); + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/drivers/sdsi/sdsi.sh b/tools/testing/selftests/drivers/sdsi/sdsi.sh new file mode 100755 index 000000000000..9b84b9b82b49 --- /dev/null +++ b/tools/testing/selftests/drivers/sdsi/sdsi.sh @@ -0,0 +1,25 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# Runs tests for the intel_sdsi driver + +if ! command -v python3 > /dev/null 2>&1; then + echo "drivers/sdsi: [SKIP] python3 not installed" + exit 77 +fi + +if ! python3 -c "import pytest" > /dev/null 2>&1; then + echo "drivers/sdsi: [SKIP] pytest module not installed" + exit 77 +fi + +if ! /sbin/modprobe -q -r intel_sdsi; then + echo "drivers/sdsi: [SKIP]" + exit 77 +fi + +if /sbin/modprobe -q intel_sdsi && python3 -m pytest sdsi_test.py; then + echo "drivers/sdsi: [OK]" +else + echo "drivers/sdsi: [FAIL]" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/sdsi/sdsi_test.py b/tools/testing/selftests/drivers/sdsi/sdsi_test.py new file mode 100644 index 000000000000..5efb29feee70 --- /dev/null +++ b/tools/testing/selftests/drivers/sdsi/sdsi_test.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from struct import pack +from time import sleep + +import errno +import glob +import os +import subprocess + +try: + import pytest +except ImportError: + print("Unable to import pytest python module.") + print("\nIf not already installed, you may do so with:") + print("\t\tpip3 install pytest") + exit(1) + +SOCKETS = glob.glob('/sys/bus/auxiliary/devices/intel_vsec.sdsi.*') +NUM_SOCKETS = len(SOCKETS) + +MODULE_NAME = 'intel_sdsi' +DEV_PREFIX = 'intel_vsec.sdsi' +CLASS_DIR = '/sys/bus/auxiliary/devices' +GUID = "0x6dd191" + +def read_bin_file(file): + with open(file, mode='rb') as f: + content = f.read() + return content + +def get_dev_file_path(socket, file): + return CLASS_DIR + '/' + DEV_PREFIX + '.' + str(socket) + '/' + file + +def kmemleak_enabled(): + kmemleak = "/sys/kernel/debug/kmemleak" + return os.path.isfile(kmemleak) + +class TestSDSiDriver: + def test_driver_loaded(self): + lsmod_p = subprocess.Popen(('lsmod'), stdout=subprocess.PIPE) + result = subprocess.check_output(('grep', '-q', MODULE_NAME), stdin=lsmod_p.stdout) + +@pytest.mark.parametrize('socket', range(0, NUM_SOCKETS)) +class TestSDSiFilesClass: + + def read_value(self, file): + f = open(file, "r") + value = f.read().strip("\n") + return value + + def get_dev_folder(self, socket): + return CLASS_DIR + '/' + DEV_PREFIX + '.' + str(socket) + '/' + + def test_sysfs_files_exist(self, socket): + folder = self.get_dev_folder(socket) + print (folder) + assert os.path.isfile(folder + "guid") == True + assert os.path.isfile(folder + "provision_akc") == True + assert os.path.isfile(folder + "provision_cap") == True + assert os.path.isfile(folder + "state_certificate") == True + assert os.path.isfile(folder + "registers") == True + + def test_sysfs_file_permissions(self, socket): + folder = self.get_dev_folder(socket) + mode = os.stat(folder + "guid").st_mode & 0o777 + assert mode == 0o444 # Read all + mode = os.stat(folder + "registers").st_mode & 0o777 + assert mode == 0o400 # Read owner + mode = os.stat(folder + "provision_akc").st_mode & 0o777 + assert mode == 0o200 # Read owner + mode = os.stat(folder + "provision_cap").st_mode & 0o777 + assert mode == 0o200 # Read owner + mode = os.stat(folder + "state_certificate").st_mode & 0o777 + assert mode == 0o400 # Read owner + + def test_sysfs_file_ownership(self, socket): + folder = self.get_dev_folder(socket) + + st = os.stat(folder + "guid") + assert st.st_uid == 0 + assert st.st_gid == 0 + + st = os.stat(folder + "registers") + assert st.st_uid == 0 + assert st.st_gid == 0 + + st = os.stat(folder + "provision_akc") + assert st.st_uid == 0 + assert st.st_gid == 0 + + st = os.stat(folder + "provision_cap") + assert st.st_uid == 0 + assert st.st_gid == 0 + + st = os.stat(folder + "state_certificate") + assert st.st_uid == 0 + assert st.st_gid == 0 + + def test_sysfs_file_sizes(self, socket): + folder = self.get_dev_folder(socket) + + if self.read_value(folder + "guid") == GUID: + st = os.stat(folder + "registers") + assert st.st_size == 72 + + st = os.stat(folder + "provision_akc") + assert st.st_size == 1024 + + st = os.stat(folder + "provision_cap") + assert st.st_size == 1024 + + st = os.stat(folder + "state_certificate") + assert st.st_size == 4096 + + def test_no_seek_allowed(self, socket): + folder = self.get_dev_folder(socket) + rand_file = bytes(os.urandom(8)) + + f = open(folder + "provision_cap", "wb", 0) + f.seek(1) + with pytest.raises(OSError) as error: + f.write(rand_file) + assert error.value.errno == errno.ESPIPE + f.close() + + f = open(folder + "provision_akc", "wb", 0) + f.seek(1) + with pytest.raises(OSError) as error: + f.write(rand_file) + assert error.value.errno == errno.ESPIPE + f.close() + + def test_registers_seek(self, socket): + folder = self.get_dev_folder(socket) + + # Check that the value read from an offset of the entire + # file is none-zero and the same as the value read + # from seeking to the same location + f = open(folder + "registers", "rb") + data = f.read() + f.seek(64) + id = f.read() + assert id != bytes(0) + assert data[64:] == id + f.close() + +@pytest.mark.parametrize('socket', range(0, NUM_SOCKETS)) +class TestSDSiMailboxCmdsClass: + def test_provision_akc_eoverflow_1017_bytes(self, socket): + + # The buffer for writes is 1k, of with 8 bytes must be + # reserved for the command, leaving 1016 bytes max. + # Check that we get an overflow error for 1017 bytes. + node = get_dev_file_path(socket, "provision_akc") + rand_file = bytes(os.urandom(1017)) + + f = open(node, 'wb', 0) + with pytest.raises(OSError) as error: + f.write(rand_file) + assert error.value.errno == errno.EOVERFLOW + f.close() + +@pytest.mark.parametrize('socket', range(0, NUM_SOCKETS)) +class TestSdsiDriverLocksClass: + def test_enodev_when_pci_device_removed(self, socket): + node = get_dev_file_path(socket, "provision_akc") + dev_name = DEV_PREFIX + '.' + str(socket) + driver_dir = CLASS_DIR + '/' + dev_name + "/driver/" + rand_file = bytes(os.urandom(8)) + + f = open(node, 'wb', 0) + g = open(node, 'wb', 0) + + with open(driver_dir + 'unbind', 'w') as k: + print(dev_name, file = k) + + with pytest.raises(OSError) as error: + f.write(rand_file) + assert error.value.errno == errno.ENODEV + + with pytest.raises(OSError) as error: + g.write(rand_file) + assert error.value.errno == errno.ENODEV + + f.close() + g.close() + + # Short wait needed to allow file to close before pulling driver + sleep(1) + + p = subprocess.Popen(('modprobe', '-r', 'intel_sdsi')) + p.wait() + p = subprocess.Popen(('modprobe', '-r', 'intel_vsec')) + p.wait() + p = subprocess.Popen(('modprobe', 'intel_vsec')) + p.wait() + + # Short wait needed to allow driver time to get inserted + # before continuing tests + sleep(1) + + def test_memory_leak(self, socket): + if not kmemleak_enabled(): + pytest.skip("kmemleak not enabled in kernel") + + dev_name = DEV_PREFIX + '.' + str(socket) + driver_dir = CLASS_DIR + '/' + dev_name + "/driver/" + + with open(driver_dir + 'unbind', 'w') as k: + print(dev_name, file = k) + + sleep(1) + + subprocess.check_output(('modprobe', '-r', 'intel_sdsi')) + subprocess.check_output(('modprobe', '-r', 'intel_vsec')) + + with open('/sys/kernel/debug/kmemleak', 'w') as f: + print('scan', file = f) + sleep(5) + + assert os.stat('/sys/kernel/debug/kmemleak').st_size == 0 + + subprocess.check_output(('modprobe', 'intel_vsec')) + sleep(1) diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh new file mode 100755 index 000000000000..128f0ab24307 --- /dev/null +++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh @@ -0,0 +1,200 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +usage() { echo "usbip_test.sh -b <busid> -p <usbip tools path>"; exit 1; } + +while getopts "h:b:p:" arg; do + case "${arg}" in + h) + usage + ;; + b) + busid=${OPTARG} + ;; + p) + tools_path=${OPTARG} + ;; + *) + usage + ;; + esac +done +shift $((OPTIND-1)) + +if [ -z "${busid}" ]; then + usage +fi + +echo "Running USB over IP Testing on $busid"; + +test_end_msg="End of USB over IP Testing on $busid" + +if [ $UID != 0 ]; then + echo "Please run usbip_test as root [SKIP]" + echo $test_end_msg + exit $ksft_skip +fi + +echo "Load usbip_host module" +if ! /sbin/modprobe -q -n usbip_host; then + echo "usbip_test: module usbip_host is not found [SKIP]" + echo $test_end_msg + exit $ksft_skip +fi + +if /sbin/modprobe -q usbip_host; then + echo "usbip_test: module usbip_host is loaded [OK]" +else + echo "usbip_test: module usbip_host failed to load [FAIL]" + echo $test_end_msg + exit 1 +fi + +echo "Load vhci_hcd module" +if /sbin/modprobe -q vhci_hcd; then + echo "usbip_test: module vhci_hcd is loaded [OK]" +else + echo "usbip_test: module vhci_hcd failed to load [FAIL]" + echo $test_end_msg + exit 1 +fi +echo "==============================================================" + +cd $tools_path; + +if [ ! -f src/usbip ]; then + echo "Please build usbip tools" + echo $test_end_msg + exit $ksft_skip +fi + +echo "Expect to see export-able devices"; +src/usbip list -l; +echo "==============================================================" + +echo "Run lsusb to see all usb devices" +lsusb -t; +echo "==============================================================" + +src/usbipd -D; + +echo "Get exported devices from localhost - expect to see none"; +src/usbip list -r localhost; +echo "==============================================================" + +echo "bind devices"; +src/usbip bind -b $busid; +echo "==============================================================" + +echo "Run lsusb - bound devices should be under usbip_host control" +lsusb -t; +echo "==============================================================" + +echo "bind devices - expect already bound messages" +src/usbip bind -b $busid; +echo "==============================================================" + +echo "Get exported devices from localhost - expect to see exported devices"; +src/usbip list -r localhost; +echo "==============================================================" + +echo "unbind devices"; +src/usbip unbind -b $busid; +echo "==============================================================" + +echo "Run lsusb - bound devices should be rebound to original drivers" +lsusb -t; +echo "==============================================================" + +echo "unbind devices - expect no devices bound message"; +src/usbip unbind -b $busid; +echo "==============================================================" + +echo "Get exported devices from localhost - expect to see none"; +src/usbip list -r localhost; +echo "==============================================================" + +echo "List imported devices - expect to see none"; +src/usbip port; +echo "==============================================================" + +echo "Import devices from localhost - should fail with no devices" +src/usbip attach -r localhost -b $busid; +echo "==============================================================" + +echo "bind devices"; +src/usbip bind -b $busid; +echo "==============================================================" + +echo "List imported devices - expect to see exported devices"; +src/usbip list -r localhost; +echo "==============================================================" + +echo "List imported devices - expect to see none"; +src/usbip port; +echo "==============================================================" + +echo "Import devices from localhost - should work" +src/usbip attach -r localhost -b $busid; +echo "==============================================================" + +# Wait for sysfs file to be updated. Without this sleep, usbip port +# shows no imported devices. +sleep 3; + +echo "List imported devices - expect to see imported devices"; +src/usbip port; +echo "==============================================================" + +echo "Import devices from localhost - expect already imported messages" +src/usbip attach -r localhost -b $busid; +echo "==============================================================" + +echo "Un-import devices"; +src/usbip detach -p 00; +src/usbip detach -p 01; +echo "==============================================================" + +echo "List imported devices - expect to see none"; +src/usbip port; +echo "==============================================================" + +echo "Un-import devices - expect no devices to detach messages"; +src/usbip detach -p 00; +src/usbip detach -p 01; +echo "==============================================================" + +echo "Detach invalid port tests - expect invalid port error message"; +src/usbip detach -p 100; +echo "==============================================================" + +echo "Expect to see export-able devices"; +src/usbip list -l; +echo "==============================================================" + +echo "Remove usbip_host module"; +rmmod usbip_host; + +echo "Run lsusb - bound devices should be rebound to original drivers" +lsusb -t; +echo "==============================================================" + +echo "Run bind without usbip_host - expect fail" +src/usbip bind -b $busid; +echo "==============================================================" + +echo "Run lsusb - devices that failed to bind aren't bound to any driver" +lsusb -t; +echo "==============================================================" + +echo "modprobe usbip_host - does it work?" +/sbin/modprobe usbip_host +echo "Should see -busid- is not in match_busid table... skip! dmesg" +echo "==============================================================" +dmesg | grep "is not in match_busid table" +echo "==============================================================" + +echo $test_end_msg |
