summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/net
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/net')
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile5
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c125
-rw-r--r--tools/testing/selftests/net/bench/Makefile7
-rw-r--r--tools/testing/selftests/net/bench/page_pool/Makefile17
-rw-r--r--tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c267
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.c394
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.h238
-rwxr-xr-xtools/testing/selftests/net/bench/test_bench_page_pool.sh32
-rwxr-xr-xtools/testing/selftests/net/broadcast_pmtu.sh47
-rw-r--r--tools/testing/selftests/net/config11
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile1
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh69
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multicast.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh52
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh771
-rwxr-xr-xtools/testing/selftests/net/ipv6_force_forwarding.sh105
-rw-r--r--tools/testing/selftests/net/lib.sh35
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py2
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py7
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py39
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py5
-rw-r--r--tools/testing/selftests/net/lib/xdp_native.bpf.c621
-rw-r--r--tools/testing/selftests/net/mptcp/config2
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c24
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh84
-rwxr-xr-xtools/testing/selftests/net/netdev-l2addr.sh59
-rw-r--r--tools/testing/selftests/net/netfilter/config7
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh4
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_interface_stress.sh5
-rw-r--r--tools/testing/selftests/net/nettest.c12
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py127
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh4
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt45
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt44
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt33
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh92
-rwxr-xr-xtools/testing/selftests/net/rtnetlink_notification.sh112
-rwxr-xr-xtools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh2
-rwxr-xr-xtools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh50
-rwxr-xr-xtools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh2
-rwxr-xr-xtools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh2
-rw-r--r--tools/testing/selftests/net/tcp_ao/seq-ext.c2
-rwxr-xr-xtools/testing/selftests/net/test_neigh.sh366
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifiltering.sh9
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh4
51 files changed, 3916 insertions, 98 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index c6dd2a335cf4..47c293c2962f 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -34,6 +34,7 @@ reuseport_bpf_numa
reuseport_dualstack
rxtimestamp
sctp_hello
+scm_inq
scm_pidfd
scm_rights
sk_bind_sendto_listen
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 332f387615d7..b31a71f2b372 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -41,6 +41,7 @@ TEST_PROGS += netns-name.sh
TEST_PROGS += link_netns.py
TEST_PROGS += nl_netdev.py
TEST_PROGS += rtnetlink.py
+TEST_PROGS += rtnetlink_notification.sh
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -62,6 +63,7 @@ TEST_PROGS += ip_local_port_range.sh
TEST_PROGS += rps_default_mask.sh
TEST_PROGS += big_tcp.sh
TEST_PROGS += netns-sysctl.sh
+TEST_PROGS += netdev-l2addr.sh
TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -99,6 +101,7 @@ TEST_PROGS += test_vxlan_mdb.sh
TEST_PROGS += test_bridge_neigh_suppress.sh
TEST_PROGS += test_vxlan_nolocalbypass.sh
TEST_PROGS += test_bridge_backup_port.sh
+TEST_PROGS += test_neigh.sh
TEST_PROGS += fdb_flush.sh fdb_notify.sh
TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
@@ -112,6 +115,8 @@ TEST_PROGS += skf_net_off.sh
TEST_GEN_FILES += skf_net_off
TEST_GEN_FILES += tfo
TEST_PROGS += tfo_passive.sh
+TEST_PROGS += broadcast_pmtu.sh
+TEST_PROGS += ipv6_force_forwarding.sh
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller netlink-dumps
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 50584479540b..a4b61c6d0290 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,4 @@
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect
+TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c
new file mode 100644
index 000000000000..9d22561e7b8f
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_inq.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "../../kselftest_harness.h"
+
+#define NR_CHUNKS 100
+#define MSG_LEN 256
+
+struct scm_inq {
+ struct cmsghdr cmsghdr;
+ int inq;
+};
+
+FIXTURE(scm_inq)
+{
+ int fd[2];
+};
+
+FIXTURE_VARIANT(scm_inq)
+{
+ int type;
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, stream)
+{
+ .type = SOCK_STREAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, dgram)
+{
+ .type = SOCK_DGRAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, seqpacket)
+{
+ .type = SOCK_SEQPACKET,
+};
+
+FIXTURE_SETUP(scm_inq)
+{
+ int err;
+
+ err = socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd);
+ ASSERT_EQ(0, err);
+}
+
+FIXTURE_TEARDOWN(scm_inq)
+{
+ close(self->fd[0]);
+ close(self->fd[1]);
+}
+
+/* Queue NR_CHUNKS zero-filled messages of MSG_LEN bytes on fd[0], asserting
+ * that each send() transfers the whole buffer.
+ */
+static void send_chunks(struct __test_metadata *_metadata,
+			FIXTURE_DATA(scm_inq) *self)
+{
+	char buf[MSG_LEN] = {};
+	int left = NR_CHUNKS;
+	int ret;
+
+	while (left-- > 0) {
+		ret = send(self->fd[0], buf, sizeof(buf), 0);
+		ASSERT_EQ(sizeof(buf), ret);
+	}
+}
+
+/* Drain NR_CHUNKS messages from fd[1].
+ *
+ * Every recvmsg() must return a full MSG_LEN payload together with a
+ * SOL_SOCKET/SCM_INQ control message, and the value carried in that control
+ * message must equal what ioctl(SIOCINQ) reports right afterwards.
+ */
+static void recv_chunks(struct __test_metadata *_metadata,
+			FIXTURE_DATA(scm_inq) *self)
+{
+	struct msghdr msg = {};
+	struct iovec iov = {};
+	struct scm_inq cmsg;
+	char buf[MSG_LEN];
+	int i, ret;
+	int inq;
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = &cmsg;
+
+	iov.iov_base = buf;
+	iov.iov_len = sizeof(buf);
+
+	for (i = 0; i < NR_CHUNKS; i++) {
+		memset(buf, 0, sizeof(buf));
+		memset(&cmsg, 0, sizeof(cmsg));
+
+		/* msg_controllen is value-result: recvmsg() overwrites it with
+		 * the length actually used, so re-arm it before every call.
+		 */
+		msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq));
+
+		ret = recvmsg(self->fd[1], &msg, 0);
+		ASSERT_EQ(MSG_LEN, ret);
+		ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg));
+		ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len);
+		ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level);
+		ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type);
+
+		ret = ioctl(self->fd[1], SIOCINQ, &inq);
+		ASSERT_EQ(0, ret);
+		ASSERT_EQ(cmsg.inq, inq);
+	}
+}
+
+/* Enable SO_INQ on the receiving socket. For non-stream variants the option
+ * must be rejected with ENOPROTOOPT; for SOCK_STREAM the queue must start
+ * empty and every received chunk must carry a matching SCM_INQ value.
+ */
+TEST_F(scm_inq, basic)
+{
+	int err, inq;
+
+	err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int));
+	if (variant->type != SOCK_STREAM) {
+		/* errno is only meaningful if the call actually failed */
+		ASSERT_EQ(-1, err);
+		ASSERT_EQ(ENOPROTOOPT, errno);
+		return;
+	}
+
+	ASSERT_EQ(0, err);
+
+	err = ioctl(self->fd[1], SIOCINQ, &inq);
+	ASSERT_EQ(0, err);
+	ASSERT_EQ(0, inq);
+
+	send_chunks(_metadata, self);
+	recv_chunks(_metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile
new file mode 100644
index 000000000000..2546c45e42f7
--- /dev/null
+++ b/tools/testing/selftests/net/bench/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_MODS_DIR := page_pool
+
+TEST_PROGS += test_bench_page_pool.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile
new file mode 100644
index 000000000000..0549a16ba275
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/Makefile
@@ -0,0 +1,17 @@
+BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+obj-m += bench_page_pool.o
+bench_page_pool-y += bench_page_pool_simple.o time_bench.o
+
+all:
+ +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules
+
+clean:
+ +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean
diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
new file mode 100644
index 000000000000..cb6468adbda4
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmark module for page_pool.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/interrupt.h>
+#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <net/page_pool/helpers.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+#define MY_POOL_SIZE 1024
+
+/* Makes tests selectable. Useful for perf-record to analyze a single test.
+ * Hint: Bash shells support writing binary numbers like: $((2#101010))
+ *
+ * # modprobe bench_page_pool run_flags=$((2#100))
+ */
+static unsigned long run_flags = 0xFFFFFFFF;
+module_param(run_flags, ulong, 0);
+MODULE_PARM_DESC(run_flags, "Limit which bench test that runs");
+
+/* Count the bit number from the enum */
+enum benchmark_bit {
+ bit_run_bench_baseline,
+ bit_run_bench_no_softirq01,
+ bit_run_bench_no_softirq02,
+ bit_run_bench_no_softirq03,
+};
+
+#define bit(b) (1 << (b))
+#define enabled(b) ((run_flags & (bit(b))))
+
+/* notice time_bench is limited to U32_MAX nr loops */
+static unsigned long loops = 10000000;
+module_param(loops, ulong, 0);
+MODULE_PARM_DESC(loops, "Specify loops bench will run");
+
+/* Timing at the nanosecond level: we need to know the overhead
+ * introduced by the for loop itself, so this baseline times an otherwise
+ * empty loop of rec->loops iterations and returns the iteration count.
+ */
+static int time_bench_for_loop(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ int i;
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ loops_cnt++;
+ barrier(); /* keep the compiler from optimizing this loop away */
+ }
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+static int time_bench_atomic_inc(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ atomic_t cnt;
+ int i;
+
+ atomic_set(&cnt, 0);
+
+ time_bench_start(rec);
+ /** Loop to measure: one atomic_inc() on a local counter per iteration **/
+ for (i = 0; i < rec->loops; i++) {
+ atomic_inc(&cnt);
+ barrier(); /* keep the compiler from optimizing this loop away */
+ }
+ loops_cnt = atomic_read(&cnt);
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+/* The ptr_ring in page_pool uses a spinlock. We need to know the minimum
+ * overhead of taking+releasing a spinlock, to know the cycles that can be saved
+ * by e.g. amortizing this via bulking.
+ */
+static int time_bench_lock(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ spinlock_t lock;
+ int i;
+
+ spin_lock_init(&lock);
+
+ time_bench_start(rec);
+ /** Loop to measure: one uncontended lock/unlock pair per iteration **/
+ for (i = 0; i < rec->loops; i++) {
+ spin_lock(&lock);
+ loops_cnt++;
+ barrier(); /* keep the compiler from optimizing this loop away */
+ spin_unlock(&lock);
+ }
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+/* Helper for filling some pages into ptr_ring.
+ *
+ * Allocates up to @elems pages from @pp and then hands every successfully
+ * allocated page back with page_pool_put_page(). This is a best-effort
+ * warm-up: if the temporary array cannot be allocated nothing is done, and
+ * individual page allocation failures are skipped.
+ */
+static void pp_fill_ptr_ring(struct page_pool *pp, int elems)
+{
+	/* GFP_ATOMIC needed when running under softirq */
+	gfp_t gfp_mask = GFP_ATOMIC;
+	struct page **array;
+	int i;
+
+	array = kcalloc(elems, sizeof(struct page *), gfp_mask);
+	if (!array)
+		return;
+
+	for (i = 0; i < elems; i++)
+		array[i] = page_pool_alloc_pages(pp, gfp_mask);
+	for (i = 0; i < elems; i++) {
+		/* A failed allocation leaves a NULL slot; do not return it */
+		if (array[i])
+			page_pool_put_page(pp, array[i], -1, false);
+	}
+
+	kfree(array);
+}
+
+enum test_type { type_fast_path, type_ptr_ring, type_page_allocator };
+
+/* Time page_pool allocation plus one of three page return paths.
+ *
+ * Creates a private page_pool, pre-fills it with pp_fill_ptr_ring() and then,
+ * for up to rec->loops iterations, allocates one page and returns it in the
+ * way selected by @type. @func is only used as a prefix in log messages and
+ * @data is unused.
+ *
+ * Returns the number of iterations that allocated a page, or 0 when the
+ * page_pool could not be created.
+ *
+ * Depends on the compiler optimizing this function: the @type branches are
+ * expected to be resolved at compile time (see BUILD_BUG() below).
+ */
+static int time_bench_page_pool(struct time_bench_record *rec, void *data,
+				enum test_type type, const char *func)
+{
+	uint64_t loops_cnt = 0;
+	gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */
+	int i, err;
+
+	struct page_pool *pp;
+	struct page *page;
+
+	struct page_pool_params pp_params = {
+		.order = 0,
+		.flags = 0,
+		.pool_size = MY_POOL_SIZE,
+		.nid = NUMA_NO_NODE,
+		.dev = NULL, /* Only use for DMA mapping */
+		.dma_dir = DMA_BIDIRECTIONAL,
+	};
+
+	pp = page_pool_create(&pp_params);
+	if (IS_ERR(pp)) {
+		err = PTR_ERR(pp);
+		pr_warn("%s: Error(%d) creating page_pool\n", func, err);
+		/* pp is an ERR_PTR here, not a pool: it must not be handed to
+		 * page_pool_destroy(). Nothing was measured, so report 0.
+		 */
+		return 0;
+	}
+	pp_fill_ptr_ring(pp, 64);
+
+	if (in_serving_softirq())
+		pr_warn("%s(): in_serving_softirq fast-path\n", func);
+	else
+		pr_warn("%s(): Cannot use page_pool fast-path\n", func);
+
+	time_bench_start(rec);
+	/** Loop to measure **/
+	for (i = 0; i < rec->loops; i++) {
+		/* Common fast-path alloc that depend on in_serving_softirq() */
+		page = page_pool_alloc_pages(pp, gfp_mask);
+		if (!page)
+			break;
+		loops_cnt++;
+		barrier(); /* keep the compiler from optimizing this loop away */
+
+		/* The benchmark's purpose is to test different return paths.
+		 * Compiler should inline optimize other function calls out
+		 */
+		if (type == type_fast_path) {
+			/* Fast-path recycling e.g. XDP_DROP use-case */
+			page_pool_recycle_direct(pp, page);
+
+		} else if (type == type_ptr_ring) {
+			/* Normal return path */
+			page_pool_put_page(pp, page, -1, false);
+
+		} else if (type == type_page_allocator) {
+			/* Test the case where pages are not recycled, but
+			 * instead returned to the system's page allocator
+			 */
+			get_page(page); /* cause no-recycling */
+			page_pool_put_page(pp, page, -1, false);
+			put_page(page);
+		} else {
+			BUILD_BUG();
+		}
+	}
+	time_bench_stop(rec, loops_cnt);
+
+	page_pool_destroy(pp);
+	return loops_cnt;
+}
+
+static int time_bench_page_pool01_fast_path(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_fast_path, __func__);
+}
+
+static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_ptr_ring, __func__);
+}
+
+static int time_bench_page_pool03_slow(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_page_allocator, __func__);
+}
+
+static int run_benchmark_tests(void)
+{
+ uint32_t nr_loops = loops;
+
+ /* Baseline tests; for_loop and atomic_inc run 10x the requested loops
+  * (NOTE: nr_loops * 10 is computed in 32 bits)
+  */
+ if (enabled(bit_run_bench_baseline)) {
+ time_bench_loop(nr_loops * 10, 0, "for_loop", NULL,
+ time_bench_for_loop);
+ time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL,
+ time_bench_atomic_inc);
+ time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock);
+ }
+
+ /* These tests cannot activate the intended code path, because they are
+  * not run from softirq context; each one is gated by its run_flags bit
+  */
+ if (enabled(bit_run_bench_no_softirq01))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL,
+ time_bench_page_pool01_fast_path);
+ if (enabled(bit_run_bench_no_softirq02))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL,
+ time_bench_page_pool02_ptr_ring);
+ if (enabled(bit_run_bench_no_softirq03))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL,
+ time_bench_page_pool03_slow);
+
+ return 0;
+}
+
+static int __init bench_page_pool_simple_module_init(void)
+{
+ if (verbose)
+ pr_info("Loaded\n");
+
+ if (loops > U32_MAX) {
+ pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops,
+ U32_MAX);
+ return -ECHRNG;
+ }
+
+ run_benchmark_tests();
+
+ return 0;
+}
+module_init(bench_page_pool_simple_module_init);
+
+static void __exit bench_page_pool_simple_module_exit(void)
+{
+ if (verbose)
+ pr_info("Unloaded\n");
+}
+module_exit(bench_page_pool_simple_module_exit);
+
+MODULE_DESCRIPTION("Benchmark of page_pool simple cases");
+MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c
new file mode 100644
index 000000000000..073bb36ec5f2
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/time.h>
+
+#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */
+
+/* For concurrency testing */
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+
+/** TSC (Time-Stamp Counter) based **
+ * See: linux/time_bench.h
+ * tsc_start_clock() and tsc_stop_clock()
+ */
+
+/** Wall-clock based **
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ */
+#define PERF_FORMAT \
+ (PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \
+ PERF_FORMAT_TOTAL_TIME_RUNNING)
+
+struct raw_perf_event {
+ uint64_t config; /* event */
+ uint64_t config1; /* umask */
+ struct perf_event *save;
+ char *desc;
+};
+
+/* If HT is enabled, a maximum of 4 events (5 if one is instructions
+ * retired) can be specified; if HT is disabled, a maximum of 8 (9 if
+ * one is instructions retired) can be specified.
+ *
+ * From Table 19-1. Architectural Performance Events
+ * Architectures Software Developer’s Manual Volume 3: System Programming
+ * Guide
+ */
+struct raw_perf_event perf_events[] = {
+ { 0x3c, 0x00, NULL, "Unhalted CPU Cycles" },
+ { 0xc0, 0x00, NULL, "Instruction Retired" }
+};
+
+#define NUM_EVTS (ARRAY_SIZE(perf_events))
+
+/* WARNING: PMU config is currently broken!
+ *
+ * Tries to create and enable one pinned raw kernel perf counter per entry of
+ * perf_events[], bound to the CPU this function was running on when it
+ * sampled smp_processor_id(). Each successfully created event is remembered
+ * in perf_events[i].save.
+ *
+ * NOTE(review): @enable is copied straight into perf_event_attr.disabled,
+ * which looks inverted - confirm the intent before relying on it.
+ *
+ * Always returns true; creation failures are only logged.
+ */
+bool time_bench_PMU_config(bool enable)
+{
+	int i;
+	struct perf_event_attr perf_conf;
+	struct perf_event *perf_event;
+	int cpu;
+
+	preempt_disable();
+	cpu = smp_processor_id();
+	pr_info("DEBUG: cpu:%d\n", cpu);
+	preempt_enable();
+
+	memset(&perf_conf, 0, sizeof(struct perf_event_attr));
+	perf_conf.type = PERF_TYPE_RAW;
+	perf_conf.size = sizeof(struct perf_event_attr);
+	perf_conf.read_format = PERF_FORMAT;
+	perf_conf.pinned = 1;
+	perf_conf.exclude_user = 1; /* No userspace events */
+	perf_conf.exclude_kernel = 0; /* Only kernel events */
+
+	for (i = 0; i < NUM_EVTS; i++) {
+		perf_conf.disabled = enable;
+		//perf_conf.disabled = (i == 0) ? 1 : 0;
+		perf_conf.config = perf_events[i].config;
+		perf_conf.config1 = perf_events[i].config1;
+		if (verbose)
+			pr_info("%s() enable PMU counter: %s\n",
+				__func__, perf_events[i].desc);
+		perf_event = perf_event_create_kernel_counter(&perf_conf, cpu,
+						NULL /* task */,
+						NULL /* overflow_handler*/,
+						NULL /* context */);
+		if (IS_ERR(perf_event)) {
+			/* Failure is reported via ERR_PTR(), never NULL; an
+			 * error pointer must not be saved or enabled.
+			 */
+			pr_info("%s():DEBUG perf_event failed (%ld)\n",
+				__func__, PTR_ERR(perf_event));
+			continue;
+		}
+
+		perf_events[i].save = perf_event;
+		pr_info("%s():DEBUG perf_event success\n", __func__);
+
+		perf_event_enable(perf_event);
+	}
+
+	return true;
+}
+
+/** Generic functions **
+ */
+
+/* Calculate stats, store results in record.
+ *
+ * Depending on rec->flags this derives the TSC interval and cycles per
+ * invocation, the wall-clock interval with ns per invocation (quotient plus
+ * three decimals), and the PMU instructions-per-cycle ratio.
+ *
+ * Returns false (after logging) when the record cannot be evaluated: fewer
+ * than 1000 or more than 2^32-1 invocations in loop mode, a zero TSC or
+ * wall-clock interval, or a PMU cycle count that is zero or does not fit in
+ * 32 bits. Returns true otherwise.
+ */
+bool time_bench_calc_stats(struct time_bench_record *rec)
+{
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+	uint64_t ns_per_call_tmp_rem = 0;
+	uint32_t ns_per_call_remainder = 0;
+	uint64_t pmc_ipc_tmp_rem = 0;
+	uint32_t pmc_ipc_remainder = 0;
+	uint32_t pmc_ipc_div = 0;
+	uint32_t pmc_clk = 0; /* 32-bit due to div_u64_rem() */
+	uint32_t invoked_cnt_precision = 0;
+	uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */
+
+	if (rec->flags & TIME_BENCH_LOOP) {
+		if (rec->invoked_cnt < 1000) {
+			pr_err("ERR: need more(>1000) loops(%llu) for timing\n",
+			       rec->invoked_cnt);
+			return false;
+		}
+		if (rec->invoked_cnt > ((1ULL << 32) - 1)) {
+			/* div_u64_rem() can only support div with 32bit*/
+			pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n",
+			       rec->invoked_cnt);
+			return false;
+		}
+		invoked_cnt = (uint32_t)rec->invoked_cnt;
+	}
+
+	/* TSC (Time-Stamp Counter) records */
+	if (rec->flags & TIME_BENCH_TSC) {
+		rec->tsc_interval = rec->tsc_stop - rec->tsc_start;
+		if (rec->tsc_interval == 0) {
+			pr_err("ABORT: timing took ZERO TSC time\n");
+			return false;
+		}
+		/* Calculate stats */
+		if (rec->flags & TIME_BENCH_LOOP)
+			rec->tsc_cycles = rec->tsc_interval / invoked_cnt;
+		else
+			rec->tsc_cycles = rec->tsc_interval;
+	}
+
+	/* Wall-clock time calc */
+	if (rec->flags & TIME_BENCH_WALLCLOCK) {
+		rec->time_start = rec->ts_start.tv_nsec +
+				  (NANOSEC_PER_SEC * rec->ts_start.tv_sec);
+		rec->time_stop = rec->ts_stop.tv_nsec +
+				 (NANOSEC_PER_SEC * rec->ts_stop.tv_sec);
+		rec->time_interval = rec->time_stop - rec->time_start;
+		if (rec->time_interval == 0) {
+			pr_err("ABORT: timing took ZERO wallclock time\n");
+			return false;
+		}
+		/* Calculate stats */
+		/*** Division in kernel is tricky ***/
+		/* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */
+		/* remainder only correct because NANOSEC_PER_SEC is 10^9 */
+		rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC,
+					    &rec->time_sec_remainder);
+		//TODO: use existing struct timespec records instead of div?
+
+		if (rec->flags & TIME_BENCH_LOOP) {
+			/*** Division in kernel is tricky ***/
+			/* Orig: ns = ((double)time_interval / invoked_cnt); */
+			/* First get quotient */
+			rec->ns_per_call_quotient =
+				div_u64_rem(rec->time_interval, invoked_cnt,
+					    &ns_per_call_remainder);
+			/* Now get decimals .xxx precision (incorrect roundup)*/
+			ns_per_call_tmp_rem = ns_per_call_remainder;
+			invoked_cnt_precision = invoked_cnt / 1000;
+			if (invoked_cnt_precision > 0) {
+				rec->ns_per_call_decimal =
+					div_u64_rem(ns_per_call_tmp_rem,
+						    invoked_cnt_precision,
+						    &ns_per_call_remainder);
+			}
+		}
+	}
+
+	/* Performance Monitor Unit (PMU) counters */
+	if (rec->flags & TIME_BENCH_PMU) {
+		//FIXME: Overflow handling???
+		rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start;
+		rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start;
+
+		/* The divisor of div_u64_rem() is 32-bit: reject values that
+		 * would divide by zero or be silently truncated.
+		 */
+		if (rec->pmc_clk == 0) {
+			pr_err("ABORT: PMU counted ZERO clock cycles\n");
+			return false;
+		}
+		if (rec->pmc_clk > ((1ULL << 32) - 1)) {
+			pr_err("ERR: PMU clk(%llu) too big overflow 32bit\n",
+			       rec->pmc_clk);
+			return false;
+		}
+		pmc_clk = (uint32_t)rec->pmc_clk;
+
+		/* Calc Instruction Per Cycle (IPC) */
+		/* First get quotient */
+		rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, pmc_clk,
+						    &pmc_ipc_remainder);
+		/* Now get decimals .xxx precision (incorrect roundup)*/
+		pmc_ipc_tmp_rem = pmc_ipc_remainder;
+		pmc_ipc_div = pmc_clk / 1000;
+		if (pmc_ipc_div > 0) {
+			rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem,
+							   pmc_ipc_div,
+							   &pmc_ipc_remainder);
+		}
+	}
+
+	return true;
+}
+
+/* Generic function for invoking a loop function and calculating
+ * execution time stats. The function being called/timed is assumed
+ * to perform a tight loop, and update the timing record struct.
+ *
+ * @loops and @step are copied into the record handed to @func, @txt labels
+ * the result line in the log and @data is passed through to @func untouched.
+ *
+ * Returns false if @func returned 0 (treated as failure), true otherwise.
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+		     int (*func)(struct time_bench_record *record, void *data))
+{
+	struct time_bench_record rec;
+
+	/* Setup record */
+	memset(&rec, 0, sizeof(rec)); /* zero func might not update all */
+	rec.version_abi = 1;
+	rec.loops = loops;
+	rec.step = step;
+	rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK);
+
+	/*** Loop function being timed ***/
+	if (!func(&rec, data)) {
+		pr_err("ABORT: function being timed failed\n");
+		return false;
+	}
+
+	/* loops is unsigned: print it with %u so large counts are not shown
+	 * as negative numbers
+	 */
+	if (rec.invoked_cnt < loops)
+		pr_warn("WARNING: Invoke count(%llu) smaller than loops(%u)\n",
+			rec.invoked_cnt, loops);
+
+	/* Calculate stats */
+	time_bench_calc_stats(&rec);
+
+	pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+		txt, rec.tsc_cycles, rec.ns_per_call_quotient,
+		rec.ns_per_call_decimal, rec.step, rec.time_sec,
+		rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt,
+		rec.tsc_interval);
+	if (rec.flags & TIME_BENCH_PMU)
+		pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n",
+			txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient,
+			rec.pmc_ipc_decimal);
+	return true;
+}
+
+/* Function getting invoked by kthread.
+ *
+ * Requests affinity to cpu->rec.cpu, announces itself by incrementing
+ * sync->nr_tests_running, blocks until sync->start_event is completed, runs
+ * cpu->bench_func() once, and finally sleeps until kthread_stop() is called.
+ * Always returns 0.
+ */
+static int invoke_test_on_cpu_func(void *private)
+{
+	struct time_bench_cpu *cpu = private;
+	struct time_bench_sync *sync = cpu->sync;
+	cpumask_t newmask = CPU_MASK_NONE;
+	void *data = cpu->data;
+
+	/* Restrict CPU */
+	cpumask_set_cpu(cpu->rec.cpu, &newmask);
+	set_cpus_allowed_ptr(current, &newmask);
+
+	/* Synchronize start of concurrency test */
+	atomic_inc(&sync->nr_tests_running);
+	wait_for_completion(&sync->start_event);
+
+	/* Start benchmark function */
+	if (!cpu->bench_func(&cpu->rec, data)) {
+		pr_err("ERROR: function being timed failed on CPU:%d(%d)\n",
+		       cpu->rec.cpu, smp_processor_id());
+	} else {
+		if (verbose)
+			pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu,
+				smp_processor_id());
+	}
+	cpu->did_bench_run = true;
+
+	/* End test */
+	atomic_dec(&sync->nr_tests_running);
+
+	/* Wait for kthread_stop() telling us to stop. The task state is set
+	 * before kthread_should_stop() is tested, so a wakeup issued between
+	 * the test and schedule() cannot be lost.
+	 */
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+void time_bench_print_stats_cpumask(const char *desc,
+ struct time_bench_cpu *cpu_tasks,
+ const struct cpumask *mask)
+{
+ uint64_t average = 0;
+ int cpu;
+ int step = 0;
+ struct sum {
+ uint64_t tsc_cycles;
+ int records;
+ } sum = { 0 };
+
+ /* Get stats: visit every CPU in mask; cpu_tasks[] is indexed by CPU id */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+ struct time_bench_record *rec = &c->rec;
+
+ /* Calculate stats (return value ignored: record is printed regardless) */
+ time_bench_calc_stats(rec);
+
+ pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+ desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient,
+ rec->ns_per_call_decimal, rec->step, rec->time_sec,
+ rec->time_sec_remainder, rec->time_interval,
+ rec->invoked_cnt, rec->tsc_interval);
+
+ /* Collect average; the summary reports the step of the last CPU visited */
+ sum.records++;
+ sum.tsc_cycles += rec->tsc_cycles;
+ step = rec->step;
+ }
+
+ if (sum.records) /* avoid div-by-zero when no CPU was visited */
+ average = sum.tsc_cycles / sum.records;
+ pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc,
+ average, sum.records, step);
+}
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+ const struct cpumask *mask, /* Support masking out some CPUs */
+ struct time_bench_sync *sync,
+ struct time_bench_cpu *cpu_tasks,
+ int (*func)(struct time_bench_record *record, void *data))
+{
+ int cpu, running = 0;
+
+ if (verbose) // DEBUG
+ pr_warn("%s() Started on CPU:%d\n", __func__,
+ smp_processor_id());
+
+ /* Reset sync conditions */
+ atomic_set(&sync->nr_tests_running, 0);
+ init_completion(&sync->start_event);
+
+ /* Spawn off one kthread per CPU in mask; cpu_tasks[] is indexed by CPU id */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+ running++;
+ c->sync = sync; /* Send sync variable along */
+ c->data = data; /* Send opaque along */
+
+ /* Init benchmark record */
+ memset(&c->rec, 0, sizeof(struct time_bench_record));
+ c->rec.version_abi = 1;
+ c->rec.loops = loops;
+ c->rec.step = step;
+ c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC |
+ TIME_BENCH_WALLCLOCK);
+ c->rec.cpu = cpu;
+ c->bench_func = func;
+ c->task = kthread_run(invoke_test_on_cpu_func, c,
+ "time_bench%d", cpu);
+ if (IS_ERR(c->task)) {
+ pr_err("%s(): Failed to start test func\n", __func__);
+ return; /* FIXME: kthreads already started are neither released nor stopped */
+ }
+ }
+
+ /* Wait until all kthreads have checked in via nr_tests_running */
+ while (atomic_read(&sync->nr_tests_running) < running) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+ /* Kick off all CPUs concurrently on completion event */
+ complete_all(&sync->start_event);
+
+ /* Wait for CPUs to finish (each kthread decrements nr_tests_running) */
+ while (atomic_read(&sync->nr_tests_running)) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+
+ /* Stop the kthreads */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+ kthread_stop(c->task);
+ }
+
+ if (verbose) // DEBUG - happens often, finish on another CPU
+ pr_warn("%s() Finished on CPU:%d\n", __func__,
+ smp_processor_id());
+}
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h
new file mode 100644
index 000000000000..e113fcf341dc
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ * for licensing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_TIME_BENCH_H
+#define _LINUX_TIME_BENCH_H
+
+/* Main structure used for recording a benchmark run */
+struct time_bench_record {
+ uint32_t version_abi;
+ uint32_t loops; /* Requested loop invocations */
+ uint32_t step; /* option for e.g. bulk invocations */
+
+ uint32_t flags; /* Measurements types enabled */
+#define TIME_BENCH_LOOP BIT(0)
+#define TIME_BENCH_TSC BIT(1)
+#define TIME_BENCH_WALLCLOCK BIT(2)
+#define TIME_BENCH_PMU BIT(3)
+
+ uint32_t cpu; /* Used when embedded in time_bench_cpu */
+
+ /* Records */
+ uint64_t invoked_cnt; /* Returned actual invocations */
+ /* TSC samples taken by time_bench_start() / time_bench_stop() */
+ uint64_t tsc_start;
+ uint64_t tsc_stop;
+ /* Wall-clock samples taken with ktime_get_real_ts64() */
+ struct timespec64 ts_start;
+ struct timespec64 ts_stop;
+ /* PMU counters for instruction and cycles
+ * instructions counter including pipelined instructions
+ *
+ * The pmc_* start/stop values are only sampled when TIME_BENCH_PMU
+ * is set in flags.
+ */
+ uint64_t pmc_inst_start;
+ uint64_t pmc_inst_stop;
+ /* CPU unhalted clock counter */
+ uint64_t pmc_clk_start;
+ uint64_t pmc_clk_stop;
+
+ /* Result records
+ * NOTE(review): presumably filled in by time_bench_calc_stats();
+ * the computation is not visible in this header.
+ */
+ uint64_t tsc_interval;
+ uint64_t time_start, time_stop, time_interval; /* in nanosec */
+ uint64_t pmc_inst, pmc_clk;
+
+ /* Derived result records */
+ uint64_t tsc_cycles; // +decimal?
+ uint64_t ns_per_call_quotient, ns_per_call_decimal;
+ uint64_t time_sec;
+ uint32_t time_sec_remainder;
+ uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */
+};
+
+/* For synchronizing parallel CPUs to run concurrently */
+struct time_bench_sync {
+ /* Polled by the launcher: it waits for this to reach the number of
+ * spawned kthreads before kicking them off, and then for it to drop
+ * back to zero before stopping them.
+ */
+ atomic_t nr_tests_running;
+ /* Signalled with complete_all() to release all CPUs concurrently */
+ struct completion start_event;
+};
+
+/* Keep track of CPUs executing our bench function.
+ *
+ * Embed a time_bench_record for storing info per cpu
+ */
+struct time_bench_cpu {
+ struct time_bench_record rec;
+ struct time_bench_sync *sync; /* back ptr */
+ struct task_struct *task; /* kthread started for this CPU */
+ /* "data" opaque could have been placed in time_bench_sync,
+ * but to avoid any false sharing, place it per CPU
+ */
+ void *data;
+ /* Support masking out some CPUs, mark if it ran */
+ bool did_bench_run;
+ /* int cpu; // note CPU stored in time_bench_record */
+ /* Benchmark callback; receives &rec and the opaque data pointer */
+ int (*bench_func)(struct time_bench_record *record, void *data);
+};
+
+/*
+ * Below TSC assembler code is not compatible with other archs, and
+ * can also fail on guests if cpu-flags are not correct.
+ *
+ * The way TSC reading is used, many iterations, does not require as
+ * high accuracy as described below (in Intel Doc #324264).
+ *
+ * Considering changing to use get_cycles() (#include <asm/timex.h>).
+ */
+
+/** TSC (Time-Stamp Counter) based **
+ * Recommend reading, to understand details of reading TSC accurately:
+ * Intel Doc #324264, "How to Benchmark Code Execution Times on Intel"
+ *
+ * Consider getting exclusive ownership of CPU by using:
+ * unsigned long flags;
+ * preempt_disable();
+ * raw_local_irq_save(flags);
+ * _your_code_
+ * raw_local_irq_restore(flags);
+ * preempt_enable();
+ *
+ * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx"
+ * RDTSC only changes "%rax" and "%rdx" but
+ * CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx)
+ */
+/* Sample the TSC at the start of a measurement and return it as a single
+ * 64-bit value.
+ */
+static __always_inline uint64_t tsc_start_clock(void)
+{
+ /* See: Intel Doc #324264 */
+ unsigned int hi, lo;
+
+ /* CPUID is issued ahead of RDTSC (per the referenced Intel doc);
+ * the counter is then copied out of EDX (high half) and EAX (low
+ * half).
+ */
+ asm volatile("CPUID\n\t"
+ "RDTSC\n\t"
+ "mov %%edx, %0\n\t"
+ "mov %%eax, %1\n\t"
+ : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+ //FIXME: on 32bit use clobbered %eax + %edx
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+/* Sample the TSC at the end of a measurement and return it as a single
+ * 64-bit value. Mirror image of tsc_start_clock(): here the counter is
+ * read (RDTSCP) and saved first, and CPUID is issued afterwards.
+ */
+static __always_inline uint64_t tsc_stop_clock(void)
+{
+ /* See: Intel Doc #324264 */
+ unsigned int hi, lo;
+
+ asm volatile("RDTSCP\n\t"
+ "mov %%edx, %0\n\t"
+ "mov %%eax, %1\n\t"
+ "CPUID\n\t"
+ : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+/** Wall-clock based **
+ *
+ * use: getnstimeofday()
+ * getnstimeofday(&rec->ts_start);
+ * getnstimeofday(&rec->ts_stop);
+ *
+ * API changed see: Documentation/core-api/timekeeping.rst
+ * https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday
+ *
+ * We should instead use: ktime_get_real_ts64() is a direct
+ * replacement, but consider using monotonic time (ktime_get_ts64())
+ * and/or a ktime_t based interface (ktime_get()/ktime_get_real()).
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ *
+ * Needed for calculating: Instructions Per Cycle (IPC)
+ * - The IPC number tells how efficient the CPU pipelining was
+ */
+//lookup: perf_event_create_kernel_counter()
+
+bool time_bench_PMU_config(bool enable);
+
+/* Raw reading via rdpmc() using fixed counters
+ *
+ * From: https://github.com/andikleen/simple-pmu
+ */
+enum {
+ /* OR-ed into the counter index handed to p_rdpmc() */
+ FIXED_SELECT = (1U << 30), /* == 0x40000000 */
+ /* Counter indexes combined with FIXED_SELECT */
+ FIXED_INST_RETIRED_ANY = 0,
+ FIXED_CPU_CLK_UNHALTED_CORE = 1,
+ FIXED_CPU_CLK_UNHALTED_REF = 2,
+};
+
+/* Execute rdpmc with @in as the counter selector (passed in ECX) and
+ * return the result, assembled from EDX (high 32 bits) and EAX (low 32
+ * bits), as one 64-bit value.
+ */
+static __always_inline unsigned int long long p_rdpmc(unsigned int in)
+{
+ unsigned int d, a;
+
+ asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory");
+ return ((unsigned long long)d << 32) | a;
+}
+
+/* These PMU counters need to be enabled, but I don't have the
+ * configure code implemented. My current hack is running:
+ * sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko
+ */
+/* Reading all pipelined instruction */
+static __always_inline unsigned long long pmc_inst(void)
+{
+ /* Fixed counter FIXED_INST_RETIRED_ANY, read via rdpmc */
+ return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY);
+}
+
+/* Reading CPU clock cycles */
+static __always_inline unsigned long long pmc_clk(void)
+{
+ /* Fixed counter FIXED_CPU_CLK_UNHALTED_CORE, read via rdpmc */
+ return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE);
+}
+
+/* Raw reading via MSR rdmsr() is likely wrong
+ * FIXME: How can I know which raw MSR registers are conf for what?
+ */
+#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */
+#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */
+#define MSR_IA32_PCM2 0x400000C3
+/* Read MSR_IA32_PCM0 into *@msr_result.
+ *
+ * NOTE(review): the function returns whatever rdmsrq_safe() returns, not
+ * the counter value; presumably that is a status code -- confirm before
+ * treating the return value as an instruction count.
+ */
+static inline uint64_t msr_inst(unsigned long long *msr_result)
+{
+ return rdmsrq_safe(MSR_IA32_PCM0, msr_result);
+}
+
+/** Generic functions **
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+ int (*func)(struct time_bench_record *rec, void *data));
+bool time_bench_calc_stats(struct time_bench_record *rec);
+
+/* Run @func on the CPUs selected by @mask, using @sync for start/finish
+ * synchronization and @cpu_tasks for per-CPU state.
+ */
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+ const struct cpumask *mask, /* Support masking out some CPUs */
+ struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks,
+ int (*func)(struct time_bench_record *record, void *data));
+void time_bench_print_stats_cpumask(const char *desc,
+ struct time_bench_cpu *cpu_tasks,
+ const struct cpumask *mask);
+
+//FIXME: use rec->flags to select measurement, should be MACRO
+/* Take the "start" samples for @rec. The wall clock is read first, then
+ * (only if TIME_BENCH_PMU is set) the PMU counters, and the TSC last, so
+ * the TSC sample is the one closest to the benchmarked code.
+ */
+static __always_inline void time_bench_start(struct time_bench_record *rec)
+{
+ //getnstimeofday(&rec->ts_start);
+ ktime_get_real_ts64(&rec->ts_start);
+ if (rec->flags & TIME_BENCH_PMU) {
+ rec->pmc_inst_start = pmc_inst();
+ rec->pmc_clk_start = pmc_clk();
+ }
+ rec->tsc_start = tsc_start_clock();
+}
+
+/* Take the "stop" samples for @rec in the reverse order of
+ * time_bench_start(): TSC first, then (only if TIME_BENCH_PMU is set) the
+ * PMU counters, then the wall clock. Also stores @invoked_cnt in the
+ * record.
+ */
+static __always_inline void time_bench_stop(struct time_bench_record *rec,
+ uint64_t invoked_cnt)
+{
+ rec->tsc_stop = tsc_stop_clock();
+ if (rec->flags & TIME_BENCH_PMU) {
+ rec->pmc_inst_stop = pmc_inst();
+ rec->pmc_clk_stop = pmc_clk();
+ }
+ //getnstimeofday(&rec->ts_stop);
+ ktime_get_real_ts64(&rec->ts_stop);
+ rec->invoked_cnt = invoked_cnt;
+}
+
+#endif /* _LINUX_TIME_BENCH_H */
diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
new file mode 100755
index 000000000000..7b8b18cfedce
--- /dev/null
+++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+set -e
+
+DRIVER="./page_pool/bench_page_pool.ko"
+result=""
+
+# Reload the benchmark module, grab the last 10 dmesg lines into the global
+# $result, print them, and then print the "Per elem" line of each of the three
+# page_pool benchmarks. The script runs under "set -e", so a failing insmod or
+# a grep that finds nothing aborts the script.
+function run_test()
+{
+ local entry
+
+ # The module may not be loaded yet; ignore a failing rmmod.
+ rmmod "bench_page_pool.ko" || true
+ insmod $DRIVER > /dev/null 2>&1
+ result=$(dmesg | tail -10)
+ echo "$result"
+
+ # Each entry is "<section title>:<benchmark number>".
+ for entry in "Fast path:01" "ptr_ring:02" "slow path:03"; do
+  echo
+  echo "${entry%:*} results:"
+  echo "${result}" | grep -o -E "no-softirq-page_pool${entry#*:} Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+ done
+}
+
+run_test
+
+exit 0
diff --git a/tools/testing/selftests/net/broadcast_pmtu.sh b/tools/testing/selftests/net/broadcast_pmtu.sh
new file mode 100755
index 000000000000..726eb5d25839
--- /dev/null
+++ b/tools/testing/selftests/net/broadcast_pmtu.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Ensures broadcast route MTU is respected
+
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+CLIENT_IP4="192.168.0.1/24"
+CLIENT_BROADCAST_ADDRESS="192.168.0.255"
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+SERVER_IP4="192.168.0.2/24"
+
+# Create the client and server namespaces joined by a veth pair (link0 in the
+# client, link1 in the server), then replace the client's broadcast route with
+# one that carries an explicit MTU of 1500.
+setup() {
+ ip netns add "${CLIENT_NS}"
+ ip netns add "${SERVER_NS}"
+
+ ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}"
+
+ # Client side: device MTU 9000
+ ip -net "${CLIENT_NS}" link set link0 up
+ ip -net "${CLIENT_NS}" link set link0 mtu 9000
+ ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}" dev link0
+
+ # Server side: device MTU 1500
+ ip -net "${SERVER_NS}" link set link1 up
+ ip -net "${SERVER_NS}" link set link1 mtu 1500
+ ip -net "${SERVER_NS}" addr add "${SERVER_IP4}" dev link1
+
+ # Split the first line of the broadcast route listing into words, then
+ # delete that route and re-add it with "mtu 1500" appended.
+ read -r -a CLIENT_BROADCAST_ENTRY <<< "$(ip -net "${CLIENT_NS}" route show table local type broadcast)"
+ ip -net "${CLIENT_NS}" route del "${CLIENT_BROADCAST_ENTRY[@]}"
+ ip -net "${CLIENT_NS}" route add "${CLIENT_BROADCAST_ENTRY[@]}" mtu 1500
+
+ # Clear icmp_echo_ignore_broadcasts in the server namespace
+ ip net exec "${SERVER_NS}" sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0
+}
+
+# Tear down what setup() created: the veth pair (via link1) and both
+# namespaces. Installed as the EXIT trap.
+cleanup() {
+ ip -net "${SERVER_NS}" link del link1
+ ip netns del "${CLIENT_NS}"
+ ip netns del "${SERVER_NS}"
+}
+
+trap cleanup EXIT
+
+setup &&
+ echo "Testing for broadcast route MTU" &&
+ ip net exec "${CLIENT_NS}" ping -f -M want -q -c 1 -s 8000 -w 1 -b "${CLIENT_BROADCAST_ADDRESS}" > /dev/null 2>&1
+
+exit $?
+
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 3cfef5153823..c24417d0047b 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -30,16 +30,25 @@ CONFIG_NET_FOU=y
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NF_CONNTRACK=m
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_SIT=y
CONFIG_NF_NAT=m
CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_RAW=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IPV6_GRE=m
CONFIG_IPV6_SEG6_LWTUNNEL=y
@@ -57,6 +66,8 @@ CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_NAT=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
+CONFIG_NETFILTER_XT_NAT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_GACT=m
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 00bde7b6f39e..d7bb2e80e88c 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -102,6 +102,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
vxlan_bridge_1d_port_8472.sh \
vxlan_bridge_1d.sh \
vxlan_bridge_1q_ipv6.sh \
+ vxlan_bridge_1q_mc_ul.sh \
vxlan_bridge_1q_port_8472_ipv6.sh \
vxlan_bridge_1q_port_8472.sh \
vxlan_bridge_1q.sh \
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 508f3c700d71..890b3374dacd 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -37,6 +37,7 @@ declare -A NETIFS=(
: "${TEAMD:=teamd}"
: "${MCD:=smcrouted}"
: "${MC_CLI:=smcroutectl}"
+: "${MCD_TABLE_NAME:=selftests}"
# Constants for netdevice bring-up:
# Default time in seconds to wait for an interface to come up before giving up
@@ -141,6 +142,20 @@ check_tc_version()
fi
}
+# Probe whether tc understands the flower "erspan_opts" key by installing a
+# throw-away ingress filter on the given device.
+# Arguments: $1 - device to install the probe filter on
+# Returns: $ksft_skip (after printing a SKIP message) if the filter cannot be
+# added; otherwise the probe filter is removed again.
+check_tc_erspan_support()
+{
+ local dev=$1; shift
+
+ tc filter add dev $dev ingress pref 1 handle 1 flower \
+ erspan_opts 1:0:0:0 &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing erspan support"
+ return $ksft_skip
+ fi
+ tc filter del dev $dev ingress pref 1 handle 1 flower \
+ erspan_opts 1:0:0:0 &> /dev/null
+}
+
# Old versions of tc don't understand "mpls_uc"
check_tc_mpls_support()
{
@@ -525,9 +540,9 @@ setup_wait_dev_with_timeout()
return 1
}
-setup_wait()
+setup_wait_n()
{
- local num_netifs=${1:-$NUM_NETIFS}
+ local num_netifs=$1; shift
local i
for ((i = 1; i <= num_netifs; ++i)); do
@@ -538,6 +553,11 @@ setup_wait()
sleep $WAIT_TIME
}
+# Wait for all $NUM_NETIFS test interfaces; thin wrapper around
+# setup_wait_n for callers that do not pass an explicit count.
+setup_wait()
+{
+ setup_wait_n "$NUM_NETIFS"
+}
+
wait_for_dev()
{
local dev=$1; shift
@@ -1757,6 +1777,51 @@ mc_send()
msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
}
+# Start the multicast routing daemon ($MCD) with a temporary configuration and
+# register deferred cleanup for everything this function sets up.
+#
+# Arguments: extra interfaces to enable in the daemon configuration, in
+#            addition to ${NETIFS[p1]} .. ${NETIFS[p$NUM_NETIFS]}.
+# Globals:   MCD, MC_CLI, MCD_TABLE_NAME, NETIFS, NUM_NETIFS, BUSYWAIT_TIMEOUT
+# Returns:   1 if check_command fails for $MCD or $MC_CLI.
+adf_mcd_start()
+{
+ local ifs=("$@")
+
+ local table_name="$MCD_TABLE_NAME"
+ local smcroutedir
+ local pid
+ local if
+ local i
+
+ check_command "$MCD" || return 1
+ check_command "$MC_CLI" || return 1
+
+ # Config and PID file live in a scratch directory removed on cleanup.
+ smcroutedir=$(mktemp -d)
+ defer rm -rf "$smcroutedir"
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ echo "phyint ${NETIFS[p$i]} enable" >> \
+ "$smcroutedir/$table_name.conf"
+ done
+
+ # Extra interfaces get the MULTICAST flag switched on if it is missing
+ # (and switched back off on cleanup) before being added to the config.
+ for if in "${ifs[@]}"; do
+ if ! ip_link_has_flag "$if" MULTICAST; then
+ ip link set dev "$if" multicast on
+ defer ip link set dev "$if" multicast off
+ fi
+
+ echo "phyint $if enable" >> \
+ "$smcroutedir/$table_name.conf"
+ done
+
+ "$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \
+ -P "$smcroutedir/$table_name.pid"
+ # Wait for the PID file to appear, then arrange for the daemon to be
+ # killed on cleanup.
+ busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid"
+ pid=$(cat "$smcroutedir/$table_name.pid")
+ defer kill_process "$pid"
+}
+
+# Run $MC_CLI against the daemon instance named $MCD_TABLE_NAME, passing all
+# arguments through unchanged.
+mc_cli()
+{
+ local table_name="$MCD_TABLE_NAME"
+
+ "$MC_CLI" -I "$table_name" "$@"
+}
+
start_ip_monitor()
{
local mtype=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh
index 5a58b1ec8aef..83e52abdbc2e 100755
--- a/tools/testing/selftests/net/forwarding/router_multicast.sh
+++ b/tools/testing/selftests/net/forwarding/router_multicast.sh
@@ -33,10 +33,6 @@ NUM_NETIFS=6
source lib.sh
source tc_common.sh
-require_command $MCD
-require_command $MC_CLI
-table_name=selftests
-
h1_create()
{
simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64
@@ -149,25 +145,6 @@ router_destroy()
ip link set dev $rp1 down
}
-start_mcd()
-{
- SMCROUTEDIR="$(mktemp -d)"
-
- for ((i = 1; i <= $NUM_NETIFS; ++i)); do
- echo "phyint ${NETIFS[p$i]} enable" >> \
- $SMCROUTEDIR/$table_name.conf
- done
-
- $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \
- -P $SMCROUTEDIR/$table_name.pid
-}
-
-kill_mcd()
-{
- pkill $MCD
- rm -rf $SMCROUTEDIR
-}
-
setup_prepare()
{
h1=${NETIFS[p1]}
@@ -179,7 +156,7 @@ setup_prepare()
rp3=${NETIFS[p5]}
h3=${NETIFS[p6]}
- start_mcd
+ adf_mcd_start || exit "$EXIT_STATUS"
vrf_prepare
@@ -206,7 +183,7 @@ cleanup()
vrf_cleanup
- kill_mcd
+ defer_scopes_cleanup
}
create_mcast_sg()
@@ -214,9 +191,9 @@ create_mcast_sg()
local if_name=$1; shift
local s_addr=$1; shift
local mcast=$1; shift
- local dest_ifs=${@}
+ local dest_ifs=("${@}")
- $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs
+ mc_cli add "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
}
delete_mcast_sg()
@@ -224,9 +201,9 @@ delete_mcast_sg()
local if_name=$1; shift
local s_addr=$1; shift
local mcast=$1; shift
- local dest_ifs=${@}
+ local dest_ifs=("${@}")
- $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs
+ mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
}
mcast_v4()
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index b1daad19b01e..b58909a93112 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -6,7 +6,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
match_ip_tos_test match_indev_test match_ip_ttl_test
match_mpls_label_test \
match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \
- match_mpls_lse_test"
+ match_mpls_lse_test match_erspan_opts_test"
NUM_NETIFS=2
source tc_common.sh
source lib.sh
@@ -676,6 +676,56 @@ match_mpls_lse_test()
log_test "mpls lse match ($tcflags)"
}
+match_erspan_opts_test()
+{
+ RET=0
+
+ check_tc_erspan_support $h2 || return 0
+
+ # h1 erspan setup
+ tunnel_create erspan1 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1001 \
+ tos C ttl 64 erspan_ver 1 erspan 6789 # ERSPAN Type II
+ tunnel_create erspan2 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1002 \
+ tos C ttl 64 erspan_ver 2 erspan_dir egress erspan_hwid 63 \
+ # ERSPAN Type III
+ ip link set dev erspan1 master v$h1
+ ip link set dev erspan2 master v$h1
+ # h2 erspan setup
+ ip link add ep-ex type erspan ttl 64 external # To collect tunnel info
+ ip link set ep-ex up
+ ip link set dev ep-ex master v$h2
+ tc qdisc add dev ep-ex clsact
+
+ # ERSPAN Type II [decap direction]
+ tc filter add dev ep-ex ingress protocol ip handle 101 flower \
+ $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+ enc_key_id 1001 erspan_opts 1:6789:0:0 \
+ action drop
+ # ERSPAN Type III [decap direction]
+ tc filter add dev ep-ex ingress protocol ip handle 102 flower \
+ $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+ enc_key_id 1002 erspan_opts 2:0:1:63 action drop
+
+ ep1mac=$(mac_get erspan1)
+ $MZ erspan1 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q
+ tc_check_packets "dev ep-ex ingress" 101 1
+ check_err $? "ERSPAN Type II"
+
+ ep2mac=$(mac_get erspan2)
+ $MZ erspan2 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q
+ tc_check_packets "dev ep-ex ingress" 102 1
+ check_err $? "ERSPAN Type III"
+
+ # h2 erspan cleanup
+ tc qdisc del dev ep-ex clsact
+ tunnel_destroy ep-ex
+ # h1 erspan cleanup
+ tunnel_destroy erspan2 # ERSPAN Type III
+ tunnel_destroy erspan1 # ERSPAN Type II
+
+ log_test "erspan_opts match ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
new file mode 100755
index 000000000000..462db0b603e7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
@@ -0,0 +1,771 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------------------------+
+# | + $h1.10 + $h1.20 |
+# | | 192.0.2.1/28 | 2001:db8:1::1/64 |
+# | \________ ________/ |
+# | \ / |
+# | + $h1 H1 (vrf) |
+# +-----------|-----------------------------+
+# |
+# +-----------|----------------------------------------------------------------+
+# | +---------|--------------------------------------+ SWITCH (main vrf) |
+# | | + $swp1 BR1 (802.1q) | |
+# | | vid 10 20 | |
+# | | | |
+# | | + vx10 (vxlan) + vx20 (vxlan) | + lo10 (dummy) |
+# | | local 192.0.2.100 local 2001:db8:4::1 | 192.0.2.100/28 |
+# | | group 233.252.0.1 group ff0e::1:2:3 | 2001:db8:4::1/64 |
+# | | id 1000 id 2000 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | +------------------------------------------------+ |
+# | |
+# | + $swp2 $swp3 + |
+# | | 192.0.2.33/28 192.0.2.65/28 | |
+# | | 2001:db8:2::1/64 2001:db8:3::1/64 | |
+# | | | |
+# +---|--------------------------------------------------------------------|---+
+# | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | | H2 (vrf) | | H3 (vrf) | |
+# | +-|----------------------------+ | | +-----------------------------|-+ |
+# | | + $h2 BR2 (802.1d) | | | | BR3 (802.1d) $h3 + | |
+# | | | | | | | |
+# | | + v1$h2 (veth) | | | | v1$h3 (veth) + | |
+# | +-|----------------------------+ | | +-----------------------------|-+ |
+# | | | | | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | + v2$h2 (veth) NS2 (netns) | | NS3 (netns) v2$h3 (veth) + |
+# | 192.0.2.34/28 | | 192.0.2.66/28 |
+# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 |
+# | | | |
+# | +--------------------------------+ | | +--------------------------------+ |
+# | | BR1 (802.1q) | | | | BR1 (802.1q) | |
+# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | |
+# | | local 192.0.2.34 | | | | local 192.0.2.50 | |
+# | | group 233.252.0.1 dev v2$h2 | | | | group 233.252.0.1 dev v2$h3 | |
+# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | |
+# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | |
+# | | | | | | | |
+# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | |
+# | | local 2001:db8:2::2 | | | | local 2001:db8:3::2 | |
+# | | group ff0e::1:2:3 dev v2$h2 | | | | group ff0e::1:2:3 dev v2$h3 | |
+# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | |
+# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | |
+# | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 10 20 | | | | | vid 10 20 | |
+# | +--|-----------------------------+ | | +--|-----------------------------+ |
+# | | | | | |
+# | +--|-----------------------------+ | | +--|-----------------------------+ |
+# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | |
+# | | |\ | | | | |\ | |
+# | | | + w2.10 | | | | | + w2.10 | |
+# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | |
+# | | | | | | | | | |
+# | | + w2.20 | | | | + w2.20 | |
+# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | |
+# | +--------------------------------+ | | +--------------------------------+ |
+# +------------------------------------+ +------------------------------------+
+#
+#shellcheck disable=SC2317 # SC doesn't see our uses of functions.
+
+: "${VXPORT:=4789}"
+export VXPORT
+
+: "${GROUP4:=233.252.0.1}"
+export GROUP4
+
+: "${GROUP6:=ff0e::1:2:3}"
+export GROUP6
+
+: "${IPMR:=lo10}"
+
+ALL_TESTS="
+ ipv4_nomcroute
+ ipv4_mcroute
+ ipv4_mcroute_changelink
+ ipv4_mcroute_starg
+ ipv4_mcroute_noroute
+ ipv4_mcroute_fdb
+ ipv4_mcroute_fdb_oif0
+ ipv4_mcroute_fdb_oif0_sep
+
+ ipv6_nomcroute
+ ipv6_mcroute
+ ipv6_mcroute_changelink
+ ipv6_mcroute_starg
+ ipv6_mcroute_noroute
+ ipv6_mcroute_fdb
+ ipv6_mcroute_fdb_oif0
+
+ ipv4_nomcroute_rx
+ ipv4_mcroute_rx
+ ipv4_mcroute_starg_rx
+ ipv4_mcroute_fdb_oif0_sep_rx
+ ipv4_mcroute_fdb_sep_rx
+
+ ipv6_nomcroute_rx
+ ipv6_mcroute_rx
+ ipv6_mcroute_starg_rx
+ ipv6_mcroute_fdb_sep_rx
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+# Set up host H1: initialize $h1 and add two VLAN uppers enslaved to v$h1 --
+# $h1.10 (VID 10) with the IPv4 address and $h1.20 (VID 20) with the IPv6
+# address.
+h1_create()
+{
+ simple_if_init "$h1"
+ defer simple_if_fini "$h1"
+
+ ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10
+ ip_link_set_up "$h1.10"
+ ip_addr_add "$h1.10" 192.0.2.1/28
+
+ ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20
+ ip_link_set_up "$h1.20"
+ ip_addr_add "$h1.20" 2001:db8:1::1/64
+}
+
+# Install ingress flower filters on a device that match UDP packets with
+# destination port $VXPORT: pref 104 for IPv4 and pref 106 for IPv6. The
+# filters only pass the traffic; do_test reads their counters using
+# pref = 100 + IP version.
+# Arguments: $1 - device to install the filters on
+install_capture()
+{
+ local dev=$1; shift
+
+ tc qdisc add dev "$dev" clsact
+ defer tc qdisc del dev "$dev" clsact
+
+ tc filter add dev "$dev" ingress proto ip pref 104 \
+ flower skip_hw ip_proto udp dst_port "$VXPORT" \
+ action pass
+ defer tc filter del dev "$dev" ingress proto ip pref 104
+
+ tc filter add dev "$dev" ingress proto ipv6 pref 106 \
+ flower skip_hw ip_proto udp dst_port "$VXPORT" \
+ action pass
+ defer tc filter del dev "$dev" ingress proto ipv6 pref 106
+}
+
+# Set up H2: a VRF v$h2 containing bridge br2 (no VLAN filtering, no
+# multicast snooping), which bridges $h2 and v1$h2. The VXLAN packet
+# counters are installed on $h2.
+h2_create()
+{
+ # $h2
+ ip_link_set_up "$h2"
+
+ # H2
+ vrf_create "v$h2"
+ defer vrf_destroy "v$h2"
+
+ ip_link_set_up "v$h2"
+
+ # br2
+ ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0
+ ip_link_set_master br2 "v$h2"
+ ip_link_set_up br2
+
+ # $h2
+ ip_link_set_master "$h2" br2
+ install_capture "$h2"
+
+ # v1$h2
+ ip_link_set_up "v1$h2"
+ ip_link_set_master "v1$h2" br2
+}
+
+# Set up H3: same layout as H2, with VRF v$h3, bridge br3, and ports $h3 and
+# v1$h3. The VXLAN packet counters are installed on $h3.
+h3_create()
+{
+ # $h3
+ ip_link_set_up "$h3"
+
+ # H3
+ vrf_create "v$h3"
+ defer vrf_destroy "v$h3"
+
+ ip_link_set_up "v$h3"
+
+ # br3
+ ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0
+ ip_link_set_master br3 "v$h3"
+ ip_link_set_up br3
+
+ # $h3
+ ip_link_set_master "$h3" br3
+ install_capture "$h3"
+
+ # v1$h3
+ ip_link_set_up "v1$h3"
+ ip_link_set_master "v1$h3" br3
+}
+
+# Set up the switch: VLAN-aware bridge br1 (using $swp1's MAC address) with
+# $swp1 as a member of VLANs 10 and 20, the dummy devices $IPMR and X$IPMR,
+# and the routed ports $swp2 and $swp3. The VXLAN devices themselves are
+# created by the individual tests.
+switch_create()
+{
+ local swp1_mac
+
+ # br1
+ swp1_mac=$(mac_get "$swp1")
+ ip_link_add br1 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+ ip_link_set_addr br1 "$swp1_mac"
+ ip_link_set_up br1
+
+ # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on
+ # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test.
+ ip_link_add "X$IPMR" up type dummy
+
+ # IPMR
+ ip_link_add "$IPMR" up type dummy
+ ip_addr_add "$IPMR" 192.0.2.100/28
+ ip_addr_add "$IPMR" 2001:db8:4::1/64
+
+ # $swp1
+ ip_link_set_up "$swp1"
+ ip_link_set_master "$swp1" br1
+ bridge_vlan_add vid 10 dev "$swp1"
+ bridge_vlan_add vid 20 dev "$swp1"
+
+ # $swp2
+ ip_link_set_up "$swp2"
+ ip_addr_add "$swp2" 192.0.2.33/28
+ ip_addr_add "$swp2" 2001:db8:2::1/64
+
+ # $swp3
+ ip_link_set_up "$swp3"
+ ip_addr_add "$swp3" 192.0.2.65/28
+ ip_addr_add "$swp3" 2001:db8:3::1/64
+}
+
+# Create a VXLAN device, enslave it to br1 and make it the PVID/untagged
+# member of one VLAN.
+# Arguments: $1 - device name
+#            $2 - VLAN ID to map the device to
+#            remaining arguments are appended to the "type vxlan" options
+vx_create()
+{
+ local name=$1; shift
+ local vid=$1; shift
+
+ ip_link_add "$name" up type vxlan dstport "$VXPORT" \
+ nolearning noudpcsum tos inherit ttl 16 \
+ "$@"
+ ip_link_set_master "$name" br1
+ bridge_vlan_add vid "$vid" dev "$name" pvid untagged
+}
+export -f vx_create
+
+# Fixed 10 second settle delay used after creating a VXLAN device.
+vx_wait()
+{
+ # Wait for all the ARP, IGMP etc. noise to settle down so that the
+ # tunnel is clear for measurements.
+ sleep 10
+}
+
+# Create vx10: VNI 1000 mapped to VLAN 10. Extra arguments are passed on to
+# vx_create.
+vx10_create()
+{
+ vx_create vx10 10 id 1000 "$@"
+}
+export -f vx10_create
+
+# Create vx20: VNI 2000 mapped to VLAN 20. Extra arguments are passed on to
+# vx_create.
+vx20_create()
+{
+ vx_create vx20 20 id 2000 "$@"
+}
+export -f vx20_create
+
+# vx10_create followed by the vx_wait settle delay.
+vx10_create_wait()
+{
+ vx10_create "$@"
+ vx_wait
+}
+
+# vx20_create followed by the vx_wait settle delay.
+vx20_create_wait()
+{
+ vx20_create "$@"
+ vx_wait
+}
+
+# Build the remote-VTEP topology; ns2_create and ns3_create run this inside
+# their namespace via in_ns. It creates bridge br1 with vx10/vx20 bound to the
+# underlay interface, plus a veth pair w1/w2 whose w2 end carries the overlay
+# host addresses on VLAN uppers w2.10 and w2.20.
+# Arguments: $1 - namespace name (not used in the body)
+#            $2 - underlay interface
+#            $3 - underlay IPv4 address/prefix
+#            $4 - underlay IPv6 address/prefix
+#            $5 - overlay host IPv4 address/prefix (assigned to w2.10)
+#            $6 - overlay host IPv6 address/prefix (assigned to w2.20)
+ns_init_common()
+{
+ local ns=$1; shift
+ local if_in=$1; shift
+ local ipv4_in=$1; shift
+ local ipv6_in=$1; shift
+ local ipv4_host=$1; shift
+ local ipv6_host=$1; shift
+
+ # v2$h2 / v2$h3
+ ip_link_set_up "$if_in"
+ ip_addr_add "$if_in" "$ipv4_in"
+ ip_addr_add "$if_in" "$ipv6_in"
+
+ # br1
+ ip_link_add br1 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+ ip_link_set_up br1
+
+ # vx10, vx20
+ # "${ipv4_in%/*}" strips the prefix length to get the bare address.
+ vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in"
+ vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in"
+
+ # w1
+ ip_link_add w1 type veth peer name w2
+ ip_link_set_master w1 br1
+ ip_link_set_up w1
+ bridge_vlan_add vid 10 dev w1
+ bridge_vlan_add vid 20 dev w1
+
+ # w2
+ simple_if_init w2
+ defer simple_if_fini w2
+
+ # w2.10
+ ip_link_add w2.10 master vw2 link w2 type vlan id 10
+ ip_link_set_up w2.10
+ ip_addr_add w2.10 "$ipv4_host"
+
+ # w2.20
+ ip_link_add w2.20 master vw2 link w2 type vlan id 20
+ ip_link_set_up w2.20
+ ip_addr_add w2.20 "$ipv6_host"
+}
+export -f ns_init_common
+
+# Create namespace ns2, move v2$h2 into it (moved back to netns 1 on
+# cleanup) and build the remote-VTEP topology inside it.
+ns2_create()
+{
+ # NS2
+ ip netns add ns2
+ defer ip netns del ns2
+
+ # v2$h2
+ ip link set dev "v2$h2" netns ns2
+ defer ip -n ns2 link set dev "v2$h2" netns 1
+
+ in_ns ns2 \
+ ns_init_common ns2 "v2$h2" \
+ 192.0.2.34/28 2001:db8:2::2/64 \
+ 192.0.2.3/28 2001:db8:1::3/64
+}
+
+# Create namespace ns3, move v2$h3 into it (moved back to netns 1 on
+# cleanup) and build the remote-VTEP topology inside it.
+ns3_create()
+{
+ # NS3
+ ip netns add ns3
+ defer ip netns del ns3
+
+ # v2$h3
+ ip link set dev "v2$h3" netns ns3
+ defer ip -n ns3 link set dev "v2$h3" netns 1
+
+ # NOTE(review): ns2_create has no equivalent of this explicit "up";
+ # ns_init_common also brings the interface up -- confirm it is needed.
+ ip -n ns3 link set dev "v2$h3" up
+
+ in_ns ns3 \
+ ns_init_common ns3 "v2$h3" \
+ 192.0.2.66/28 2001:db8:3::2/64 \
+ 192.0.2.4/28 2001:db8:1::4/64
+}
+
+# Map the six test interfaces to their roles, enable forwarding, create the
+# two veth pairs that link H2/H3 to the namespaces, and build the topology.
+# Cleanup actions are registered with defer as resources are created.
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ defer vrf_cleanup
+
+ forwarding_enable
+ defer forwarding_restore
+
+ ip_link_add "v1$h2" type veth peer name "v2$h2"
+ ip_link_add "v1$h3" type veth peer name "v2$h3"
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+ ns2_create
+ ns3_create
+}
+
+# Start the MC daemon and install deliberately misleading (S,G) routes: the
+# VTEP source addresses with $swp2 as input interface and $swp1/$swp3 as
+# outputs. Used by the nomcroute tests. Routes are removed on cleanup.
+adf_install_broken_sg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+ defer mc_cli remove "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+
+ mc_cli add "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+ defer mc_cli remove "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+}
+
+adf_install_rx()
+{
+ mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR"
+ defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" lo10
+
+ mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR"
+ defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" lo10
+
+ mc_cli add "$swp2" :: "$GROUP6" "$IPMR"
+ defer mc_cli remove "$swp2" :: "$GROUP6" lo10
+
+ mc_cli add "$swp3" :: "$GROUP6" "$IPMR"
+ defer mc_cli remove "$swp3" :: "$GROUP6" lo10
+}
+
+adf_install_sg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 192.0.2.33 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+# Start the MC daemon with "lo" as the extra interface and install (S,G)
+# routes for sources 192.0.2.120 / 2001:db8:5::1 with lo as input interface
+# and $swp2/$swp3 as outputs. Routes are removed on cleanup.
+adf_install_sg_sep()
+{
+ adf_mcd_start lo || exit "$EXIT_STATUS"
+
+ mc_cli add lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+}
+
+# Like adf_install_sg_sep, but the input interface is a parameter, $IPMR is
+# also enabled in the daemon, and the receive-direction routes are installed
+# as well.
+# Arguments: $1 - interface used as input interface of the (S,G) routes
+adf_install_sg_sep_rx()
+{
+ local lo=$1; shift
+
+ adf_mcd_start "$IPMR" "$lo" || exit "$EXIT_STATUS"
+
+ mc_cli add "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+# Start the MC daemon and install wildcard-source routes (source 0.0.0.0 /
+# ::) with $IPMR as input interface and $swp2/$swp3 as outputs, followed by
+# the receive-direction routes.
+adf_install_starg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+# Send 10 IPv4 UDP packets from $h1 on VLAN 10 (192.0.2.1 -> 192.0.2.2),
+# 100 ms apart, with $h2's MAC address as destination MAC.
+do_packets_v4()
+{
+ local mac
+
+ mac=$(mac_get "$h2")
+ "$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+ -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q
+}
+
+# Send 10 IPv6 UDP packets from $h1 on VLAN 20 (2001:db8:1::1 ->
+# 2001:db8:1::2), 100 ms apart, with $h2's MAC address as destination MAC.
+do_packets_v6()
+{
+ local mac
+
+ mac=$(mac_get "$h2")
+ "$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q
+}
+
+# Send test traffic and compare how many VXLAN packets reached $h2 and $h3
+# against expectations, using the counters installed by install_capture.
+# Arguments: $1 - IP version (4 or 6); selects do_packets_v$1 and filter
+#                 pref 100 + $1
+#            $2 - expected packet count on H2
+#            $3 - expected packet count on H3
+#            $4 - description appended to the logged test name
+do_test()
+{
+ local ipv=$1; shift
+ local expect_h2=$1; shift
+ local expect_h3=$1; shift
+ local what=$1; shift
+
+ local pref=$((100 + ipv))
+ local t0_h2
+ local t0_h3
+ local t1_h2
+ local t1_h3
+ local d_h2
+ local d_h3
+
+ RET=0
+
+ # Counter snapshot before sending
+ t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+ t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+ "do_packets_v$ipv"
+ sleep 1
+
+ # Counter snapshot after sending
+ t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+ t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+ d_h2=$((t1_h2 - t0_h2))
+ d_h3=$((t1_h3 - t0_h3))
+
+ # The deltas must match the expectations exactly.
+ ((d_h2 == expect_h2))
+ check_err $? "Expected $expect_h2 packets on H2, got $d_h2"
+
+ ((d_h3 == expect_h3))
+ check_err $? "Expected $expect_h3 packets on H3, got $d_h3"
+
+ log_test "VXLAN MC flood $what"
+}
+
+# Ping the overlay hosts behind NS2 (192.0.2.3) and NS3 (192.0.2.4) from
+# $h1.10. The NS2 ping must succeed; the NS3 result is checked against
+# $1 through check_err_fail.
+# Arguments: $1 - h3_should_fail flag passed to check_err_fail
+#            $2 - description appended to the logged test name
+ipv4_do_test_rx()
+{
+ local h3_should_fail=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ping_do "$h1.10" 192.0.2.3
+ check_err $? "H2 should respond"
+
+ ping_do "$h1.10" 192.0.2.4
+ check_err_fail "$h3_should_fail" $? "H3 responds"
+
+ log_test "VXLAN MC flood $what"
+}
+
+# IPv6 counterpart of ipv4_do_test_rx: ping 2001:db8:1::3 (behind NS2) and
+# 2001:db8:1::4 (behind NS3) from $h1.20.
+# Arguments: $1 - h3_should_fail flag passed to check_err_fail
+#            $2 - description appended to the logged test name
+ipv6_do_test_rx()
+{
+ local h3_should_fail=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ping6_do "$h1.20" 2001:db8:1::3
+ check_err $? "H2 should respond"
+
+ ping6_do "$h1.20" 2001:db8:1::4
+ check_err_fail "$h3_should_fail" $? "H3 responds"
+
+ log_test "VXLAN MC flood $what"
+}
+
+# vx10 bound to $swp2 without mcroute: expect all 10 packets on H2 and none
+# on H3.
+ipv4_nomcroute()
+{
+ # Install a misleading (S,G) rule to attempt to trick the system into
+ # pushing the packets elsewhere.
+ adf_install_broken_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2"
+ do_test 4 10 0 "IPv4 nomcroute"
+}
+
+# vx20 bound to $swp2 without mcroute: expect all 10 packets on H2 and none
+# on H3.
+ipv6_nomcroute()
+{
+ # Like for IPv4, install a misleading (S,G).
+ adf_install_broken_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+ do_test 6 10 0 "IPv6 nomcroute"
+}
+
+# Ping test with vx10 lacking "mcroute" on $swp2; the H3 ping is checked with
+# h3_should_fail=1.
+ipv4_nomcroute_rx()
+{
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2"
+ ipv4_do_test_rx 1 "IPv4 nomcroute ping"
+}
+
+# Ping test with vx20 lacking "mcroute" on $swp2; the H3 ping is checked with
+# h3_should_fail=1.
+ipv6_nomcroute_rx()
+{
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+ ipv6_do_test_rx 1 "IPv6 nomcroute ping"
+}
+
+# vx10 created with "mcroute" on $IPMR after adf_install_sg: expect 10 packets
+# on both H2 and H3.
+ipv4_mcroute()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 10 10 "IPv4 mcroute"
+}
+
+# vx20 created with "mcroute" on $IPMR after adf_install_sg: expect 10 packets
+# on both H2 and H3.
+ipv6_mcroute()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 10 10 "IPv6 mcroute"
+}
+
+# Ping test with the same setup as ipv4_mcroute(); the H3 ping is checked with
+# h3_should_fail=0.
+ipv4_mcroute_rx()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ ipv4_do_test_rx 0 "IPv4 mcroute ping"
+}
+
+# Ping test with the same setup as ipv6_mcroute(); the H3 ping is checked with
+# h3_should_fail=0.
+ipv6_mcroute_rx()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ipv6_do_test_rx 0 "IPv6 mcroute ping"
+}
+
+# Create vx10 without "mcroute", enable the flag afterwards with
+# "ip link set", then expect 10 packets on both H2 and H3.
+ipv4_mcroute_changelink()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR"
+ ip link set dev vx10 type vxlan mcroute
+ sleep 1
+ do_test 4 10 10 "IPv4 mcroute changelink"
+}
+
+# Create vx20 without "mcroute", enable the flag afterwards with
+# "ip link set", then expect 10 packets on both H2 and H3.
+ipv6_mcroute_changelink()
+{
+	adf_install_sg
+	# The device must start out without "mcroute", like in
+	# ipv4_mcroute_changelink(). Otherwise the changelink below is a no-op
+	# and this test only repeats ipv6_mcroute().
+	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR"
+	ip link set dev vx20 type vxlan mcroute
+	sleep 1
+	do_test 6 10 10 "IPv6 mcroute changelink"
+}
+
+# Like ipv4_mcroute(), but with adf_install_starg instead of adf_install_sg.
+ipv4_mcroute_starg()
+{
+ adf_install_starg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 10 10 "IPv4 mcroute (*,G)"
+}
+
+# Like ipv6_mcroute(), but with adf_install_starg instead of adf_install_sg.
+ipv6_mcroute_starg()
+{
+ adf_install_starg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 10 10 "IPv6 mcroute (*,G)"
+}
+
+# Ping test with the same setup as ipv4_mcroute_starg(); h3_should_fail=0.
+ipv4_mcroute_starg_rx()
+{
+ adf_install_starg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping"
+}
+
+# Ping test with the same setup as ipv6_mcroute_starg(); h3_should_fail=0.
+ipv6_mcroute_starg_rx()
+{
+ adf_install_starg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping"
+}
+
+# vx10 with "mcroute" but none of the adf_install_* helpers called: expect no
+# packets on either H2 or H3.
+ipv4_mcroute_noroute()
+{
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 0 0 "IPv4 mcroute, no route"
+}
+
+# vx20 with "mcroute" but none of the adf_install_* helpers called: expect no
+# packets on either H2 or H3.
+ipv6_mcroute_noroute()
+{
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 0 0 "IPv6 mcroute, no route"
+}
+
+# vx10 created without a "group"; the remote $GROUP4 is configured through an
+# all-zeros FDB entry with "via $IPMR" instead. Expect 10 packets on H2 and H3.
+ipv4_mcroute_fdb()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute
+ bridge fdb add dev vx10 \
+ 00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR"
+ do_test 4 10 10 "IPv4 mcroute FDB"
+}
+
+# vx20 created without a "group"; the remote $GROUP6 is configured through an
+# all-zeros FDB entry with "via $IPMR" instead. Expect 10 packets on H2 and H3.
+ipv6_mcroute_fdb()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute
+ bridge -6 fdb add dev vx20 \
+ 00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR"
+ do_test 6 10 10 "IPv6 mcroute FDB"
+}
+
+# Use FDB to configure VXLAN in a way where oif=0 for purposes of FIB lookup.
+ipv4_mcroute_fdb_oif0()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ # Replace the all-zeros FDB entry with one that has no "via" device.
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ do_test 4 10 10 "IPv4 mcroute oif=0"
+}
+
+# IPv6 version of the oif=0 test: the all-zeros FDB entry is re-added without
+# a "via" device. Expect 10 packets on both H2 and H3.
+ipv6_mcroute_fdb_oif0()
+{
+ # The IPv6 tunnel lookup does not fall back to selection by source
+ # address. Instead it just does a FIB match, and that would find one of
+ # the several ff00::/8 multicast routes -- each device has one. In order
+ # to reliably force the $IPMR device, add a /128 route for the
+ # destination group address.
+ ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR"
+ defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR"
+
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ # Replace the all-zeros FDB entry with one that has no "via" device.
+ bridge -6 fdb del dev vx20 00:00:00:00:00:00
+ bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6"
+ do_test 6 10 10 "IPv6 mcroute oif=0"
+}
+
+# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR.
+# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT.
+ipv4_mcroute_fdb_oif0_sep()
+{
+ adf_install_sg_sep
+
+ # The VXLAN local address lives on lo rather than on $IPMR.
+ ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ # Replace the all-zeros FDB entry with one that has no "via" device.
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0"
+}
+
+# Ping variant of ipv4_mcroute_fdb_oif0_sep(), using adf_install_sg_sep_rx lo;
+# the H3 ping is checked with h3_should_fail=0.
+ipv4_mcroute_fdb_oif0_sep_rx()
+{
+ adf_install_sg_sep_rx lo
+
+ ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping"
+}
+
+# Like ipv4_mcroute_fdb_oif0_sep_rx(), but the re-added FDB entry names lo
+# explicitly with "via lo".
+ipv4_mcroute_fdb_sep_rx()
+{
+ adf_install_sg_sep_rx lo
+
+ ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add \
+ dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo
+ ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping"
+}
+
+# IPv6 TX!=RX ping test: the local address 2001:db8:5::1 is put on "X$IPMR"
+# and the FDB entry points "via X$IPMR", while vx20 itself is bound to $IPMR.
+ipv6_mcroute_fdb_sep_rx()
+{
+ adf_install_sg_sep_rx "X$IPMR"
+
+ ip_addr_add "X$IPMR" 2001:db8:5::1/64
+ vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute
+ bridge -6 fdb del dev vx20 00:00:00:00:00:00
+ bridge -6 fdb add dev vx20 00:00:00:00:00:00 \
+ self static dst "$GROUP6" via "X$IPMR"
+ ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/ipv6_force_forwarding.sh b/tools/testing/selftests/net/ipv6_force_forwarding.sh
new file mode 100755
index 000000000000..bf0243366caa
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_force_forwarding.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test IPv6 force_forwarding interface property
+#
+# This test verifies that the force_forwarding property works correctly:
+# - When global forwarding is disabled, packets are not forwarded normally
+# - When force_forwarding is enabled on an interface, packets are forwarded
+# regardless of the global forwarding setting
+
+source lib.sh
+
+# EXIT handler: hand the namespaces named by $ns1..$ns3 to cleanup_ns.
+cleanup() {
+ cleanup_ns $ns1 $ns2 $ns3
+}
+
+trap cleanup EXIT
+
+setup_test() {
+ # Create three namespaces: sender, router, receiver
+ setup_ns ns1 ns2 ns3
+
+ # Create veth pairs: ns1 <-> ns2 <-> ns3
+ ip link add name veth12 type veth peer name veth21
+ ip link add name veth23 type veth peer name veth32
+
+ # Move interfaces to namespaces
+ ip link set veth12 netns $ns1
+ ip link set veth21 netns $ns2
+ ip link set veth23 netns $ns2
+ ip link set veth32 netns $ns3
+
+ # Configure interfaces
+ ip -n $ns1 addr add 2001:db8:1::1/64 dev veth12 nodad
+ ip -n $ns2 addr add 2001:db8:1::2/64 dev veth21 nodad
+ ip -n $ns2 addr add 2001:db8:2::1/64 dev veth23 nodad
+ ip -n $ns3 addr add 2001:db8:2::2/64 dev veth32 nodad
+
+ # Bring up interfaces
+ ip -n $ns1 link set veth12 up
+ ip -n $ns2 link set veth21 up
+ ip -n $ns2 link set veth23 up
+ ip -n $ns3 link set veth32 up
+
+ # Add routes
+ ip -n $ns1 route add 2001:db8:2::/64 via 2001:db8:1::2
+ ip -n $ns3 route add 2001:db8:1::/64 via 2001:db8:2::1
+
+ # Disable global forwarding
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=0
+}
+
+# Ping ns3 from ns1 through ns2, first with force_forwarding=0 and then with
+# force_forwarding=1 on both router interfaces.
+# Returns 0 on success, 1 if either step misbehaves, $ksft_skip if the sysctl
+# does not exist.
+test_force_forwarding() {
+	local ret=0
+	local dev
+
+	echo "TEST: force_forwarding functionality"
+
+	# Check if force_forwarding sysctl exists
+	if ! ip netns exec $ns2 test -f /proc/sys/net/ipv6/conf/veth21/force_forwarding; then
+		echo "SKIP: force_forwarding not available"
+		return $ksft_skip
+	fi
+
+	# Test 1: Without force_forwarding, ping should fail
+	for dev in veth21 veth23; do
+		ip netns exec $ns2 sysctl -qw net.ipv6.conf.$dev.force_forwarding=0
+	done
+
+	if ! ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+		echo "PASS: forwarding disabled correctly"
+	else
+		echo "FAIL: ping succeeded when forwarding disabled"
+		ret=1
+	fi
+
+	# Test 2: With force_forwarding enabled, ping should succeed
+	for dev in veth21 veth23; do
+		ip netns exec $ns2 sysctl -qw net.ipv6.conf.$dev.force_forwarding=1
+	done
+
+	if ! ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+		echo "FAIL: ping failed with force_forwarding enabled"
+		ret=1
+	else
+		echo "PASS: force_forwarding enabled forwarding"
+	fi
+
+	return $ret
+}
+
+echo "IPv6 force_forwarding test"
+echo "=========================="
+
+setup_test
+test_force_forwarding
+ret=$?
+
+if [ $ret -eq 0 ]; then
+ echo "OK"
+ exit 0
+elif [ $ret -eq $ksft_skip ]; then
+ echo "SKIP"
+ exit $ksft_skip
+else
+ echo "FAIL"
+ exit 1
+fi
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 86a216e9aca8..c7add0dc4c60 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -240,6 +240,29 @@ create_netdevsim() {
echo nsim$id
}
+create_netdevsim_port() {
+ local nsim_id="$1"
+ local ns="$2"
+ local port_id="$3"
+ local perm_addr="$4"
+ local orig_dev
+ local new_dev
+ local nsim_path
+
+ nsim_path="/sys/bus/netdevsim/devices/netdevsim$nsim_id"
+
+ echo "$port_id $perm_addr" | ip netns exec "$ns" tee "$nsim_path"/new_port > /dev/null || return 1
+
+ orig_dev=$(ip netns exec "$ns" find "$nsim_path"/net/ -maxdepth 1 -name 'e*' | tail -n 1)
+ orig_dev=$(basename "$orig_dev")
+ new_dev="nsim${nsim_id}p$port_id"
+
+ ip -netns "$ns" link set dev "$orig_dev" name "$new_dev"
+ ip -netns "$ns" link set dev "$new_dev" up
+
+ echo "$new_dev"
+}
+
# Remove netdevsim with given id.
cleanup_netdevsim() {
local id="$1"
@@ -547,13 +570,19 @@ ip_link_set_addr()
defer ip link set dev "$name" address "$old_addr"
}
-ip_link_is_up()
+ip_link_has_flag()
{
local name=$1; shift
+ local flag=$1; shift
local state=$(ip -j link show "$name" |
- jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"')
- [[ $state == "UP" ]]
+ jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)')
+ [[ $state == true ]]
+}
+
+# Succeed if link $1 has the UP flag, as reported by ip_link_has_flag().
+ip_link_is_up()
+{
+ ip_link_has_flag "$1" UP
}
ip_link_set_up()
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index 8697bd27dc30..02be28dcc089 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -6,4 +6,4 @@ from .netns import NetNS, NetNSEnter
from .nsim import *
from .utils import *
from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily
-from .ynl import NetshaperFamily
+from .ynl import NetshaperFamily, DevlinkFamily
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 61287c203b6e..8e35ed12ed9e 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -32,6 +32,7 @@ class KsftTerminate(KeyboardInterrupt):
def ksft_pr(*objs, **kwargs):
+ kwargs["flush"] = True
print("#", *objs, **kwargs)
@@ -139,7 +140,7 @@ def ktap_result(ok, cnt=1, case="", comment=""):
res += "." + str(case.__name__)
if comment:
res += " # " + comment
- print(res)
+ print(res, flush=True)
def ksft_flush_defer():
@@ -227,8 +228,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
- print("TAP version 13")
- print("1.." + str(len(cases)))
+ print("TAP version 13", flush=True)
+ print("1.." + str(len(cases)), flush=True)
global KSFT_RESULT
cnt = 0
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 34470d65d871..f395c90fb0f1 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -175,6 +175,10 @@ def tool(name, args, json=None, ns=None, host=None):
return cmd_obj
+def bpftool(args, json=None, ns=None, host=None):
+ """ Run bpftool through tool(), forwarding json, ns and host unchanged. """
+ return tool('bpftool', args, json=json, ns=ns, host=host)
+
+
def ip(args, json=None, ns=None, host=None):
if ns:
args = f'-netns {ns} ' + args
@@ -185,6 +189,41 @@ def ethtool(args, json=None, ns=None, host=None):
return tool('ethtool', args, json=json, ns=ns, host=host)
+def bpftrace(expr, json=None, ns=None, host=None, timeout=None):
+ """
+ Run bpftrace and return map data (if json=True).
+ The output of bpftrace is inconvenient, so the helper converts
+ to a dict indexed by map name with the leading "@" stripped, e.g.
+ maps "@" and "@map2" are returned as:
+ {
+ "": { ... },
+ "map2": { ... },
+ }
+ If timeout is set, an interval probe which calls exit() after that
+ many seconds is appended to expr.
+ Without json the cmd object is returned instead.
+ """
+ cmd_arr = ['bpftrace']
+ # Throw in --quiet if json, otherwise the output has two objects
+ if json:
+ cmd_arr += ['-f', 'json', '-q']
+ if timeout:
+ expr += ' interval:s:' + str(timeout) + ' { exit(); }'
+ cmd_arr += ['-e', expr]
+ cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False)
+ if json:
+ # bpftrace prints objects as lines
+ ret = {}
+ for l in cmd_obj.stdout.split('\n'):
+ if not l.strip():
+ continue
+ one = _json.loads(l)
+ # Only "map" objects carry map contents, skip everything else
+ if one.get('type') != 'map':
+ continue
+ for k, v in one["data"].items():
+ if k.startswith('@'):
+ k = k.lstrip('@')
+ ret[k] = v
+ return ret
+ return cmd_obj
+
+
def rand_port(type=socket.SOCK_STREAM):
"""
Get a random unprivileged port.
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 6329ae805abf..2b3a61ea3bfa 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -56,3 +56,8 @@ class NetshaperFamily(YnlFamily):
def __init__(self, recv_size=0):
super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(),
schema='', recv_size=recv_size)
+
+class DevlinkFamily(YnlFamily):
+ # YnlFamily built from the devlink.yaml spec found under SPEC_PATH
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
new file mode 100644
index 000000000000..521ba38f2ddd
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_ADJST_OFFSET 256
+#define MAX_PAYLOAD_LEN 5000
+#define MAX_HDR_LEN 64
+
+/* Keys of map_xdp_setup.
+ * NOTE(review): the identifier after each closing brace declares a variable
+ * of the anonymous enum type, it does not name the enum -- confirm that this
+ * is intended.
+ */
+enum {
+ XDP_MODE = 0,
+ XDP_PORT = 1,
+ XDP_ADJST_OFFSET = 2,
+ XDP_ADJST_TAG = 3,
+} xdp_map_setup_keys;
+
+/* Values of map_xdp_setup[XDP_MODE], dispatched on in xdp_prog_common() */
+enum {
+ XDP_MODE_PASS = 0,
+ XDP_MODE_DROP = 1,
+ XDP_MODE_TX = 2,
+ XDP_MODE_TAIL_ADJST = 3,
+ XDP_MODE_HEAD_ADJST = 4,
+} xdp_map_modes;
+
+/* Keys of map_xdp_stats, see record_stats() */
+enum {
+ STATS_RX = 0,
+ STATS_PASS = 1,
+ STATS_DROP = 2,
+ STATS_TX = 3,
+ STATS_ABORT = 4,
+} xdp_stats;
+
+/* Configuration of the program: mode, UDP port, adjust offset and tag */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __s32);
+} map_xdp_setup SEC(".maps");
+
+/* Counters incremented by record_stats(), indexed by the STATS_* values */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __u64);
+} map_xdp_stats SEC(".maps");
+
+/* Return the smaller of @a and @b */
+static __u32 min(__u32 a, __u32 b)
+{
+ return a < b ? a : b;
+}
+
+/* Atomically add one to the map_xdp_stats entry @stat_type; nothing happens
+ * if the lookup fails. @ctx is not used.
+ */
+static void record_stats(struct xdp_md *ctx, __u32 stat_type)
+{
+ __u64 *count;
+
+ count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type);
+
+ if (count)
+ __sync_fetch_and_add(count, 1);
+}
+
+/* Return the UDP header of an IPv4 or IPv6 UDP packet whose destination port
+ * is @port and count it as STATS_RX; return NULL for everything else.
+ * The UDP header is located at a fixed offset of sizeof(struct iphdr) or
+ * sizeof(struct ipv6hdr) behind the Ethernet header, so IPv4 options and IPv6
+ * extension headers are not accounted for.
+ */
+static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ struct ethhdr *eth = data;
+
+ if (data + sizeof(*eth) > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+
+ if (iph + 1 > (struct iphdr *)data_end ||
+ iph->protocol != IPPROTO_UDP)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = data + sizeof(*eth);
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+ ipv6h->nexthdr != IPPROTO_UDP)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+ } else {
+ return NULL;
+ }
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ if (udph->dest != bpf_htons(port))
+ return NULL;
+
+ record_stats(ctx, STATS_RX);
+
+ return udph;
+}
+
+/* Always return XDP_PASS; packets matched by filter_udphdr() are additionally
+ * counted as STATS_PASS.
+ */
+static int xdp_mode_pass(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_PASS);
+
+ return XDP_PASS;
+}
+
+/* Drop packets matched by filter_udphdr() and count them as STATS_DROP;
+ * everything else is passed.
+ */
+static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_DROP);
+
+ return XDP_DROP;
+}
+
+/* Exchange the source and destination MAC addresses of the Ethernet header
+ * at @data. The caller is responsible for the bounds check.
+ */
+static void swap_machdr(void *data)
+{
+ struct ethhdr *eth = data;
+ __u8 tmp_mac[ETH_ALEN];
+
+ __builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN);
+ __builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN);
+}
+
+/* Bounce IPv4/IPv6 UDP packets destined to @port back out of the receiving
+ * interface: the MAC addresses and the IP addresses are swapped and XDP_TX is
+ * returned. Such packets are counted as both STATS_RX and STATS_TX. The UDP
+ * ports are left untouched. Everything else is passed.
+ */
+static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct udphdr *udph = NULL;
+	struct ethhdr *eth = data;
+
+	if (data + sizeof(*eth) > data_end)
+		return XDP_PASS;
+
+	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph = data + sizeof(*eth);
+		__be32 tmp_ip;
+
+		if (iph + 1 > (struct iphdr *)data_end ||
+		    iph->protocol != IPPROTO_UDP)
+			return XDP_PASS;
+
+		udph = data + sizeof(*iph) + sizeof(*eth);
+
+		if (udph + 1 > (struct udphdr *)data_end)
+			return XDP_PASS;
+		if (udph->dest != bpf_htons(port))
+			return XDP_PASS;
+
+		record_stats(ctx, STATS_RX);
+		swap_machdr((void *)eth);
+
+		/* The header is only read once it is known to be in bounds */
+		tmp_ip = iph->saddr;
+		iph->saddr = iph->daddr;
+		iph->daddr = tmp_ip;
+
+		record_stats(ctx, STATS_TX);
+
+		return XDP_TX;
+
+	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ipv6h = data + sizeof(*eth);
+		struct in6_addr tmp_ipv6;
+
+		if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+		    ipv6h->nexthdr != IPPROTO_UDP)
+			return XDP_PASS;
+
+		udph = data + sizeof(*ipv6h) + sizeof(*eth);
+
+		if (udph + 1 > (struct udphdr *)data_end)
+			return XDP_PASS;
+		if (udph->dest != bpf_htons(port))
+			return XDP_PASS;
+
+		record_stats(ctx, STATS_RX);
+		swap_machdr((void *)eth);
+
+		__builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6));
+		__builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr,
+				 sizeof(tmp_ipv6));
+		__builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6));
+
+		record_stats(ctx, STATS_TX);
+
+		return XDP_TX;
+	}
+
+	return XDP_PASS;
+}
+
+/* Add @offset to the length fields of the IP and UDP headers and return the
+ * UDP header, or NULL if the packet is neither IPv4 nor IPv6 or a header does
+ * not fit into the linear data.
+ * For IPv6, *@udp_csum is seeded from the complement of the current UDP
+ * checksum and updated for both changed length fields.
+ * NOTE(review): the IPv4 branch writes neither *@udp_csum nor the IPv4 header
+ * checksum after changing tot_len -- confirm that this is intended.
+ */
+static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ struct ethhdr *eth = data;
+ __u32 len, len_new;
+
+ if (data + sizeof(*eth) > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+ __u16 total_len;
+
+ if (iph + 1 > (struct iphdr *)data_end)
+ return NULL;
+
+ iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset);
+
+ udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+ if (!udph || udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = data + sizeof(*eth);
+ __u16 payload_len;
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+ if (!udph || udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ *udp_csum = ~((__u32)udph->check);
+
+ /* Account for the changed IPv6 payload length ... */
+ len = ipv6h->payload_len;
+ len_new = bpf_htons(bpf_ntohs(len) + offset);
+ ipv6h->payload_len = len_new;
+
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+
+ /* ... and for the changed UDP length */
+ len = udph->len;
+ len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+ } else {
+ return NULL;
+ }
+
+ udph->len = len_new;
+
+ return udph;
+}
+
+/* Fold the 32-bit checksum accumulator @csum into the 16-bit one's complement
+ * value that goes into the UDP header. A result of zero is returned as
+ * 0xffff, since zero in the UDP checksum field means "no checksum".
+ */
+static __u16 csum_fold_helper(__u32 csum)
+{
+	__u16 folded;
+
+	/* The first fold can carry into bit 16, the second one absorbs it */
+	csum = (csum & 0xffff) + (csum >> 16);
+	csum = (csum & 0xffff) + (csum >> 16);
+
+	/* Truncate before testing for zero: the complement of the 32-bit
+	 * value always has its upper bits set and so is never zero.
+	 */
+	folded = (__u16)~csum;
+
+	return folded ? : 0xffff;
+}
+
+/* Remove @offset bytes from the end of the packet: update the length fields,
+ * subtract the removed bytes from the UDP checksum and shrink the tail.
+ * @hdr_len bytes of headers at the front must survive.
+ * Returns 0 on success, -1 on any failure.
+ */
+static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset,
+ __u32 hdr_len)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ __u32 buff_pos, udp_csum = 0;
+ struct udphdr *udph = NULL;
+ __u32 buff_len;
+
+ udph = update_pkt(ctx, 0 - offset, &udp_csum);
+ if (!udph)
+ return -1;
+
+ buff_len = bpf_xdp_get_buff_len(ctx);
+
+ /* Bound offset to [0, MAX_ADJST_OFFSET], the size of tmp_buff */
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ /* Make sure we have enough data to avoid eating the header */
+ if (buff_len - offset < hdr_len)
+ return -1;
+
+ /* Copy out the bytes about to be cut off, they are needed below */
+ buff_pos = buff_len - offset;
+ if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+/* Append @offset bytes to the end of the packet, each set to the low byte of
+ * map_xdp_setup[XDP_ADJST_TAG], after updating the length fields and the UDP
+ * checksum. Returns 0 on success, -1 on any failure.
+ */
+static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ __u32 buff_pos, udp_csum = 0;
+ __u32 buff_len, hdr_len, key;
+ struct udphdr *udph;
+ __s32 *val;
+ __u8 tag;
+
+ /* Proceed to update the packet headers before attempting to adjust
+ * the tail. Once the tail is adjusted we lose access to the offset
+ * amount of data at the end of the packet which is crucial to update
+ * the checksum.
+ * Since any failure beyond this would abort the packet, we should
+ * not worry about passing a packet up the stack with wrong headers
+ */
+ udph = update_pkt(ctx, offset, &udp_csum);
+ if (!udph)
+ return -1;
+
+ key = XDP_ADJST_TAG;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return -1;
+
+ tag = (__u8)(*val);
+
+ /* Fill the whole scratch buffer with the tag byte */
+ for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+ __builtin_memcpy(&tmp_buff[i], &tag, 1);
+
+ /* Bound offset to [0, MAX_ADJST_OFFSET], the size of tmp_buff */
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ /* Length before growing, i.e. where the new bytes start */
+ buff_len = bpf_xdp_get_buff_len(ctx);
+
+ if (bpf_xdp_adjust_tail(ctx, offset) < 0) {
+ bpf_printk("Failed to adjust tail\n");
+ return -1;
+ }
+
+ if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+/* XDP_MODE_TAIL_ADJST handler: for packets matched by filter_udphdr(), shrink
+ * the tail when map_xdp_setup[XDP_ADJST_OFFSET] is negative and grow it
+ * otherwise. Returns XDP_PASS on success (counted as STATS_PASS) and
+ * XDP_ABORTED (counted as STATS_ABORT) if the adjustment fails. Unmatched
+ * packets are passed.
+ */
+static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port)
+{
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ __s32 *adjust_offset, *val;
+ __u32 key, hdr_len;
+ void *offset_ptr;
+ __u8 tag;
+ int ret;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ /* Bytes from the start of the packet up to the end of the UDP header */
+ hdr_len = (void *)udph - data + sizeof(struct udphdr);
+ key = XDP_ADJST_OFFSET;
+ adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!adjust_offset)
+ return XDP_PASS;
+
+ if (*adjust_offset < 0)
+ ret = xdp_adjst_tail_shrnk_data(ctx,
+ (__u16)(0 - *adjust_offset),
+ hdr_len);
+ else
+ ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset));
+ if (ret)
+ goto abort_pkt;
+
+ record_stats(ctx, STATS_PASS);
+ return XDP_PASS;
+
+abort_pkt:
+ record_stats(ctx, STATS_ABORT);
+ return XDP_ABORTED;
+}
+
+/* Remove @offset bytes that follow the @hdr_len bytes of headers: update the
+ * length fields and the UDP checksum, move the packet start forward by
+ * @offset and write the saved headers to the new start.
+ * Returns 0 on success, -1 on any failure.
+ */
+static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len,
+ __u32 offset)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ struct udphdr *udph;
+ void *offset_ptr;
+ __u32 udp_csum = 0;
+
+ /* Update the length information in the IP and UDP headers before
+ * adjusting the headroom. This simplifies accessing the relevant
+ * fields in the IP and UDP headers for fragmented packets. Any
+ * failure beyond this point will result in the packet being aborted,
+ * so we don't need to worry about incorrect length information for
+ * passed packets.
+ */
+ udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum);
+ if (!udph)
+ return -1;
+
+ /* Bound offset to [0, MAX_ADJST_OFFSET], the size of tmp_buff */
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ /* The bytes right behind the headers are the ones being removed */
+ if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ /* Save the start of the packet; only hdr_len bytes are written back */
+ if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0)
+ return -1;
+
+ if (bpf_xdp_adjust_head(ctx, offset) < 0)
+ return -1;
+
+ if (offset > MAX_ADJST_OFFSET)
+ return -1;
+
+ if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+ return -1;
+
+ /* Added here to handle clang complain about negative value */
+ hdr_len = hdr_len & 0xff;
+
+ if (hdr_len == 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0)
+ return -1;
+
+ return 0;
+}
+
+/* Insert @offset bytes, each set to the low byte of
+ * map_xdp_setup[XDP_ADJST_TAG], right behind the @hdr_len bytes of headers:
+ * update the length fields and the UDP checksum, move the packet start back
+ * by @offset, then write the saved headers followed by the new bytes.
+ * Returns 0 on success, -1 on any failure.
+ */
+static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len,
+ __u32 offset)
+{
+ char hdr_buff[MAX_HDR_LEN];
+ char data_buff[MAX_ADJST_OFFSET];
+ void *offset_ptr;
+ __s32 *val;
+ __u32 key;
+ __u8 tag;
+ __u32 udp_csum = 0;
+ struct udphdr *udph;
+
+ udph = update_pkt(ctx, (__s16)(offset), &udp_csum);
+ if (!udph)
+ return -1;
+
+ key = XDP_ADJST_TAG;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return -1;
+
+ tag = (__u8)(*val);
+ /* Fill the whole scratch buffer with the tag byte */
+ for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+ __builtin_memcpy(&data_buff[i], &tag, 1);
+
+ /* Bound offset to [0, MAX_ADJST_OFFSET], the size of data_buff */
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+ return -1;
+
+ /* Added here to handle clang complain about negative value */
+ hdr_len = hdr_len & 0xff;
+
+ if (hdr_len == 0)
+ return -1;
+
+ /* Save the headers, they are rewritten at the new packet start */
+ if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+ return -1;
+
+ if (offset > MAX_ADJST_OFFSET)
+ return -1;
+
+ if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+/* XDP_MODE_HEAD_ADJST handler: for packets matched by filter_udphdr(), adjust
+ * the head by map_xdp_setup[XDP_ADJST_OFFSET], which must be one of +-16,
+ * +-32, +-64, +-128 or +-256. Negative values grow the packet, positive values
+ * shrink it. Returns XDP_PASS on success (counted as STATS_PASS) and
+ * XDP_ABORTED (counted as STATS_ABORT) for an invalid offset or a failed
+ * adjustment. Unmatched packets are passed.
+ */
+static int xdp_head_adjst(struct xdp_md *ctx, __u16 port)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph_ptr = NULL;
+ __u32 key, size, hdr_len;
+ __s32 *val;
+ int res;
+
+ /* Filter packets based on UDP port */
+ udph_ptr = filter_udphdr(ctx, port);
+ if (!udph_ptr)
+ return XDP_PASS;
+
+ /* Bytes from the start of the packet up to the end of the UDP header */
+ hdr_len = (void *)udph_ptr - data + sizeof(struct udphdr);
+
+ key = XDP_ADJST_OFFSET;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return XDP_PASS;
+
+ /* Map the signed offset onto its magnitude, rejecting other values */
+ switch (*val) {
+ case -16:
+ case 16:
+ size = 16;
+ break;
+ case -32:
+ case 32:
+ size = 32;
+ break;
+ case -64:
+ case 64:
+ size = 64;
+ break;
+ case -128:
+ case 128:
+ size = 128;
+ break;
+ case -256:
+ case 256:
+ size = 256;
+ break;
+ default:
+ bpf_printk("Invalid adjustment offset: %d\n", *val);
+ goto abort;
+ }
+
+ if (*val < 0)
+ res = xdp_adjst_head_grow_data(ctx, hdr_len, size);
+ else
+ res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size);
+
+ if (res)
+ goto abort;
+
+ record_stats(ctx, STATS_PASS);
+ return XDP_PASS;
+
+abort:
+ record_stats(ctx, STATS_ABORT);
+ return XDP_ABORTED;
+}
+
+/* Common body of both XDP programs: read the mode and the UDP port from
+ * map_xdp_setup and hand the packet to the handler of that mode. The packet
+ * is passed if either lookup fails or the mode is unknown.
+ */
+static int xdp_prog_common(struct xdp_md *ctx)
+{
+ __u32 key, *port;
+ __s32 *mode;
+
+ key = XDP_MODE;
+ mode = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!mode)
+ return XDP_PASS;
+
+ key = XDP_PORT;
+ port = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!port)
+ return XDP_PASS;
+
+ switch (*mode) {
+ case XDP_MODE_PASS:
+ return xdp_mode_pass(ctx, (__u16)(*port));
+ case XDP_MODE_DROP:
+ return xdp_mode_drop_handler(ctx, (__u16)(*port));
+ case XDP_MODE_TX:
+ return xdp_mode_tx_handler(ctx, (__u16)(*port));
+ case XDP_MODE_TAIL_ADJST:
+ return xdp_adjst_tail(ctx, (__u16)(*port));
+ case XDP_MODE_HEAD_ADJST:
+ return xdp_head_adjst(ctx, (__u16)(*port));
+ }
+
+ /* Default action is to simply pass */
+ return XDP_PASS;
+}
+
+/* Entry point placed in the "xdp" section */
+SEC("xdp")
+int xdp_prog(struct xdp_md *ctx)
+{
+ return xdp_prog_common(ctx);
+}
+
+/* Entry point placed in the "xdp.frags" section; same logic as xdp_prog() */
+SEC("xdp.frags")
+int xdp_prog_frags(struct xdp_md *ctx)
+{
+ return xdp_prog_common(ctx);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 4f80014cae49..968d440c03fe 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -13,6 +13,7 @@ CONFIG_NETFILTER_NETLINK=m
CONFIG_NF_TABLES=m
CONFIG_NFT_COMPAT=m
CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_BPF=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
@@ -25,6 +26,7 @@ CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IP6_NF_FILTER=m
CONFIG_NET_ACT_CSUM=m
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 7ea5fb28c93d..1d5d3c4e7e87 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -77,6 +77,7 @@
static int cfg_cork;
static bool cfg_cork_mixed;
static int cfg_cpu = -1; /* default: pin to last cpu */
+static int cfg_expect_zerocopy = -1;
static int cfg_family = PF_UNSPEC;
static int cfg_ifindex = 1;
static int cfg_payload_len;
@@ -92,9 +93,9 @@ static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;
static struct sockaddr_storage cfg_src_addr;
+static int exitcode;
static char payload[IP_MAXPACKET];
static long packets, bytes, completions, expected_completions;
-static int zerocopied = -1;
static uint32_t next_completion;
static uint32_t sends_since_notify;
@@ -444,11 +445,13 @@ static bool do_recv_completion(int fd, int domain)
next_completion = hi + 1;
zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
- if (zerocopied == -1)
- zerocopied = zerocopy;
- else if (zerocopied != zerocopy) {
- fprintf(stderr, "serr: inconsistent\n");
- zerocopied = zerocopy;
+ if (cfg_expect_zerocopy != -1 &&
+ cfg_expect_zerocopy != zerocopy) {
+ fprintf(stderr, "serr: ee_code: %u != expected %u\n",
+ zerocopy, cfg_expect_zerocopy);
+ exitcode = 1;
+ /* suppress repeated messages */
+ cfg_expect_zerocopy = zerocopy;
}
if (cfg_verbose >= 2)
@@ -571,7 +574,7 @@ static void do_tx(int domain, int type, int protocol)
fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
packets, bytes >> 20, completions,
- zerocopied == 1 ? 'y' : 'n');
+ cfg_zerocopy && cfg_expect_zerocopy == 1 ? 'y' : 'n');
}
static int do_setup_rx(int domain, int type, int protocol)
@@ -715,7 +718,7 @@ static void parse_opts(int argc, char **argv)
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
+ while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
@@ -770,6 +773,9 @@ static void parse_opts(int argc, char **argv)
case 'z':
cfg_zerocopy = true;
break;
+ case 'Z':
+ cfg_expect_zerocopy = !!atoi(optarg);
+ break;
}
}
@@ -817,5 +823,5 @@ int main(int argc, char **argv)
else
error(1, 0, "unknown cfg_test %s", cfg_test);
- return 0;
+ return exitcode;
}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index 89c22f5320e0..28178a38a4e7 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -6,6 +6,7 @@
set -e
readonly DEV="veth0"
+readonly DUMMY_DEV="dummy0"
readonly DEV_MTU=65535
readonly BIN="./msg_zerocopy"
@@ -14,21 +15,25 @@ readonly NSPREFIX="ns-${RAND}"
readonly NS1="${NSPREFIX}1"
readonly NS2="${NSPREFIX}2"
-readonly SADDR4='192.168.1.1'
-readonly DADDR4='192.168.1.2'
-readonly SADDR6='fd::1'
-readonly DADDR6='fd::2'
+readonly LPREFIX4='192.168.1'
+readonly RPREFIX4='192.168.2'
+readonly LPREFIX6='fd'
+readonly RPREFIX6='fc'
+
readonly path_sysctl_mem="net.core.optmem_max"
# No arguments: automated test
if [[ "$#" -eq "0" ]]; then
- $0 4 tcp -t 1
- $0 6 tcp -t 1
- $0 4 udp -t 1
- $0 6 udp -t 1
- echo "OK. All tests passed"
- exit 0
+ ret=0
+
+ $0 4 tcp -t 1 || ret=1
+ $0 6 tcp -t 1 || ret=1
+ $0 4 udp -t 1 || ret=1
+ $0 6 udp -t 1 || ret=1
+
+ [[ "$ret" == "0" ]] && echo "OK. All tests passed"
+ exit $ret
fi
# Argument parsing
@@ -45,11 +50,18 @@ readonly EXTRA_ARGS="$@"
# Argument parsing: configure addresses
if [[ "${IP}" == "4" ]]; then
- readonly SADDR="${SADDR4}"
- readonly DADDR="${DADDR4}"
+ readonly SADDR="${LPREFIX4}.1"
+ readonly DADDR="${LPREFIX4}.2"
+ readonly DUMMY_ADDR="${RPREFIX4}.1"
+ readonly DADDR_TXONLY="${RPREFIX4}.2"
+ readonly MASK="24"
elif [[ "${IP}" == "6" ]]; then
- readonly SADDR="${SADDR6}"
- readonly DADDR="${DADDR6}"
+ readonly SADDR="${LPREFIX6}::1"
+ readonly DADDR="${LPREFIX6}::2"
+ readonly DUMMY_ADDR="${RPREFIX6}::1"
+ readonly DADDR_TXONLY="${RPREFIX6}::2"
+ readonly MASK="64"
+ readonly NODAD="nodad"
else
echo "Invalid IP version ${IP}"
exit 1
@@ -89,33 +101,61 @@ ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+ip link add "${DUMMY_DEV}" mtu "${DEV_MTU}" netns "${NS2}" type dummy
+
# Bring the devices up
ip -netns "${NS1}" link set "${DEV}" up
ip -netns "${NS2}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DUMMY_DEV}" up
# Set fixed MAC addresses on the devices
ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
# Add fixed IP addresses to the devices
-ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
-ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
-ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
-ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+ip -netns "${NS1}" addr add "${SADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DUMMY_ADDR}/${MASK}" dev "${DUMMY_DEV}" ${NODAD}
+
+ip -netns "${NS1}" route add default via "${DADDR}" dev "${DEV}"
+ip -netns "${NS2}" route add default via "${DADDR_TXONLY}" dev "${DUMMY_DEV}"
+
+ip netns exec "${NS2}" sysctl -wq net.ipv4.ip_forward=1
+ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1
# Optionally disable sg or csum offload to test edge cases
# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+ret=0
+
do_test() {
local readonly ARGS="$1"
- echo "ipv${IP} ${TXMODE} ${ARGS}"
- ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
+ # tx-rx test
+ # packets queued to a local socket are copied,
+ # sender notification has SO_EE_CODE_ZEROCOPY_COPIED.
+
+ echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-rx\n"
+ ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 \
+ -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
sleep 0.2
- ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+ -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" -Z 0 || ret=1
wait
+
+ # next test is unconnected tx to dummy0, cannot exercise with tcp
+ [[ "${TXMODE}" == "tcp" ]] && return
+
+ # tx-only test: send out dummy0
+ # packets leaving the host are not copied,
+ # sender notification does not have SO_EE_CODE_ZEROCOPY_COPIED.
+
+ echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-only\n"
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+ -S "${SADDR}" -D "${DADDR_TXONLY}" ${ARGS} "${TXMODE}" -Z 1 || ret=1
}
do_test "${EXTRA_ARGS}"
do_test "-z ${EXTRA_ARGS}"
-echo ok
+
+[[ "$ret" == "0" ]] && echo "OK"
diff --git a/tools/testing/selftests/net/netdev-l2addr.sh b/tools/testing/selftests/net/netdev-l2addr.sh
new file mode 100755
index 000000000000..18509da293e5
--- /dev/null
+++ b/tools/testing/selftests/net/netdev-l2addr.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+set -o pipefail
+
+NSIM_ADDR=2025
+TEST_ADDR="d0:be:d0:be:d0:00"
+
+RET_CODE=0
+
+cleanup() {
+ cleanup_netdevsim "$NSIM_ADDR"
+ cleanup_ns "$NS"
+}
+
+trap cleanup EXIT
+
+# Report a test failure on stderr and mark the whole run as failed.
+#   $1 - optional message (defaults to "unexpected return code")
+# Call it directly after the command being checked (cmd || fail "...") so
+# that $? still holds that command's exit status when it is captured below.
+# Sets the global RET_CODE to 1; does not exit.
+fail() {
+	# Capture the status first: any later command would overwrite $?.
+	# ($_ is the last argument of the previous command, not a status.)
+	local rc=$?
+
+	echo "ERROR: ${1:-unexpected return code} (ret: $rc)" >&2
+	RET_CODE=1
+}
+
+get_addr()
+{
+ local type="$1"
+ local dev="$2"
+ local ns="$3"
+
+ ip -j -n "$ns" link show dev "$dev" | jq -er ".[0].$type"
+}
+
+setup_ns NS
+
+nsim=$(create_netdevsim $NSIM_ADDR "$NS")
+
+get_addr address "$nsim" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim" "$NS" >/dev/null || fail "Couldn't get brd addr"
+get_addr permaddr "$nsim" "$NS" >/dev/null && fail "Found perm_addr without setting it"
+
+ip -n "$NS" link set dev "$nsim" address "$TEST_ADDR"
+ip -n "$NS" link set dev "$nsim" brd "$TEST_ADDR"
+
+[[ "$(get_addr address "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set ether addr"
+[[ "$(get_addr broadcast "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set brd addr"
+
+if create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "FF:FF:FF:FF:FF:FF" 2>/dev/null; then
+ fail "Created netdevsim with broadcast permaddr"
+fi
+
+nsim_port=$(create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "$TEST_ADDR")
+
+get_addr address "$nsim_port" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim_port" "$NS" >/dev/null || fail "Couldn't get brd addr"
+[[ "$(get_addr permaddr "$nsim_port" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't get permaddr"
+
+cleanup_netdevsim "$NSIM_ADDR" "$NS"
+
+exit $RET_CODE
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 363646f4fefe..79d5b33966ba 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -1,6 +1,8 @@
CONFIG_AUDIT=y
CONFIG_BPF_SYSCALL=y
CONFIG_BRIDGE=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m
CONFIG_BRIDGE_EBT_BROUTE=m
CONFIG_BRIDGE_EBT_IP=m
CONFIG_BRIDGE_EBT_REDIRECT=m
@@ -14,7 +16,10 @@ CONFIG_INET_ESP=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
+CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP_NF_RAW=m
@@ -92,4 +97,4 @@ CONFIG_XFRM_STATISTICS=y
CONFIG_NET_PKTGEN=m
CONFIG_TUN=m
CONFIG_INET_DIAG=m
-CONFIG_SCTP_DIAG=m
+CONFIG_INET_SCTP_DIAG=m
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
index 6af2ea3ad6b8..9c9d5b38ab71 100755
--- a/tools/testing/selftests/net/netfilter/ipvs.sh
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -151,7 +151,7 @@ test_nat() {
test_tun() {
ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
- ip netns exec "${ns1}" modprobe -q ipip
+ modprobe -q ipip
ip netns exec "${ns1}" ip link set tunl0 up
ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0
ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0
@@ -160,10 +160,10 @@ test_tun() {
ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port}
ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1
- ip netns exec "${ns2}" modprobe -q ipip
ip netns exec "${ns2}" ip link set tunl0 up
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
test_service
diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
index 5ff7be9daeee..c0fffaa6dbd9 100755
--- a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
+++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
@@ -10,6 +10,8 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
checktool "iperf3 --version" "run test without iperf3 tool"
+read kernel_tainted < /proc/sys/kernel/tainted
+
# how many seconds to torture the kernel?
# default to 80% of max run time but don't exceed 48s
TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10))
@@ -135,7 +137,8 @@ else
wait
fi
-[[ $(</proc/sys/kernel/tainted) -eq 0 ]] || {
+
+[[ $kernel_tainted -eq 0 && $(</proc/sys/kernel/tainted) -ne 0 ]] && {
echo "FAIL: Kernel is tainted!"
exit $ksft_fail
}
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index cd8a58097448..1f5227f3d64d 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len)
name[0] = '\0';
rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen);
if (rc < 0)
- log_err_errno("setsockopt(SO_BINDTODEVICE)");
+ log_err_errno("getsockopt(SO_BINDTODEVICE)");
return rc;
}
@@ -535,7 +535,7 @@ static int set_freebind(int sd, int version)
break;
case AF_INET6:
if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) {
- log_err_errno("setsockopt(IPV6_FREEBIND");
+ log_err_errno("setsockopt(IPV6_FREEBIND)");
rc = -1;
}
break;
@@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, const char *_str,
sep++;
if (str_to_uint(sep, 1, pfx_len_max,
&args->prefix_len) != 0) {
- fprintf(stderr, "Invalid port\n");
+ fprintf(stderr, "Invalid prefix length\n");
return 1;
}
} else {
@@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen,
}
}
- nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
+ nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
while (1) {
FD_ZERO(&rfds);
FD_SET(sd, &rfds);
@@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args)
sd = socket(args->version, args->type, args->protocol);
if (sd < 0) {
log_err_errno("Error opening socket");
- return -1;
+ return -1;
}
if (set_reuseaddr(sd) != 0)
@@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
* waiting to be told when to continue
*/
if (read(fd, &buf, sizeof(buf)) <= 0) {
- log_err_errno("Failed to read IPC status from status");
+ log_err_errno("Failed to read IPC status from pipe");
return 1;
}
if (!buf) {
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index beaee5e4e2aa..5c66421ab8aa 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -2,8 +2,9 @@
# SPDX-License-Identifier: GPL-2.0
import time
+from os import system
from lib.py import ksft_run, ksft_exit, ksft_pr
-from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
+from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait
from lib.py import NetdevFamily, NetdevSimDev, ip
@@ -34,6 +35,128 @@ def napi_list_check(nf) -> None:
ksft_eq(len(napis), 100,
comment=f"queue count after reset queue {q} mode {i}")
+def napi_set_threaded(nf) -> None:
+ """
+ Test that verifies various cases of napi threaded
+ set and unset at napi and device level.
+ """
+ with NetdevSimDev(queue_count=2) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ # set napi threaded and verify
+ nf.napi_set({'id': napi0_id, 'threaded': "enabled"})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # check it is not set for napi1
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ ip(f"link set dev {nsim.ifname} down")
+ ip(f"link set dev {nsim.ifname} up")
+
+ # verify if napi threaded is still set
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # check it is still not set for napi1
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ # unset napi threaded and verify
+ nf.napi_set({'id': napi0_id, 'threaded': "disabled"})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+
+ # set threaded at device level
+ system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is set for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "enabled")
+ ksft_ne(napi1.get('pid'), None)
+
+ # unset threaded at device level
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ # set napi threaded for napi0
+ nf.napi_set({'id': napi0_id, 'threaded': 1})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # unset threaded at device level
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+def dev_set_threaded(nf) -> None:
+ """
+ Test that verifies various cases of napi threaded
+ set and unset at device level using sysfs.
+ """
+ with NetdevSimDev(queue_count=2) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ # set threaded
+ system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is set for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "enabled")
+ ksft_ne(napi1.get('pid'), None)
+
+ # unset threaded
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
def nsim_rxq_reset_down(nf) -> None:
"""
@@ -122,7 +245,7 @@ def page_pool_check(nf) -> None:
def main() -> None:
nf = NetdevFamily()
ksft_run([empty_check, lo_check, page_pool_check, napi_list_check,
- nsim_rxq_reset_down],
+ dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down],
args=(nf, ))
ksft_exit()
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index ef8b25a606d8..c5b01e1bd4c7 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -39,11 +39,15 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
# xfail tests that are known flaky with dbg config, not fixable.
# still run them for coverage (and expect 100% pass without dbg).
declare -ar xfail_list=(
+ "tcp_blocking_blocking-connect.pkt"
+ "tcp_blocking_blocking-read.pkt"
"tcp_eor_no-coalesce-retrans.pkt"
"tcp_fast_recovery_prr-ss.*.pkt"
+ "tcp_sack_sack-route-refresh-ip-tos.pkt"
"tcp_slow_start_slow-start-after-win-update.pkt"
"tcp_timestamping.*.pkt"
"tcp_user_timeout_user-timeout-probe.pkt"
+ "tcp_zerocopy_cl.*.pkt"
"tcp_zerocopy_epoll_.*.pkt"
"tcp_tcp_info_tcp-info-.*-limited.pkt"
)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
index 914eabab367a..657e42ca65b5 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test for blocking read.
+
--tolerance_usecs=10000
+--mss=1000
`./defaults.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
new file mode 100644
index 000000000000..c790d0af635e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test various DSACK (RFC 2883) behaviors.
+
+--mss=1000
+
+`./defaults.sh`
+
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+// First SACK range.
+ +0 < P. 1001:2001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop, nop, sack 1001:2001>
+
+// Check SACK coalescing (contiguous sequence).
+ +0 < P. 2001:3001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 1001:3001>
+
+// Check we have two SACK ranges for non-contiguous sequences.
+ +0 < P. 4001:5001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 4001:5001 1001:3001>
+
+// Three ranges.
+ +0 < P. 7001:8001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 7001:8001 4001:5001 1001:3001>
+
+// DSACK (1001:3001) + SACK (7001:8001)
+ +0 < P. 1:6001(6000) ack 1 win 1024
+ +0 > . 1:1(0) ack 6001 <nop,nop,sack 1001:3001 7001:8001>
+
+// DSACK (7001:8001)
+ +0 < P. 6001:8001(2000) ack 1 win 1024
+ +0 > . 1:1(0) ack 8001 <nop,nop,sack 7001:8001>
+
+// DSACK for an older segment.
+ +0 < P. 1:1001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 8001 <nop,nop,sack 1:1001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
index df49c67645ac..e13f0eee9795 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test TCP_INQ and TCP_CM_INQ on the client side.
+
+--mss=1000
+
`./defaults.sh
`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
index 04a5e2590c62..14dd5f813d50 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test TCP_INQ and TCP_CM_INQ on the server side.
+
+--mss=1000
+
`./defaults.sh
`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
new file mode 100644
index 000000000000..7e6bc5fb0c8d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
+
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < . 2001:11001(9000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001>
+
+// check that ooo packet properly updates tcpi_rcv_mss
+ +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }%
+
+ +0 < . 11001:21001(10000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001>
+
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
new file mode 100644
index 000000000000..3848b419e68c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:4001(4000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +0 < P. 4001:54001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// ooo packet : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +1 < P. 5001:55001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +0 < P. 70001:80001(10000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+ +0 read(4, ..., 100000) = 4000
+
+// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd
+ +0 < P. 4001:54001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 54001 win 0
+
+// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times.
++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
new file mode 100644
index 000000000000..f575c0ff89da
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:20001(20000) ack 1 win 257
+ +.04 > . 1:1(0) ack 20001 win 18000
+
+ +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 20001 win 18000
+
+ +0 read(4, ..., 20000) = 20000
+// A too big packet is accepted if the receive queue is empty
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 80001 win 0
+
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 2e8243a65b50..d6c00efeb664 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -21,6 +21,7 @@ ALL_TESTS="
kci_test_vrf
kci_test_encap
kci_test_macsec
+ kci_test_macsec_vlan
kci_test_ipsec
kci_test_ipsec_offload
kci_test_fdb_get
@@ -30,6 +31,7 @@ ALL_TESTS="
kci_test_address_proto
kci_test_enslave_bonding
kci_test_mngtmpaddr
+ kci_test_operstate
"
devdummy="test-dummy0"
@@ -291,6 +293,17 @@ kci_test_route_get()
end_test "PASS: route get"
}
+# Check that no address matching a pattern is configured on a device.
+#   $1 - network device name
+#   $2 - grep pattern matched against the "ip addr show" output
+# Returns 0 when nothing matches, 1 while a matching address is still listed.
+check_addr_not_exist()
+{
+	# Keep these local so the helper does not clobber same-named globals
+	# used elsewhere in the test script.
+	local dev=$1
+	local addr=$2
+
+	if ip addr show dev "$dev" | grep -q "$addr"; then
+		return 1
+	fi
+
+	return 0
+}
+
kci_test_addrlft()
{
for i in $(seq 10 100) ;do
@@ -298,9 +311,8 @@ kci_test_addrlft()
run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
done
- sleep 5
- run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy"
- if [ $? -eq 0 ]; then
+ slowwait 5 check_addr_not_exist "$devdummy" "10.23.11."
+ if [ $? -eq 1 ]; then
check_err 1
end_test "FAIL: preferred_lft addresses remaining"
return
@@ -561,6 +573,41 @@ kci_test_macsec()
end_test "PASS: macsec"
}
+# Test __dev_set_rx_mode call from dev_uc_add under addr_list_lock spinlock.
+# Make sure __dev_set_promiscuity is not grabbing (sleeping) netdev instance
+# lock.
+# https://lore.kernel.org/netdev/2aff4342b0f5b1539c02ffd8df4c7e58dd9746e7.camel@nvidia.com/
+kci_test_macsec_vlan()
+{
+ msname="test_macsec1"
+ vlanname="test_vlan1"
+ local ret=0
+ run_cmd_grep "^Usage: ip macsec" ip macsec help
+ if [ $? -ne 0 ]; then
+ end_test "SKIP: macsec: iproute2 too old"
+ return $ksft_skip
+ fi
+ run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: can't add macsec interface, skipping test"
+ return 1
+ fi
+
+ run_cmd ip link set dev "$msname" up
+ ip link add link "$msname" name "$vlanname" type vlan id 1
+ ip link set dev "$vlanname" address 00:11:22:33:44:88
+ ip link set dev "$vlanname" up
+ run_cmd ip link del dev "$vlanname"
+ run_cmd ip link del dev "$msname"
+
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: macsec_vlan"
+ return 1
+ fi
+
+ end_test "PASS: macsec_vlan"
+}
+
#-------------------------------------------------------------------
# Example commands
# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
@@ -673,6 +720,11 @@ kci_test_ipsec_offload()
sysfsf=$sysfsd/ipsec
sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
probed=false
+ esp4_offload_probed_default=false
+
+ if lsmod | grep -q esp4_offload; then
+ esp4_offload_probed_default=true
+ fi
if ! mount | grep -q debugfs; then
mount -t debugfs none /sys/kernel/debug/ &> /dev/null
@@ -766,6 +818,7 @@ EOF
fi
# clean up any leftovers
+ ! "$esp4_offload_probed_default" && lsmod | grep -q esp4_offload && rmmod esp4_offload
echo 0 > /sys/bus/netdevsim/del_device
$probed && rmmod netdevsim
@@ -1334,6 +1387,39 @@ kci_test_mngtmpaddr()
return $ret
}
+kci_test_operstate()
+{
+ local ret=0
+
+ # Check that it is possible to set operational state during device
+ # creation and that it is preserved when the administrative state of
+ # the device is toggled.
+ run_cmd ip link add name vx0 up state up type vxlan id 10010 dstport 4789
+ run_cmd_grep "state UP" ip link show dev vx0
+ run_cmd ip link set dev vx0 down
+ run_cmd_grep "state DOWN" ip link show dev vx0
+ run_cmd ip link set dev vx0 up
+ run_cmd_grep "state UP" ip link show dev vx0
+
+ run_cmd ip link del dev vx0
+
+ # Check that it is possible to set the operational state of the device
+ # after creation.
+ run_cmd ip link add name vx0 up type vxlan id 10010 dstport 4789
+ run_cmd_grep "state UNKNOWN" ip link show dev vx0
+ run_cmd ip link set dev vx0 state up
+ run_cmd_grep "state UP" ip link show dev vx0
+
+ run_cmd ip link del dev vx0
+
+ if [ "$ret" -ne 0 ]; then
+ end_test "FAIL: operstate"
+ return 1
+ fi
+
+ end_test "PASS: operstate"
+}
+
kci_test_rtnl()
{
local current_test
diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh
new file mode 100755
index 000000000000..3f9780232bd6
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink_notification.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test checks rtnetlink notification call paths, aiming to get as much
+# coverage as possible.
+#
+# set -e
+
+ALL_TESTS="
+ kci_test_mcast_addr_notification
+ kci_test_anycast_addr_notification
+"
+
+source lib.sh
+test_dev="test-dummy1"
+
+kci_test_mcast_addr_notification()
+{
+ RET=0
+ local tmpfile
+ local monitor_pid
+ local match_result
+
+ tmpfile=$(mktemp)
+ defer rm "$tmpfile"
+
+ ip monitor maddr > $tmpfile &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+
+ sleep 1
+
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "mcast addr notification: iproute2 too old"
+ return $RET
+ fi
+
+ ip link add name "$test_dev" type dummy
+ check_err $? "failed to add dummy interface"
+ ip link set "$test_dev" up
+ check_err $? "failed to set dummy interface up"
+ ip link del dev "$test_dev"
+ check_err $? "Failed to delete dummy interface"
+ sleep 1
+
+ # There should be 4 line matches as follows.
+ # 13: test-dummy1    inet6 mcast ff02::1 scope global 
+ # 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+ # Deleted 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+ # Deleted 13: test-dummy1    inet6 mcast ff02::1 scope global 
+ match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile")
+ if [ "$match_result" -ne 4 ]; then
+ RET=$ksft_fail
+ fi
+ log_test "mcast addr notification: Expected 4 matches, got $match_result"
+ return $RET
+}
+
+kci_test_anycast_addr_notification()
+{
+ RET=0
+ local tmpfile
+ local monitor_pid
+ local match_result
+
+ tmpfile=$(mktemp)
+ defer rm "$tmpfile"
+
+ ip monitor acaddress > "$tmpfile" &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+ sleep 1
+
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "anycast addr notification: iproute2 too old"
+ return "$RET"
+ fi
+
+ ip link add name "$test_dev" type dummy
+ check_err $? "failed to add dummy interface"
+ ip link set "$test_dev" up
+ check_err $? "failed to set dummy interface up"
+ sysctl -qw net.ipv6.conf."$test_dev".forwarding=1
+ ip link del dev "$test_dev"
+ check_err $? "Failed to delete dummy interface"
+ sleep 1
+
+ # There should be 2 line matches as follows.
+ # 9: test-dummy1 inet6 any fe80:: scope global
+ # Deleted 9: test-dummy1 inet6 any fe80:: scope global
+ match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile")
+ if [ "$match_result" -ne 2 ]; then
+ RET=$ksft_fail
+ fi
+ log_test "anycast addr notification: Expected 2 matches, got $match_result"
+ return "$RET"
+}
+
+# check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+ RET=$ksft_skip
+ log_test "need root privileges"
+ exit $RET
+fi
+
+require_command ip
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
index ba730655a7bf..4bc135e5c22c 100755
--- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
@@ -594,7 +594,7 @@ setup_rt_local_sids()
dev "${DUMMY_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
index 4b86040c58c6..34b781a2ae74 100755
--- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
@@ -72,6 +72,9 @@
# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
# the selftest network.
#
+# In addition, every router interface connecting rt-x to rt-y is assigned an
+# IPv6 link-local address fe80::x:y/64.
+#
# Local SID/C-SID table
# =====================
#
@@ -521,6 +524,9 @@ setup_rt_networking()
ip -netns "${nsname}" addr \
add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+ ip -netns "${nsname}" addr \
+ add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad
+
ip -netns "${nsname}" link set "${devname}" up
done
@@ -609,6 +615,27 @@ set_end_x_nextcsid()
nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
}
+# Install (or replace) the local SID route of router rt-<rt> so that it
+# applies the NEXT-C-SID SRv6 End.X behavior towards the adjacent router
+# rt-<adj>, using the neighbor's IPv6 link-local address as nexthop.
+#
+# Arguments:
+#   $1 - id of the router on which the route is configured
+#   $2 - id of the adjacent router
+#
+# The nexthop is fe80::<adj>:<rt> and the output interface is
+# veth-rt-<rt>-<adj>. Note that nsname, lcnode_func_prefix, nh6_ll_addr and
+# oifname are assigned without "local".
+set_end_x_ll_nextcsid()
+{
+ local rt="$1"
+ local adj="$2"
+
+ eval nsname=\${$(get_rtname "${rt}")}
+ lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
+ nh6_ll_addr="fe80::${adj}:${rt}"
+ oifname="veth-rt-${rt}-${adj}"
+
+ # Enable the NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local
+ # nexthop address (note that "dev" is the dummy dum0 device chosen for
+ # the sake of simplicity).
+ ip -netns "${nsname}" -6 route \
+ replace "${lcnode_func_prefix}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End.X nh6 "${nh6_ll_addr}" \
+ oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \
+ nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
+}
+
set_underlay_sids_reachability()
{
local rt="$1"
@@ -654,7 +681,7 @@ setup_rt_local_sids()
set_underlay_sids_reachability "${rt}" "${rt_neighs}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -1016,6 +1043,27 @@ host_vpn_tests()
check_and_log_hs_ipv4_connectivity 1 2
check_and_log_hs_ipv4_connectivity 2 1
+
+ # Setup the adjacencies in the SRv6 aware routers using IPv6 link-local
+ # addresses.
+ # - rt-3 SRv6 End.X adjacency with rt-4
+ # - rt-4 SRv6 End.X adjacency with rt-1
+ set_end_x_ll_nextcsid 3 4
+ set_end_x_ll_nextcsid 4 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local"
+
+ check_and_log_hs_ipv6_connectivity 1 2
+ check_and_log_hs_ipv6_connectivity 2 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local"
+
+ check_and_log_hs_ipv4_connectivity 1 2
+ check_and_log_hs_ipv4_connectivity 2 1
+
+ # Restore the previous adjacencies.
+ set_end_x_nextcsid 3 4
+ set_end_x_nextcsid 4 1
}
__nextcsid_end_x_behavior_test()
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
index 3efce1718c5f..6a68c7eff1dc 100755
--- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
@@ -395,7 +395,7 @@ setup_rt_local_sids()
dev "${VRF_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
index cabc70538ffe..0979b5316fdf 100755
--- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -343,7 +343,7 @@ setup_rt_local_sids()
encap seg6local action End dev "${DUMMY_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behaviors instaces are grouped together in the 'localsid'
+ # Endpoint behaviors instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule add \
to "${VPN_LOCATOR_SERVICE}::/16" \
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
index f00245263b20..6478da6a71c3 100644
--- a/tools/testing/selftests/net/tcp_ao/seq-ext.c
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Check that after SEQ number wrap-around:
* 1. SEQ-extension has upper bytes set
- * 2. TCP conneciton is alive and no TCPAOBad segments
+ * 2. TCP connection is alive and no TCPAOBad segments
* In order to test (2), the test doesn't just adjust seq number for a queue
* on a connected socket, but migrates it to another sk+port number, so
* that there won't be any delayed packets that will fail to verify
diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh
new file mode 100755
index 000000000000..388056472b5b
--- /dev/null
+++ b/tools/testing/selftests/net/test_neigh.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# NOTE(review): helpers used below that are not defined in this file
+# (setup_ns, cleanup_all_ns, log_test, check_err, check_fail, require_command)
+# are expected to come from lib.sh - confirm.
+source lib.sh
+# Names of the test functions run by default; "-t" overrides the list.
+TESTS="
+ extern_valid_ipv4
+ extern_valid_ipv6
+"
+# Verbosity level: incremented by each "-v" and consulted by run_cmd().
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+# Run a command string, hiding its output unless verbose mode is enabled.
+#
+# Arguments:
+#   $1 - command line; it is passed to "eval", so it may contain pipes and
+#        quoting
+# Globals:
+#   VERBOSE (read) - when >= 1 the command is echoed, its stderr is kept and
+#                    any captured stdout is printed; otherwise stderr is
+#                    redirected to /dev/null and stdout is discarded
+# Returns:
+#   the exit status of the evaluated command
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local rc
+ local stderr="2>/dev/null"
+
+ # "-v" may be given several times and increments VERBOSE each time, so
+ # any value >= 1 means verbose (an exact match on 1 would silently turn
+ # verbose mode off again for "-v -v").
+ if [ "$VERBOSE" -ge 1 ]; then
+ echo "COMMAND: $cmd"
+ stderr=
+ fi
+
+ # The redirection is appended as text and parsed by eval together with
+ # the command itself.
+ out=$(eval "$cmd" "$stderr")
+ rc=$?
+ if [ "$VERBOSE" -ge 1 ] && [ -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+################################################################################
+# Setup
+
+# Build the test topology: namespaces ns1 and ns2 connected by a veth pair
+# (veth0 in ns1, veth1 in ns2). Each end gets one IPv4 address from
+# 192.0.2.0/24 and one IPv6 address from 2001:db8:1::/64.
+setup()
+{
+ # Abort on the first failing command while the topology is being built.
+ set -e
+
+ setup_ns ns1 ns2
+
+ ip -n "$ns1" link add veth0 type veth peer name veth1 netns "$ns2"
+ ip -n "$ns1" link set dev veth0 up
+ ip -n "$ns2" link set dev veth1 up
+
+ ip -n "$ns1" address add 192.0.2.1/24 dev veth0
+ ip -n "$ns1" address add 2001:db8:1::1/64 dev veth0 nodad
+ ip -n "$ns2" address add 192.0.2.2/24 dev veth1
+ ip -n "$ns2" address add 2001:db8:1::2/64 dev veth1 nodad
+
+ # NOTE(review): presumably needed so that the IPv6 addresses are kept
+ # when the tests set the links down and up again - confirm.
+ ip netns exec "$ns1" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec "$ns2" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+
+ # NOTE(review): fixed delay, presumably to let the links and addresses
+ # settle before the tests start - confirm.
+ sleep 5
+
+ # The tests inspect exit statuses themselves, so stop aborting on error.
+ set +e
+}
+
+# EXIT trap handler: call cleanup_all_ns and terminate the script with the
+# status held in EXIT_STATUS.
+exit_cleanup_all()
+{
+ cleanup_all_ns
+ exit "${EXIT_STATUS}"
+}
+
+################################################################################
+# Tests
+
+# Exercise the "extern_valid" neighbour flag for one address family.
+#
+# Arguments:
+#   $1 - address family label used in the test names (e.g. "IPv4")
+#   $2 - address of veth1 in ns2, used as the neighbour entry under test
+#   $3 - neighbour table name passed to "ip ntable" (e.g. "arp_cache")
+#   $4 - address prefix to which a host suffix is appended in order to
+#        generate additional neighbour entries
+#
+# The function is a sequence of sub-tests. Each one resets RET, runs its
+# commands through run_cmd, records failures with check_err / check_fail and
+# reports the result with log_test. The two garbage collection sub-tests
+# change "ip ntable" parameters in the initial namespace and restore the
+# saved values afterwards.
+extern_valid_common()
+{
+ local af_str=$1; shift
+ local ip_addr=$1; shift
+ local tbl_name=$1; shift
+ local subnet=$1; shift
+ local mac
+
+ # Link-layer address of the peer (veth1 in ns2), used for every entry.
+ mac=$(ip -n "$ns2" -j link show dev veth1 | jq -r '.[]["address"]')
+
+ RET=0
+
+ # Check that simple addition works.
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "No \"extern_valid\" flag after addition"
+
+ log_test "$af_str \"extern_valid\" flag: Add entry"
+
+ RET=0
+
+ # Check that an entry cannot be added with "extern_valid" flag and an
+ # invalid state.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr nud none dev veth0 extern_valid"
+ check_fail $? "Managed to add an entry with \"extern_valid\" flag and an invalid state"
+
+ log_test "$af_str \"extern_valid\" flag: Add with an invalid state"
+
+ RET=0
+
+ # Check that entry cannot be added with both "extern_valid" flag and
+ # "use" / "managed" flag.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ check_fail $? "Managed to add an entry with \"extern_valid\" flag and \"use\" flag"
+
+ log_test "$af_str \"extern_valid\" flag: Add with \"use\" flag"
+
+ RET=0
+
+ # Check that "extern_valid" flag can be toggled using replace.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Did not manage to set \"extern_valid\" flag with replace"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_fail $? "Did not manage to clear \"extern_valid\" flag with replace"
+
+ log_test "$af_str \"extern_valid\" flag: Replace entry"
+
+ RET=0
+
+ # Check that an existing "extern_valid" entry can be marked as
+ # "managed".
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid managed"
+ check_err $? "Did not manage to add \"managed\" flag to an existing \"extern_valid\" entry"
+
+ log_test "$af_str \"extern_valid\" flag: Replace entry with \"managed\" flag"
+
+ RET=0
+
+ # Check that entry cannot be replaced with "extern_valid" flag and an
+ # invalid state.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr nud none dev veth0 extern_valid"
+ check_fail $? "Managed to replace an entry with \"extern_valid\" flag and an invalid state"
+
+ log_test "$af_str \"extern_valid\" flag: Replace with an invalid state"
+
+ RET=0
+
+ # Check that an "extern_valid" entry is flushed when the interface is
+ # put administratively down.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 link set dev veth0 down"
+ run_cmd "ip -n $ns1 link set dev veth0 up"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+ check_fail $? "\"extern_valid\" entry not flushed upon interface down"
+
+ log_test "$af_str \"extern_valid\" flag: Interface down"
+
+ RET=0
+
+ # Check that an "extern_valid" entry is not flushed when the interface
+ # loses its carrier.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns2 link set dev veth1 down"
+ run_cmd "ip -n $ns2 link set dev veth1 up"
+ run_cmd "sleep 2"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+ check_err $? "\"extern_valid\" entry flushed upon carrier down"
+
+ log_test "$af_str \"extern_valid\" flag: Carrier down"
+
+ RET=0
+
+ # Check that when entry transitions to "reachable" state it maintains
+ # the "extern_valid" flag. Wait "delay_probe" seconds for ARP request /
+ # NS to be sent.
+ local delay_probe
+
+ delay_probe=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["delay_probe"]')
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ run_cmd "sleep $((delay_probe / 1000 + 2))"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"REACHABLE\""
+ check_err $? "Entry did not transition to \"reachable\" state"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry did not maintain \"extern_valid\" flag after transition to \"reachable\" state"
+
+ log_test "$af_str \"extern_valid\" flag: Transition to \"reachable\" state"
+
+ RET=0
+
+ # Drop all packets, trigger resolution and check that entry goes back
+ # to "stale" state instead of "failed".
+ local mcast_reprobes
+ local retrans_time
+ local ucast_probes
+ local app_probes
+ local probes
+ local delay
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "tc -n $ns2 qdisc add dev veth1 clsact"
+ run_cmd "tc -n $ns2 filter add dev veth1 ingress proto all matchall action drop"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ retrans_time=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["retrans"]')
+ ucast_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["ucast_probes"]')
+ app_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["app_probes"]')
+ mcast_reprobes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["mcast_reprobes"]')
+ # Time to wait: the probe delay plus one retransmission interval per
+ # configured probe. The value is divided by 1000 below to get the
+ # number of seconds to sleep.
+ delay=$((delay_probe + (ucast_probes + app_probes + mcast_reprobes) * retrans_time))
+ run_cmd "sleep $((delay / 1000 + 2))"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"STALE\""
+ check_err $? "Entry did not return to \"stale\" state"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry did not maintain \"extern_valid\" flag after returning to \"stale\" state"
+ probes=$(ip -n "$ns1" -j -s neigh get "$ip_addr" dev veth0 | jq '.[]["probes"]')
+ if [[ $probes -eq 0 ]]; then
+ check_err 1 "No probes were sent"
+ fi
+
+ log_test "$af_str \"extern_valid\" flag: Transition back to \"stale\" state"
+
+ # Remove the clsact qdisc (and with it the drop filter) added above.
+ run_cmd "tc -n $ns2 qdisc del dev veth1 clsact"
+
+ RET=0
+
+ # Forced garbage collection runs whenever the number of entries is
+ # larger than "thresh3" and deletes stale entries that have not been
+ # updated in the last 5 seconds.
+ #
+ # Check that an "extern_valid" entry survives a forced garbage
+ # collection. Add an entry, wait 5 seconds and add more entries than
+ # "thresh3" so that forced garbage collection will run.
+ #
+ # Note that the garbage collection thresholds are global resources and
+ # that changes in the initial namespace affect all the namespaces.
+ local forced_gc_runs_t0
+ local forced_gc_runs_t1
+ local orig_thresh1
+ local orig_thresh2
+ local orig_thresh3
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+ orig_thresh2=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh2")) | .["thresh2"]')
+ orig_thresh3=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh3")) | .["thresh3"]')
+ run_cmd "ip ntable change name $tbl_name thresh3 10 thresh2 9 thresh1 8"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+ run_cmd "sleep 5"
+ forced_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+ for i in {1..20}; do
+ run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+ done
+ forced_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+ if [[ $forced_gc_runs_t1 -eq $forced_gc_runs_t0 ]]; then
+ check_err 1 "Forced garbage collection did not run"
+ fi
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry with \"extern_valid\" flag did not survive forced garbage collection"
+ run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+ check_fail $? "Entry without \"extern_valid\" flag survived forced garbage collection"
+
+ log_test "$af_str \"extern_valid\" flag: Forced garbage collection"
+
+ # Restore the garbage collection thresholds saved above.
+ run_cmd "ip ntable change name $tbl_name thresh3 $orig_thresh3 thresh2 $orig_thresh2 thresh1 $orig_thresh1"
+
+ RET=0
+
+ # Periodic garbage collection runs every "base_reachable"/2 seconds and
+ # if the number of entries is larger than "thresh1", then it deletes
+ # stale entries that have not been used in the last "gc_stale" seconds.
+ #
+ # Check that an "extern_valid" entry survives a periodic garbage
+ # collection. Add an "extern_valid" entry, add more than "thresh1"
+ # regular entries, wait "base_reachable" (longer than "gc_stale")
+ # seconds and check that the "extern_valid" entry was not deleted.
+ #
+ # Note that the garbage collection thresholds and "base_reachable" are
+ # global resources and that changes in the initial namespace affect all
+ # the namespaces.
+ local periodic_gc_runs_t0
+ local periodic_gc_runs_t1
+ local orig_base_reachable
+ local orig_gc_stale
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+ orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]')
+ run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000"
+ orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]')
+ run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 5000"
+ # Wait orig_base_reachable/2 for the new interval to take effect.
+ run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+ for i in {1..20}; do
+ run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+ done
+ periodic_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+ run_cmd "sleep 10"
+ periodic_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+ [[ $periodic_gc_runs_t1 -ne $periodic_gc_runs_t0 ]]
+ check_err $? "Periodic garbage collection did not run"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry with \"extern_valid\" flag did not survive periodic garbage collection"
+ run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+ check_fail $? "Entry without \"extern_valid\" flag survived periodic garbage collection"
+
+ log_test "$af_str \"extern_valid\" flag: Periodic garbage collection"
+
+ # Restore the gc_stale, thresh1 and base_reachable values saved above.
+ run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale $orig_gc_stale"
+ run_cmd "ip ntable change name $tbl_name thresh1 $orig_thresh1 base_reachable $orig_base_reachable"
+}
+
+# Run the "extern_valid" tests for IPv4: the neighbour is 192.0.2.2 (the
+# address setup() gives veth1 in ns2), the table is "arp_cache" and extra
+# entries are generated under the "192.0.2." prefix.
+extern_valid_ipv4()
+{
+ extern_valid_common "IPv4" 192.0.2.2 "arp_cache" 192.0.2.
+}
+
+# Run the "extern_valid" tests for IPv6: the neighbour is 2001:db8:1::2 (the
+# address setup() gives veth1 in ns2), the table is "ndisc_cache" and extra
+# entries are generated under the "2001:db8:1::" prefix.
+extern_valid_ipv6()
+{
+ extern_valid_common "IPv6" 2001:db8:1::2 "ndisc_cache" 2001:db8:1::
+}
+
+################################################################################
+# Usage
+
+# Print the command line help to stdout. The script name is taken from $0
+# with any leading directory stripped, and the current value of TESTS is
+# listed as the available options for "-t".
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+# Parse the command line. The leading ":" in the option string selects
+# getopts' silent error reporting, so unknown options and missing arguments
+# end up in the "*" arm, which prints the usage and exits with status 1.
+# NOTE(review): PAUSE_ON_FAIL is not read anywhere in this file; presumably
+# it is consumed by lib.sh - confirm.
+while getopts ":t:pvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$((VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# The tests parse the JSON output of "ip -j" with jq.
+require_command jq
+
+# Skip the whole script when the installed iproute2 does not mention
+# "extern_valid" in its "ip neigh" help text.
+if ! ip neigh help 2>&1 | grep -q "extern_valid"; then
+ echo "SKIP: iproute2 ip too old, missing \"extern_valid\" support"
+ exit "$ksft_skip"
+fi
+
+# From here on every exit path goes through exit_cleanup_all, which also
+# makes EXIT_STATUS the exit status of the script.
+trap exit_cleanup_all EXIT
+
+# Run each requested test on a freshly built topology.
+for t in $TESTS
+do
+ setup; $t; cleanup_all_ns;
+done
diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
index 6127a78ee988..8deacc565afa 100755
--- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -146,18 +146,17 @@ run_cmd()
}
check_hv_connectivity() {
- ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
- sleep 1
- ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
+ slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
+ slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
return $?
}
check_vm_connectivity() {
- run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
+ slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)"
- run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
+ slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)"
}
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index e9c2f71da207..ce34cb2e6e0b 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -275,7 +275,7 @@ setup_sym()
# Wait for ip config to settle
- sleep 2
+ slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
}
setup_asym()
@@ -370,7 +370,7 @@ setup_asym()
ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
# Wait for ip config to settle
- sleep 2
+ slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
}
check_connectivity()