225 files changed, 46571 insertions, 0 deletions
diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore
new file mode 100644
index 000000000000..3633c7a3ed65
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+gro
+napi_id_helper
+psp_responder
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
new file mode 100644
index 000000000000..f5c71d993750
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += $(KHDR_INCLUDES)
+
+TEST_INCLUDES := $(wildcard lib/py/*.py) \
+		 $(wildcard lib/sh/*.sh) \
+		 ../../net/lib.sh \
+
+TEST_GEN_FILES := \
+	gro \
+	napi_id_helper \
+# end of TEST_GEN_FILES
+
+TEST_PROGS := \
+	gro.py \
+	hds.py \
+	napi_id.py \
+	napi_threaded.py \
+	netcons_basic.sh \
+	netcons_cmdline.sh \
+	netcons_fragmented_msg.sh \
+	netcons_overflow.sh \
+	netcons_sysdata.sh \
+	netcons_torture.sh \
+	netpoll_basic.py \
+	ping.py \
+	psp.py \
+	queues.py \
+	ring_reconfig.py \
+	shaper.py \
+	stats.py \
+	xdp.py \
+# end of TEST_PROGS
+
+# YNL files, must be before "include ..lib.mk"
+YNL_GEN_FILES := psp_responder
+TEST_GEN_FILES += $(YNL_GEN_FILES)
+
+include ../../lib.mk
+
+# YNL build
+YNL_GENS := psp
+
+include ../../net/ynl.mk
diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst
new file mode 100644
index 000000000000..eb838ae94844
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/README.rst
@@ -0,0 +1,136 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Running driver tests
+====================
+
+Networking driver tests are executed within kselftest framework like any
+other tests. They support testing both real device drivers and emulated /
+software drivers (latter mostly to test the core parts of the stack).
+
+SW mode
+~~~~~~~
+
+By default, when no extra parameters are set or exported, tests execute
+against software drivers such as netdevsim. No extra preparation is required
+the software devices are created and destroyed as part of the test.
+In this mode the tests are indistinguishable from other selftests and
+(for example) can be run under ``virtme-ng`` like the core networking selftests.
+
+HW mode
+~~~~~~~
+
+Executing tests against a real device requires external preparation.
+The netdevice against which tests will be run must exist, be running
+(in UP state) and be configured with an IP address.
+
+Refer to list of :ref:`Variables` later in this file to set up running
+the tests against a real device.
+
+Both modes required
+~~~~~~~~~~~~~~~~~~~
+
+All tests in drivers/net must support running both against a software device
+and a real device. SW-only tests should instead be placed in net/ or
+drivers/net/netdevsim, HW-only tests in drivers/net/hw.
+
+Variables
+=========
+
+The variables can be set in the environment or by creating a net.config
+file in the same directory as this README file. Example::
+
+  $ NETIF=eth0 ./some_test.sh
+
+or::
+
+  $ cat tools/testing/selftests/drivers/net/net.config
+  # Variable set in a file
+  NETIF=eth0
+
+Local test (which don't require endpoint for sending / receiving traffic)
+need only the ``NETIF`` variable. Remaining variables define the endpoint
+and communication method.
+
+NETIF
+~~~~~
+
+Name of the netdevice against which the test should be executed.
+When empty or not set software devices will be used.
+
+LOCAL_V4, LOCAL_V6, REMOTE_V4, REMOTE_V6
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Local and remote endpoint IP addresses.
+
+REMOTE_TYPE
+~~~~~~~~~~~
+
+Communication method used to run commands on the remote endpoint.
+Test framework has built-in support for ``netns`` and ``ssh`` channels.
+``netns`` assumes the "remote" interface is part of the same
+host, just moved to the specified netns.
+``ssh`` communicates with remote endpoint over ``ssh`` and ``scp``.
+Using persistent SSH connections is strongly encouraged to avoid
+the latency of SSH connection setup on every command.
+
+Communication methods are defined by classes in ``lib/py/remote_{name}.py``.
+It should be possible to add a new method without modifying any of
+the framework, by simply adding an appropriately named file to ``lib/py``.
+
+REMOTE_ARGS
+~~~~~~~~~~~
+
+Arguments used to construct the communication channel.
+Communication channel dependent::
+
+  for netns - name of the "remote" namespace
+  for ssh - name/address of the remote host
+
+Example
+=======
+
+Build the selftests::
+
+  # make -C tools/testing/selftests/ TARGETS="drivers/net drivers/net/hw"
+
+"Install" the tests and copy them over to the target machine::
+
+  # make -C tools/testing/selftests/ TARGETS="drivers/net drivers/net/hw" \
+     install INSTALL_PATH=/tmp/ksft-net-drv
+
+  # rsync -ra --delete /tmp/ksft-net-drv root@192.168.1.1:/root/
+
+On the target machine, running the tests will use netdevsim by default::
+
+  [/root] # ./ksft-net-drv/run_kselftest.sh -t drivers/net:ping.py
+  TAP version 13
+  1..1
+  # timeout set to 45
+  # selftests: drivers/net: ping.py
+  # TAP version 13
+  # 1..3
+  # ok 1 ping.test_v4
+  # ok 2 ping.test_v6
+  # ok 3 ping.test_tcp
+  # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0
+  ok 1 selftests: drivers/net: ping.py
+
+Create a config with remote info::
+
+  [/root] # cat > ./ksft-net-drv/drivers/net/net.config <<EOF
+  NETIF=eth0
+  LOCAL_V4=192.168.1.1
+  REMOTE_V4=192.168.1.2
+  REMOTE_TYPE=ssh
+  REMOTE_ARGS=root@192.168.1.2
+  EOF
+
+Run the test::
+
+  [/root] # ./ksft-net-drv/drivers/net/ping.py
+  TAP version 13
+  1..3
+  ok 1 ping.test_v4
+  ok 2 ping.test_v6 # SKIP Test requires IPv6 connectivity
+  ok 3 ping.test_tcp
+  # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:1 error:0
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
new file mode 100644
index 000000000000..6c5c60adb5e8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for net selftests
+
+TEST_PROGS := \
+	bond-arp-interval-causes-panic.sh \
+	bond-break-lacpdu-tx.sh \
+	bond-eth-type-change.sh \
+	bond-lladdr-target.sh \
+	bond_ipsec_offload.sh \
+	bond_lacp_prio.sh \
+	bond_macvlan_ipvlan.sh \
+	bond_options.sh \
+	bond_passive_lacp.sh \
+	dev_addr_lists.sh \
+	mode-1-recovery-updelay.sh \
+	mode-2-recovery-updelay.sh \
+	netcons_over_bonding.sh \
+# end of TEST_PROGS
+
+TEST_FILES := \
+	bond_topo_2d1c.sh \
+	bond_topo_3d1c.sh \
+	lag_lib.sh \
+# end of TEST_FILES
+
+TEST_INCLUDES := \
+	../../../net/lib.sh \
+	../lib/sh/lib_netcons.sh \
+	../../../net/forwarding/lib.sh \
+# end of TEST_INCLUDES
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh
new file mode 100755
index 000000000000..5667febee328
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# cause kernel oops in bond_rr_gen_slave_id
+DEBUG=${DEBUG:-0}
+
+set -e
+test ${DEBUG} -ne 0 && set -x
+
+finish()
+{
+	ip netns delete server || true
+	ip netns delete client || true
+}
+
+trap finish EXIT
+
+client_ip4=192.168.1.198
+server_ip4=192.168.1.254
+
+# setup kernel so it reboots after causing the panic
+echo 180 >/proc/sys/kernel/panic
+
+# build namespaces
+ip netns add "server"
+ip netns add "client"
+ip -n client link add eth0 type veth peer name eth0 netns server
+ip netns exec server ip link set dev eth0 up
+ip netns exec server ip addr add ${server_ip4}/24 dev eth0
+
+ip netns exec client ip link add dev bond0 down type bond mode 1 \
+	miimon 100 all_slaves_active 1
+ip netns exec client ip link set dev eth0 master bond0
+ip netns exec client ip link set dev bond0 up
+ip netns exec client ip addr add ${client_ip4}/24 dev bond0
+ip netns exec client ping -c 5 $server_ip4 >/dev/null
+
+ip netns exec client ip link set dev eth0 nomaster
+ip netns exec client ip link set dev bond0 down
+ip netns exec client ip link set dev bond0 type bond mode 0 \
+	arp_interval 1000 arp_ip_target "+${server_ip4}"
+ip netns exec client ip link set dev eth0 master bond0
+ip netns exec client ip link set dev bond0 up
+ip netns exec client ping -c 5 $server_ip4 >/dev/null
+
+exit 0
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
new file mode 100755
index 000000000000..1ec7f59db7f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
@@ -0,0 +1,80 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+#   Verify LACPDUs get transmitted after setting the MAC address of
+#   the bond.
+#
+# https://bugzilla.redhat.com/show_bug.cgi?id=2020773
+#
+#       +---------+
+#       | fab-br0 |
+#       +---------+
+#            |
+#       +---------+
+#       |  fbond  |
+#       +---------+
+#        |       |
+#    +------+ +------+
+#    |veth1 | |veth2 |
+#    +------+ +------+
+#
+# We use veths instead of physical interfaces
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+set -e
+cleanup() {
+	ip link del fab-br0 >/dev/null 2>&1 || :
+	ip link del fbond  >/dev/null 2>&1 || :
+	ip link del veth1-bond  >/dev/null 2>&1 || :
+	ip link del veth2-bond  >/dev/null 2>&1 || :
+}
+
+trap cleanup 0 1 2
+cleanup
+
+# create the bridge
+ip link add fab-br0 address 52:54:00:3B:7C:A6 mtu 1500 type bridge \
+	forward_delay 15
+
+# create the bond
+ip link add fbond type bond mode 4 miimon 200 xmit_hash_policy 1 \
+	ad_actor_sys_prio 65535 lacp_rate fast
+
+# set bond address
+ip link set fbond address 52:54:00:3B:7C:A6
+ip link set fbond up
+
+# set again bond sysfs parameters
+ip link set fbond type bond ad_actor_sys_prio 65535
+
+# create veths
+ip link add name veth1-bond type veth peer name veth1-end
+ip link add name veth2-bond type veth peer name veth2-end
+
+# add ports
+ip link set fbond master fab-br0
+ip link set veth1-bond master fbond
+ip link set veth2-bond master fbond
+
+# bring up
+ip link set veth1-end up
+ip link set veth2-end up
+ip link set fab-br0 up
+ip link set fbond up
+ip addr add dev fab-br0 10.0.0.3
+
+rc=0
+tc qdisc add dev veth1-end clsact
+tc filter add dev veth1-end ingress protocol 0x8809 pref 1 handle 101 flower skip_hw action pass
+if slowwait_for_counter 15 2 \
+	tc_rule_handle_stats_get "dev veth1-end ingress" 101 ".packets" "" &> /dev/null; then
+	echo "PASS, captured 2"
+else
+	echo "FAIL"
+	rc=1
+fi
+exit $rc
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
new file mode 100755
index 000000000000..8293dbc7c18f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bond device ether type changing
+#
+
+ALL_TESTS="
+	bond_test_unsuccessful_enslave_type_change
+	bond_test_successful_enslave_type_change
+"
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+bond_check_flags()
+{
+	local bonddev=$1
+
+	ip -d l sh dev "$bonddev" | grep -q "MASTER"
+	check_err $? "MASTER flag is missing from the bond device"
+
+	ip -d l sh dev "$bonddev" | grep -q "SLAVE"
+	check_err $? "SLAVE flag is missing from the bond device"
+}
+
+# test enslaved bond dev type change from ARPHRD_ETHER and back
+# this allows us to test both MASTER and SLAVE flags at once
+bond_test_enslave_type_change()
+{
+	local test_success=$1
+	local devbond0="test-bond0"
+	local devbond1="test-bond1"
+	local devbond2="test-bond2"
+	local nonethdev="test-noneth0"
+
+	# create a non-ARPHRD_ETHER device for testing (e.g. nlmon type)
+	ip link add name "$nonethdev" type nlmon
+	check_err $? "could not create a non-ARPHRD_ETHER device (nlmon)"
+	ip link add name "$devbond0" type bond
+	if [ $test_success -eq 1 ]; then
+		# we need devbond0 in active-backup mode to successfully enslave nonethdev
+		ip link set dev "$devbond0" type bond mode active-backup
+		check_err $? "could not change bond mode to active-backup"
+	fi
+	ip link add name "$devbond1" type bond
+	ip link add name "$devbond2" type bond
+	ip link set dev "$devbond0" master "$devbond1"
+	check_err $? "could not enslave $devbond0 to $devbond1"
+	# change bond type to non-ARPHRD_ETHER
+	ip link set dev "$nonethdev" master "$devbond0" 1>/dev/null 2>/dev/null
+	ip link set dev "$nonethdev" nomaster 1>/dev/null 2>/dev/null
+	# restore ARPHRD_ETHER type by enslaving such device
+	ip link set dev "$devbond2" master "$devbond0"
+	check_err $? "could not enslave $devbond2 to $devbond0"
+
+	bond_check_flags "$devbond0"
+
+	# clean up
+	ip link del dev "$devbond0"
+	ip link del dev "$devbond1"
+	ip link del dev "$devbond2"
+	ip link del dev "$nonethdev"
+}
+
+bond_test_unsuccessful_enslave_type_change()
+{
+	RET=0
+
+	bond_test_enslave_type_change 0
+	log_test "Change ether type of an enslaved bond device with unsuccessful enslave"
+}
+
+bond_test_successful_enslave_type_change()
+{
+	RET=0
+
+	bond_test_enslave_type_change 1
+	log_test "Change ether type of an enslaved bond device with successful enslave"
+}
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
new file mode 100755
index 000000000000..78d3e0fe6604
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+#   Verify bond interface could up when set IPv6 link local address target.
+#
+#  +----------------+
+#  |      br0       |
+#  |       |        |    sw
+#  | veth0   veth1  |
+#  +---+-------+----+
+#      |       |
+#  +---+-------+----+
+#  | veth0   veth1  |
+#  |       |        |    host
+#  |     bond0      |
+#  +----------------+
+#
+# We use veths instead of physical interfaces
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+sw="sw-$(mktemp -u XXXXXX)"
+host="ns-$(mktemp -u XXXXXX)"
+
+cleanup()
+{
+	ip netns del $sw
+	ip netns del $host
+}
+
+wait_lladdr_dad()
+{
+	$@ | grep fe80 | grep -qv tentative
+}
+
+wait_bond_up()
+{
+	$@ | grep -q 'state UP'
+}
+
+trap cleanup 0 1 2
+
+ip netns add $sw
+ip netns add $host
+
+ip -n $host link add veth0 type veth peer name veth0 netns $sw
+ip -n $host link add veth1 type veth peer name veth1 netns $sw
+
+ip -n $sw link add br0 type bridge
+ip -n $sw link set br0 up
+sw_lladdr=$(ip -n $sw addr show br0 | awk '/fe80/{print $2}' | cut -d'/' -f1)
+# wait some time to make sure bridge lladdr pass DAD
+slowwait 2 wait_lladdr_dad ip -n $sw addr show br0
+
+ip -n $host link add bond0 type bond mode 1 ns_ip6_target ${sw_lladdr} \
+	arp_validate 3 arp_interval 1000
+# add a lladdr for bond to make sure there is a route to target
+ip -n $host addr add fe80::beef/64 dev bond0
+ip -n $host link set bond0 up
+ip -n $host link set veth0 master bond0
+ip -n $host link set veth1 master bond0
+
+ip -n $sw link set veth0 master br0
+ip -n $sw link set veth1 master br0
+ip -n $sw link set veth0 up
+ip -n $sw link set veth1 up
+
+slowwait 5 wait_bond_up ip -n $host link show bond0
+
+rc=0
+if ip -n $host link show bond0 | grep -q LOWER_UP; then
+	echo "PASS"
+else
+	echo "FAIL"
+	rc=1
+fi
+exit $rc
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh
new file mode 100755
index 000000000000..f09e100232c7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# IPsec over bonding offload test:
+#
+#  +----------------+
+#  |     bond0      |
+#  |       |        |
+#  |  eth0    eth1  |
+#  +---+-------+----+
+#
+# We use netdevsim instead of physical interfaces
+#-------------------------------------------------------------------
+# Example commands
+#   ip x s add proto esp src 192.0.2.1 dst 192.0.2.2 \
+#            spi 0x07 mode transport reqid 0x07 replay-window 32 \
+#            aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+#            sel src 192.0.2.1/24 dst 192.0.2.2/24
+#            offload dev bond0 dir out
+#   ip x p add dir out src 192.0.2.1/24 dst 192.0.2.2/24 \
+#            tmpl proto esp src 192.0.2.1 dst 192.0.2.2 \
+#            spi 0x07 mode transport reqid 0x07
+#
+#-------------------------------------------------------------------
+
+lib_dir=$(dirname "$0")
+# shellcheck disable=SC1091
+source "$lib_dir"/../../../net/lib.sh
+srcip=192.0.2.1
+dstip=192.0.2.2
+ipsec0=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ipsec
+ipsec1=/sys/kernel/debug/netdevsim/netdevsim0/ports/1/ipsec
+active_slave=""
+
+# shellcheck disable=SC2317
+active_slave_changed()
+{
+	local old_active_slave=$1
+	local new_active_slave
+
+	# shellcheck disable=SC2154
+	new_active_slave=$(ip -n "${ns}" -d -j link show bond0 | \
+		jq -r ".[].linkinfo.info_data.active_slave")
+	[ "$new_active_slave" != "$old_active_slave" ] && [ "$new_active_slave" != "null" ]
+}
+
+test_offload()
+{
+	# use ping to exercise the Tx path
+	ip netns exec "$ns" ping -I bond0 -c 3 -W 1 -i 0 "$dstip" >/dev/null
+
+	active_slave=$(ip -n "${ns}" -d -j link show bond0 | \
+		       jq -r ".[].linkinfo.info_data.active_slave")
+
+	if [ "$active_slave" = "$nic0" ]; then
+		sysfs=$ipsec0
+	elif [ "$active_slave" = "$nic1" ]; then
+		sysfs=$ipsec1
+	else
+		check_err 1 "bond_ipsec_offload invalid active_slave $active_slave"
+	fi
+
+	# The tx/rx order in sysfs may changed after failover
+	grep -q "SA count=2 tx=3" "$sysfs" && grep -q "tx ipaddr=$dstip" "$sysfs"
+	check_err $? "incorrect tx count with link ${active_slave}"
+
+	log_test bond_ipsec_offload "active_slave ${active_slave}"
+}
+
+setup_env()
+{
+	if ! mount | grep -q debugfs; then
+		mount -t debugfs none /sys/kernel/debug/ &> /dev/null
+		defer umount /sys/kernel/debug/
+
+	fi
+
+	# setup netdevsim since dummy/veth dev doesn't have offload support
+	if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+		if ! modprobe -q netdevsim; then
+			echo "SKIP: can't load netdevsim for ipsec offload"
+			# shellcheck disable=SC2154
+			exit "$ksft_skip"
+		fi
+		defer modprobe -r netdevsim
+	fi
+
+	setup_ns ns
+	defer cleanup_ns "$ns"
+}
+
+setup_bond()
+{
+	ip -n "$ns" link add bond0 type bond mode active-backup miimon 100
+	ip -n "$ns" addr add "$srcip/24" dev bond0
+	ip -n "$ns" link set bond0 up
+
+	echo "0 2" | ip netns exec "$ns" tee /sys/bus/netdevsim/new_device >/dev/null
+	nic0=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | head -n 1)
+	nic1=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | tail -n 1)
+	ip -n "$ns" link set "$nic0" master bond0
+	ip -n "$ns" link set "$nic1" master bond0
+
+	# we didn't create a peer, make sure we can Tx by adding a permanent
+	# neighbour this need to be added after enslave
+	ip -n "$ns" neigh add "$dstip" dev bond0 lladdr 00:11:22:33:44:55
+
+	# create offloaded SAs, both in and out
+	ip -n "$ns" x p add dir out src "$srcip/24" dst "$dstip/24" \
+	    tmpl proto esp src "$srcip" dst "$dstip" spi 9 \
+	    mode transport reqid 42
+
+	ip -n "$ns" x p add dir in src "$dstip/24" dst "$srcip/24" \
+	    tmpl proto esp src "$dstip" dst "$srcip" spi 9 \
+	    mode transport reqid 42
+
+	ip -n "$ns" x s add proto esp src "$srcip" dst "$dstip" spi 9 \
+	    mode transport reqid 42 aead "rfc4106(gcm(aes))" \
+	    0x3132333435363738393031323334353664636261 128 \
+	    sel src "$srcip/24" dst "$dstip/24" \
+	    offload dev bond0 dir out
+
+	ip -n "$ns" x s add proto esp src "$dstip" dst "$srcip" spi 9 \
+	    mode transport reqid 42 aead "rfc4106(gcm(aes))" \
+	    0x3132333435363738393031323334353664636261 128 \
+	    sel src "$dstip/24" dst "$srcip/24" \
+	    offload dev bond0 dir in
+
+	# does offload show up in ip output
+	lines=$(ip -n "$ns" x s list | grep -c "crypto offload parameters: dev bond0 dir")
+	if [ "$lines" -ne 2 ] ; then
+		check_err 1 "bond_ipsec_offload SA offload missing from list output"
+	fi
+}
+
+trap defer_scopes_cleanup EXIT
+setup_env
+setup_bond
+
+# start Offload testing
+test_offload
+
+# do failover and re-test
+ip -n "$ns" link set "$active_slave" down
+slowwait 5 active_slave_changed "$active_slave"
+test_offload
+
+# make sure offload get removed from driver
+ip -n "$ns" x s flush
+ip -n "$ns" x p flush
+line0=$(grep -c "SA count=0" "$ipsec0")
+line1=$(grep -c "SA count=0" "$ipsec1")
+[ "$line0" -ne 1 ] || [ "$line1" -ne 1 ]
+check_fail $? "bond_ipsec_offload SA not removed from driver"
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh
new file mode 100755
index 000000000000..a483d505c6a8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing if bond lacp per port priority works
+#
+#          Switch (s_ns)          Backup Switch (b_ns)
+#  +-------------------------+ +-------------------------+
+#  |          bond0          | |          bond0          |
+#  |            +            | |            +            |
+#  |      eth0  |  eth1      | |      eth0  |  eth1      |
+#  |        +---+---+        | |        +---+---+        |
+#  |        |       |        | |        |       |        |
+#  +-------------------------+ +-------------------------+
+#           |       |                   |       |
+#  +-----------------------------------------------------+
+#  |        |       |                   |       |        |
+#  |        +-------+---------+---------+-------+        |
+#  |      eth0     eth1       |       eth2     eth3      |
+#  |                          +                          |
+#  |                        bond0                        |
+#  +-----------------------------------------------------+
+#                        Client (c_ns)
+
+lib_dir=$(dirname "$0")
+# shellcheck disable=SC1091
+source "$lib_dir"/../../../net/lib.sh
+
+setup_links()
+{
+	# shellcheck disable=SC2154
+	ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}"
+	ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}"
+	# shellcheck disable=SC2154
+	ip -n "${c_ns}" link add eth2 type veth peer name eth0 netns "${b_ns}"
+	ip -n "${c_ns}" link add eth3 type veth peer name eth1 netns "${b_ns}"
+
+	ip -n "${c_ns}" link add bond0 type bond mode 802.3ad miimon 100 \
+		lacp_rate fast ad_select actor_port_prio
+	ip -n "${s_ns}" link add bond0 type bond mode 802.3ad miimon 100 \
+		lacp_rate fast
+	ip -n "${b_ns}" link add bond0 type bond mode 802.3ad miimon 100 \
+		lacp_rate fast
+
+	ip -n "${c_ns}" link set eth0 master bond0
+	ip -n "${c_ns}" link set eth1 master bond0
+	ip -n "${c_ns}" link set eth2 master bond0
+	ip -n "${c_ns}" link set eth3 master bond0
+	ip -n "${s_ns}" link set eth0 master bond0
+	ip -n "${s_ns}" link set eth1 master bond0
+	ip -n "${b_ns}" link set eth0 master bond0
+	ip -n "${b_ns}" link set eth1 master bond0
+
+	ip -n "${c_ns}" link set bond0 up
+	ip -n "${s_ns}" link set bond0 up
+	ip -n "${b_ns}" link set bond0 up
+}
+
+test_port_prio_setting()
+{
+	RET=0
+	ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 1000
+	prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" \
+		".[].linkinfo.info_slave_data.actor_port_prio")
+	[ "$prio" -ne 1000 ] && RET=1
+	ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 10
+	prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" \
+		".[].linkinfo.info_slave_data.actor_port_prio")
+	[ "$prio" -ne 10 ] && RET=1
+}
+
+test_agg_reselect()
+{
+	local bond_agg_id slave_agg_id
+	local expect_slave="$1"
+	RET=0
+
+	# Trigger link state change to reselect the aggregator
+	ip -n "${c_ns}" link set eth1 down
+	sleep 0.5
+	ip -n "${c_ns}" link set eth1 up
+	sleep 0.5
+
+	bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" \
+		".[].linkinfo.info_data.ad_info.aggregator")
+	slave_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show $expect_slave" \
+		".[].linkinfo.info_slave_data.ad_aggregator_id")
+	# shellcheck disable=SC2034
+	[ "${bond_agg_id}" -ne "${slave_agg_id}" ] && \
+		RET=1
+}
+
+trap cleanup_all_ns EXIT
+setup_ns c_ns s_ns b_ns
+setup_links
+
+test_port_prio_setting
+log_test "bond 802.3ad" "actor_port_prio setting"
+
+test_agg_reselect eth0
+log_test "bond 802.3ad" "actor_port_prio select"
+
+# Change the actor port prio and re-test
+ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 10
+ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 1000
+test_agg_reselect eth2
+log_test "bond 802.3ad" "actor_port_prio switch"
+
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
new file mode 100755
index 000000000000..559f300f965a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test macvlan/ipvlan over bond
+
+lib_dir=$(dirname "$0")
+source ${lib_dir}/bond_topo_2d1c.sh
+
+xvlan1_ns="xvlan1-$(mktemp -u XXXXXX)"
+xvlan2_ns="xvlan2-$(mktemp -u XXXXXX)"
+xvlan1_ip4="192.0.2.11"
+xvlan1_ip6="2001:db8::11"
+xvlan2_ip4="192.0.2.12"
+xvlan2_ip6="2001:db8::12"
+
+cleanup()
+{
+	client_destroy
+	server_destroy
+	gateway_destroy
+
+	ip netns del ${xvlan1_ns}
+	ip netns del ${xvlan2_ns}
+}
+
+check_connection()
+{
+	local ns=${1}
+	local target=${2}
+	local message=${3}
+	RET=0
+
+	sleep 0.25
+	ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
+	check_err $? "ping failed"
+	log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}"
+}
+
+xvlan_over_bond()
+{
+	local param="$1"
+	local xvlan_type="$2"
+	local xvlan_mode="$3"
+	RET=0
+
+	# setup new bond mode
+	bond_reset "${param}"
+
+	ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode}
+	ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan1_ns}
+	ip -n ${xvlan1_ns} link set dev ${xvlan_type}0 up
+	ip -n ${xvlan1_ns} addr add ${xvlan1_ip4}/24 dev ${xvlan_type}0
+	ip -n ${xvlan1_ns} addr add ${xvlan1_ip6}/24 dev ${xvlan_type}0
+
+	ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode}
+	ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan2_ns}
+	ip -n ${xvlan2_ns} link set dev ${xvlan_type}0 up
+	ip -n ${xvlan2_ns} addr add ${xvlan2_ip4}/24 dev ${xvlan_type}0
+	ip -n ${xvlan2_ns} addr add ${xvlan2_ip6}/24 dev ${xvlan_type}0
+
+	sleep 2
+
+	check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server"
+	check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server"
+	check_connection "${c_ns}" "${xvlan1_ip4}" "IPv4: client->${xvlan_type}_1"
+	check_connection "${c_ns}" "${xvlan1_ip6}" "IPv6: client->${xvlan_type}_1"
+	check_connection "${c_ns}" "${xvlan2_ip4}" "IPv4: client->${xvlan_type}_2"
+	check_connection "${c_ns}" "${xvlan2_ip6}" "IPv6: client->${xvlan_type}_2"
+	check_connection "${xvlan1_ns}" "${xvlan2_ip4}" "IPv4: ${xvlan_type}_1->${xvlan_type}_2"
+	check_connection "${xvlan1_ns}" "${xvlan2_ip6}" "IPv6: ${xvlan_type}_1->${xvlan_type}_2"
+
+	check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client"
+	check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client"
+	check_connection "${xvlan1_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_1->client"
+	check_connection "${xvlan1_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_1->client"
+	check_connection "${xvlan2_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_2->client"
+	check_connection "${xvlan2_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_2->client"
+	check_connection "${xvlan2_ns}" "${xvlan1_ip4}" "IPv4: ${xvlan_type}_2->${xvlan_type}_1"
+	check_connection "${xvlan2_ns}" "${xvlan1_ip6}" "IPv6: ${xvlan_type}_2->${xvlan_type}_1"
+
+	ip -n ${c_ns} neigh flush dev eth0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+ip netns add ${xvlan1_ns}
+ip netns add ${xvlan2_ns}
+
+bond_modes="active-backup balance-tlb balance-alb"
+
+for bond_mode in ${bond_modes}; do
+	xvlan_over_bond "mode ${bond_mode}" macvlan bridge
+	xvlan_over_bond "mode ${bond_mode}" ipvlan  l2
+done
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
new file mode 100755
index 000000000000..187b478d0ddf
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -0,0 +1,578 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bonding options with mode 1,5,6
+
+ALL_TESTS="
+	prio
+	arp_validate
+	num_grat_arp
+	fail_over_mac
+	vlan_over_bond
+"
+
+lib_dir=$(dirname "$0")
+source ${lib_dir}/bond_topo_3d1c.sh
+c_maddr="33:33:ff:00:00:10"
+g_maddr="33:33:ff:00:02:54"
+
+skip_prio()
+{
+	local skip=1
+
+	# check if iproute support prio option
+	ip -n ${s_ns} link set eth0 type bond_slave prio 10
+	[[ $? -ne 0 ]] && skip=0
+
+	# check if kernel support prio option
+	ip -n ${s_ns} -d link show eth0 | grep -q "prio 10"
+	[[ $? -ne 0 ]] && skip=0
+
+	return $skip
+}
+
+skip_ns()
+{
+	local skip=1
+
+	# check if iproute support ns_ip6_target option
+	ip -n ${s_ns} link add bond1 type bond ns_ip6_target ${g_ip6}
+	[[ $? -ne 0 ]] && skip=0
+
+	# check if kernel support ns_ip6_target option
+	ip -n ${s_ns} -d link show bond1 | grep -q "ns_ip6_target ${g_ip6}"
+	[[ $? -ne 0 ]] && skip=0
+
+	ip -n ${s_ns} link del bond1
+
+	return $skip
+}
+
+active_slave=""
+active_slave_changed()
+{
+	local old_active_slave=$1
+	local new_active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" \
+				".[].linkinfo.info_data.active_slave")
+	[ "$new_active_slave" != "$old_active_slave" -a "$new_active_slave" != "null" ]
+}
+
+check_active_slave()
+{
+	local target_active_slave=$1
+	slowwait 5 active_slave_changed $active_slave
+	active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+	test "$active_slave" = "$target_active_slave"
+	check_err $? "Current active slave is $active_slave but not $target_active_slave"
+}
+
+# Test bonding prio option
+prio_test()
+{
+	local param="$1"
+	RET=0
+
+	# create bond
+	bond_reset "${param}"
+	# set active_slave to primary eth1 specifically
+	ip -n ${s_ns} link set bond0 type bond active_slave eth1
+
+	# check bonding member prio value
+	ip -n ${s_ns} link set eth0 type bond_slave prio 0
+	ip -n ${s_ns} link set eth1 type bond_slave prio 10
+	ip -n ${s_ns} link set eth2 type bond_slave prio 11
+	cmd_jq "ip -n ${s_ns} -d -j link show eth0" \
+		".[].linkinfo.info_slave_data | select (.prio == 0)" "-e" &> /dev/null
+	check_err $? "eth0 prio is not 0"
+	cmd_jq "ip -n ${s_ns} -d -j link show eth1" \
+		".[].linkinfo.info_slave_data | select (.prio == 10)" "-e" &> /dev/null
+	check_err $? "eth1 prio is not 10"
+	cmd_jq "ip -n ${s_ns} -d -j link show eth2" \
+		".[].linkinfo.info_slave_data | select (.prio == 11)" "-e" &> /dev/null
+	check_err $? "eth2 prio is not 11"
+
+	bond_check_connection "setup"
+
+	# active slave should be the primary slave
+	check_active_slave eth1
+
+	# active slave should be the higher prio slave
+	ip -n ${s_ns} link set $active_slave down
+	check_active_slave eth2
+	bond_check_connection "fail over"
+
+	# when only 1 slave is up
+	ip -n ${s_ns} link set $active_slave down
+	check_active_slave eth0
+	bond_check_connection "only 1 slave up"
+
+	# when a higher prio slave change to up
+	ip -n ${s_ns} link set eth2 up
+	bond_check_connection "higher prio slave up"
+	case $primary_reselect in
+		"0")
+			check_active_slave "eth2"
+			;;
+		"1")
+			check_active_slave "eth0"
+			;;
+		"2")
+			check_active_slave "eth0"
+			;;
+	esac
+	local pre_active_slave=$active_slave
+
+	# when the primary slave change to up
+	ip -n ${s_ns} link set eth1 up
+	bond_check_connection "primary slave up"
+	case $primary_reselect in
+		"0")
+			check_active_slave "eth1"
+			;;
+		"1")
+			check_active_slave "$pre_active_slave"
+			;;
+		"2")
+			check_active_slave "$pre_active_slave"
+			ip -n ${s_ns} link set $active_slave down
+			bond_check_connection "pre_active slave down"
+			check_active_slave "eth1"
+			;;
+	esac
+
+	# Test changing bond slave prio
+	if [[ "$primary_reselect" == "0" ]];then
+		ip -n ${s_ns} link set eth0 type bond_slave prio 1000000
+		ip -n ${s_ns} link set eth1 type bond_slave prio 0
+		ip -n ${s_ns} link set eth2 type bond_slave prio -50
+		ip -n ${s_ns} -d link show eth0 | grep -q 'prio 1000000'
+		check_err $? "eth0 prio is not 1000000"
+		ip -n ${s_ns} -d link show eth1 | grep -q 'prio 0'
+		check_err $? "eth1 prio is not 0"
+		ip -n ${s_ns} -d link show eth2 | grep -q 'prio -50'
+		check_err $? "eth3 prio is not -50"
+		check_active_slave "eth1"
+
+		ip -n ${s_ns} link set $active_slave down
+		check_active_slave "eth0"
+		bond_check_connection "change slave prio"
+	fi
+}
+
+prio_miimon()
+{
+	local primary_reselect
+	local mode=$1
+
+	for primary_reselect in 0 1 2; do
+		prio_test "mode $mode miimon 100 primary eth1 primary_reselect $primary_reselect"
+		log_test "prio" "$mode miimon primary_reselect $primary_reselect"
+	done
+}
+
+prio_arp()
+{
+	local primary_reselect
+	local mode=$1
+
+	for primary_reselect in 0 1 2; do
+		prio_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect"
+		log_test "prio" "$mode arp_ip_target primary_reselect $primary_reselect"
+	done
+}
+
+prio_ns()
+{
+	local primary_reselect
+	local mode=$1
+
+	if skip_ns; then
+		log_test_skip "prio ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'."
+		return 0
+	fi
+
+	for primary_reselect in 0 1 2; do
+		prio_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect"
+		log_test "prio" "$mode ns_ip6_target primary_reselect $primary_reselect"
+	done
+}
+
+prio()
+{
+	local mode modes="active-backup balance-tlb balance-alb"
+
+	if skip_prio; then
+		log_test_skip "prio" "Current iproute or kernel doesn't support bond option 'prio'."
+		return 0
+	fi
+
+	for mode in $modes; do
+		prio_miimon $mode
+	done
+	prio_arp "active-backup"
+	prio_ns "active-backup"
+}
+
+wait_mii_up()
+{
+	for i in $(seq 0 2); do
+		mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status")
+		[ ${mii_status} != "UP" ] && return 1
+	done
+	return 0
+}
+
+arp_validate_test()
+{
+	local param="$1"
+	RET=0
+
+	# create bond
+	bond_reset "${param}"
+
+	bond_check_connection
+	[ $RET -ne 0 ] && log_test "arp_validate" "$retmsg"
+
+	# wait for a while to make sure the mii status stable
+	slowwait 5 wait_mii_up
+	for i in $(seq 0 2); do
+		mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status")
+		if [ ${mii_status} != "UP" ]; then
+			RET=1
+			log_test "arp_validate" "interface eth$i mii_status $mii_status"
+		fi
+	done
+}
+
+# Testing correct multicast groups are added to slaves for ns targets
+arp_validate_mcast()
+{
+	RET=0
+	local arp_valid=$(cmd_jq "ip -n ${s_ns} -j -d link show bond0" ".[].linkinfo.info_data.arp_validate")
+	local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+
+	for i in $(seq 0 2); do
+		maddr_list=$(ip -n ${s_ns} maddr show dev eth${i})
+
+		# arp_valid == 0 or active_slave should not join any maddrs
+		if { [ "$arp_valid" == "null" ] || [ "eth${i}" == ${active_slave} ]; } && \
+			echo "$maddr_list" | grep -qE "${c_maddr}|${g_maddr}"; then
+			RET=1
+			check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group"
+		# arp_valid != 0 and backup_slave should join both maddrs
+		elif [ "$arp_valid" != "null" ] && [ "eth${i}" != ${active_slave} ] && \
+		     ( ! echo "$maddr_list" | grep -q "${c_maddr}" || \
+		       ! echo "$maddr_list" | grep -q "${m_maddr}"); then
+			RET=1
+			check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group"
+		fi
+	done
+
+	# Do failover
+	ip -n ${s_ns} link set ${active_slave} down
+	# wait for active link change
+	slowwait 2 active_slave_changed $active_slave
+	active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+
+	for i in $(seq 0 2); do
+		maddr_list=$(ip -n ${s_ns} maddr show dev eth${i})
+
+		# arp_valid == 0 or active_slave should not join any maddrs
+		if { [ "$arp_valid" == "null" ] || [ "eth${i}" == ${active_slave} ]; } && \
+			echo "$maddr_list" | grep -qE "${c_maddr}|${g_maddr}"; then
+			RET=1
+			check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group"
+		# arp_valid != 0 and backup_slave should join both maddrs
+		elif [ "$arp_valid" != "null" ] && [ "eth${i}" != ${active_slave} ] && \
+		     ( ! echo "$maddr_list" | grep -q "${c_maddr}" || \
+		       ! echo "$maddr_list" | grep -q "${m_maddr}"); then
+			RET=1
+			check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group"
+		fi
+	done
+}
+
+arp_validate_arp()
+{
+	local mode=$1
+	local val
+	for val in $(seq 0 6); do
+		arp_validate_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} arp_validate $val"
+		log_test "arp_validate" "$mode arp_ip_target arp_validate $val"
+	done
+}
+
+arp_validate_ns()
+{
+	local mode=$1
+	local val
+
+	if skip_ns; then
+		log_test_skip "arp_validate ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'."
+		return 0
+	fi
+
+	for val in $(seq 0 6); do
+		arp_validate_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6},${c_ip6} arp_validate $val"
+		log_test "arp_validate" "$mode ns_ip6_target arp_validate $val"
+		arp_validate_mcast
+		log_test "arp_validate" "join mcast group"
+	done
+}
+
+arp_validate()
+{
+	arp_validate_arp "active-backup"
+	arp_validate_ns "active-backup"
+}
+
+garp_test()
+{
+	local param="$1"
+	local active_slave exp_num real_num i
+	RET=0
+
+	# create bond
+	bond_reset "${param}"
+
+	bond_check_connection
+	[ $RET -ne 0 ] && log_test "num_grat_arp" "$retmsg"
+
+
+	# Add tc rules to count GARP number
+	for i in $(seq 0 2); do
+		tc -n ${g_ns} filter add dev s$i ingress protocol arp pref 1 handle 101 \
+			flower skip_hw arp_op request arp_sip ${s_ip4} arp_tip ${s_ip4} action pass
+	done
+
+	# Do failover
+	active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+	ip -n ${s_ns} link set ${active_slave} down
+
+	# wait for active link change
+	slowwait 2 active_slave_changed $active_slave
+
+	exp_num=$(echo "${param}" | cut -f6 -d ' ')
+	active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+	slowwait_for_counter $((exp_num + 5)) $exp_num tc_rule_handle_stats_get \
+		"dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}" &> /dev/null
+
+	# check result
+	real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}")
+	if [ "${real_num}" -ne "${exp_num}" ]; then
+		echo "$real_num garp packets sent on active slave ${active_slave}"
+		RET=1
+	fi
+
+	for i in $(seq 0 2); do
+		tc -n ${g_ns} filter del dev s$i ingress
+	done
+}
+
+num_grat_arp()
+{
+	local val
+	for val in 10 20 30; do
+		garp_test "mode active-backup miimon 10 num_grat_arp $val peer_notify_delay 100"
+		log_test "num_grat_arp" "active-backup miimon num_grat_arp $val"
+	done
+}
+
+check_all_mac_same()
+{
+	RET=0
+	# all slaves should have same mac address (with the first port's mac)
+	local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+	local eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+	local eth1_mac=$(ip -n "$s_ns" -j link show eth1 | jq -r '.[]["address"]')
+	local eth2_mac=$(ip -n "$s_ns" -j link show eth2 | jq -r '.[]["address"]')
+	if [ "$bond_mac" != "${mac[0]}" ] || [ "$eth0_mac" != "$bond_mac" ] || \
+		[ "$eth1_mac" != "$bond_mac" ] || [ "$eth2_mac" != "$bond_mac" ]; then
+		RET=1
+	fi
+}
+
+check_bond_mac_same_with_first()
+{
+	RET=0
+	# bond mac address should be same with the first added slave
+	local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+	if [ "$bond_mac" != "${mac[0]}" ]; then
+		RET=1
+	fi
+}
+
+check_bond_mac_same_with_active()
+{
+	RET=0
+	# bond mac address should be same with active slave
+	local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+	local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+	local active_slave_mac=$(ip -n "$s_ns" -j link show "$active_slave" | jq -r '.[]["address"]')
+	if [ "$bond_mac" != "$active_slave_mac" ]; then
+		RET=1
+	fi
+}
+
+check_backup_slave_mac_not_change()
+{
+	RET=0
+	# backup slave's mac address is not changed
+	if ip -n "$s_ns" -d -j link show type bond_slave | jq -e '.[]
+		| select(.linkinfo.info_slave_data.state=="BACKUP")
+		| select(.address != .linkinfo.info_slave_data.perm_hwaddr)' &> /dev/null; then
+		RET=1
+	fi
+}
+
+check_backup_slave_mac_inherit()
+{
+	local backup_mac
+	RET=0
+
+	# backup slaves should use mac[1] or mac[2]
+	local backup_macs=$(ip -n "$s_ns" -d -j link show type bond_slave | \
+		jq -r '.[] | select(.linkinfo.info_slave_data.state=="BACKUP") | .address')
+	for backup_mac in $backup_macs; do
+		if [ "$backup_mac" != "${mac[1]}" ] && [ "$backup_mac" != "${mac[2]}" ]; then
+			RET=1
+		fi
+	done
+}
+
+check_first_slave_random_mac()
+{
+	RET=0
+	# remove the first added slave and added it back
+	ip -n "$s_ns" link set eth0 nomaster
+	ip -n "$s_ns" link set eth0 master bond0
+
+	# the first slave should use random mac address
+	eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+	[ "$eth0_mac" = "${mac[0]}" ] && RET=1
+	log_test "bond fail_over_mac follow" "random first slave mac"
+
+	# remove the first slave, the permanent MAC address should be restored back
+	ip -n "$s_ns" link set eth0 nomaster
+	eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+	[ "$eth0_mac" != "${mac[0]}" ] && RET=1
+}
+
+do_active_backup_failover()
+{
+	local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+	ip -n ${s_ns} link set ${active_slave} down
+	slowwait 2 active_slave_changed $active_slave
+	ip -n ${s_ns} link set ${active_slave} up
+}
+
+fail_over_mac()
+{
+	# Bring down the first interface on the switch to force the bond to
+	# select another active interface instead of the first one that joined.
+	ip -n "$g_ns" link set s0 down
+
+	# fail_over_mac none
+	bond_reset "mode active-backup miimon 100 fail_over_mac 0"
+	check_all_mac_same
+	log_test "fail_over_mac 0" "all slaves have same mac"
+	do_active_backup_failover
+	check_all_mac_same
+	log_test "fail_over_mac 0" "failover: all slaves have same mac"
+
+	# fail_over_mac active
+	bond_reset "mode active-backup miimon 100 fail_over_mac 1"
+	check_bond_mac_same_with_active
+	log_test "fail_over_mac 1" "bond mac is same with active slave mac"
+	check_backup_slave_mac_not_change
+	log_test "fail_over_mac 1" "backup slave mac is not changed"
+	do_active_backup_failover
+	check_bond_mac_same_with_active
+	log_test "fail_over_mac 1" "failover: bond mac is same with active slave mac"
+	check_backup_slave_mac_not_change
+	log_test "fail_over_mac 1" "failover: backup slave mac is not changed"
+
+	# fail_over_mac follow
+	bond_reset "mode active-backup miimon 100 fail_over_mac 2"
+	check_bond_mac_same_with_first
+	log_test "fail_over_mac 2" "bond mac is same with first slave mac"
+	check_bond_mac_same_with_active
+	log_test "fail_over_mac 2" "bond mac is same with active slave mac"
+	check_backup_slave_mac_inherit
+	log_test "fail_over_mac 2" "backup slave mac inherit"
+	do_active_backup_failover
+	check_bond_mac_same_with_first
+	log_test "fail_over_mac 2" "failover: bond mac is same with first slave mac"
+	check_bond_mac_same_with_active
+	log_test "fail_over_mac 2" "failover: bond mac is same with active slave mac"
+	check_backup_slave_mac_inherit
+	log_test "fail_over_mac 2" "failover: backup slave mac inherit"
+	check_first_slave_random_mac
+	log_test "fail_over_mac 2" "first slave mac random"
+}
+
+vlan_over_bond_arp()
+{
+	local mode="$1"
+	RET=0
+
+	bond_reset "mode $mode arp_interval 100 arp_ip_target 192.0.3.10"
+	ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3
+	ip -n "${s_ns}" link set bond0.3 up
+	ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3
+	ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3
+
+	slowwait_for_counter 5 5 tc_rule_handle_stats_get \
+		"dev eth0.3 ingress" 101 ".packets" "-n ${c_ns}" &> /dev/null || RET=1
+	log_test "vlan over bond arp" "$mode"
+}
+
+vlan_over_bond_ns()
+{
+	local mode="$1"
+	RET=0
+
+	if skip_ns; then
+		log_test_skip "vlan_over_bond ns" "$mode"
+		return 0
+	fi
+
+	bond_reset "mode $mode arp_interval 100 ns_ip6_target 2001:db8::3:10"
+	ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3
+	ip -n "${s_ns}" link set bond0.3 up
+	ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3
+	ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3
+
+	slowwait_for_counter 5 5 tc_rule_handle_stats_get \
+		"dev eth0.3 ingress" 102 ".packets" "-n ${c_ns}" &> /dev/null || RET=1
+	log_test "vlan over bond ns" "$mode"
+}
+
+vlan_over_bond()
+{
+	# add vlan 3 for client
+	ip -n "${c_ns}" link add eth0.3 link eth0 type vlan id 3
+	ip -n "${c_ns}" link set eth0.3 up
+	ip -n "${c_ns}" addr add 192.0.3.10/24 dev eth0.3
+	ip -n "${c_ns}" addr add 2001:db8::3:10/64 dev eth0.3
+
+	# Add tc rule to check the vlan pkts
+	tc -n "${c_ns}" qdisc add dev eth0.3 clsact
+	tc -n "${c_ns}" filter add dev eth0.3 ingress protocol arp \
+		handle 101 flower skip_hw arp_op request \
+		arp_sip 192.0.3.1 arp_tip 192.0.3.10 action pass
+	tc -n "${c_ns}" filter add dev eth0.3 ingress protocol ipv6 \
+		handle 102 flower skip_hw ip_proto icmpv6 \
+		type 135 src_ip 2001:db8::3:1 action pass
+
+	vlan_over_bond_arp "active-backup"
+	vlan_over_bond_ns "active-backup"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh
new file mode 100755
index 000000000000..9c3b089813df
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test if a bond interface works with lacp_active=off.
+
+# shellcheck disable=SC2034
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+# shellcheck disable=SC1091
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+# shellcheck disable=SC2317
+check_port_state()
+{
+	local netns=$1
+	local port=$2
+	local state=$3
+
+	ip -n "${netns}" -d -j link show "$port" | \
+		jq -e ".[].linkinfo.info_slave_data.ad_actor_oper_port_state_str | index(\"${state}\") != null" > /dev/null
+}
+
+check_pkt_count()
+{
+	RET=0
+	local ns="$1"
+	local iface="$2"
+
+	# wait 65s, one per 30s
+	slowwait_for_counter 65 2 tc_rule_handle_stats_get \
+		"dev ${iface} egress" 101 ".packets" "-n ${ns}" &> /dev/null
+}
+
+setup() {
+	setup_ns c_ns s_ns
+
+	# shellcheck disable=SC2154
+	ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}"
+	ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}"
+
+	# Add tc filter to count the pkts
+	tc -n "${c_ns}" qdisc add dev eth0 clsact
+	tc -n "${c_ns}" filter add dev eth0 egress handle 101 protocol 0x8809 matchall action pass
+	tc -n "${s_ns}" qdisc add dev eth1 clsact
+	tc -n "${s_ns}" filter add dev eth1 egress handle 101 protocol 0x8809 matchall action pass
+
+	ip -n "${s_ns}" link add bond0 type bond mode 802.3ad lacp_active on lacp_rate fast
+	ip -n "${s_ns}" link set eth0 master bond0
+	ip -n "${s_ns}" link set eth1 master bond0
+
+	ip -n "${c_ns}" link add bond0 type bond mode 802.3ad lacp_active off lacp_rate fast
+	ip -n "${c_ns}" link set eth0 master bond0
+	ip -n "${c_ns}" link set eth1 master bond0
+
+}
+
+trap cleanup_all_ns EXIT
+setup
+
+# The bond will send 2 lacpdu pkts during init time, let's wait at least 2s
+# after interface up
+ip -n "${c_ns}" link set bond0 up
+sleep 2
+
+# 1. The passive side shouldn't send LACPDU.
+check_pkt_count "${c_ns}" "eth0" && RET=1
+log_test "802.3ad lacp_active off" "init port"
+
+ip -n "${s_ns}" link set bond0 up
+# 2. The passive side should not have the 'active' flag.
+RET=0
+slowwait 2 check_port_state "${c_ns}" "eth0" "active" && RET=1
+log_test "802.3ad lacp_active off" "port state active"
+
+# 3. The active side should have the 'active' flag.
+RET=0
+slowwait 2 check_port_state "${s_ns}" "eth0" "active" || RET=1
+log_test "802.3ad lacp_active on" "port state active"
+
+# 4. Make sure the connection is not expired.
+RET=0
+slowwait 5 check_port_state "${s_ns}" "eth0" "distributing"
+slowwait 10 check_port_state "${s_ns}" "eth0" "expired" && RET=1
+log_test "bond 802.3ad lacp_active off" "port connection"
+
+# After testing, disconnect one port on each side to check the state.
+ip -n "${s_ns}" link set eth0 nomaster
+ip -n "${s_ns}" link set eth0 up
+ip -n "${c_ns}" link set eth1 nomaster
+ip -n "${c_ns}" link set eth1 up
+# Due to Periodic Machine and Rx Machine state change, the bond will still
+# send lacpdu pkts in a few seconds. sleep at lease 5s to make sure
+# negotiation finished
+sleep 5
+
+# 5. The active side should keep sending LACPDU.
+check_pkt_count "${s_ns}" "eth1" || RET=1
+log_test "bond 802.3ad lacp_active on" "port pkt after disconnect"
+
+# 6. The passive side shouldn't send LACPDU anymore.
+check_pkt_count "${c_ns}" "eth0" && RET=1
+log_test "bond 802.3ad lacp_active off" "port pkt after disconnect"
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
new file mode 100644
index 000000000000..167aa4a4a12a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Topology for Bond mode 1,5,6 testing
+#
+#  +-------------------------+
+#  |          bond0          |  Server
+#  |            +            |  192.0.2.1/24
+#  |      eth0  |  eth1      |  2001:db8::1/24
+#  |        +---+---+        |
+#  |        |       |        |
+#  +-------------------------+
+#           |       |
+#  +-------------------------+
+#  |        |       |        |
+#  |    +---+-------+---+    |  Gateway
+#  |    |      br0      |    |  192.0.2.254/24
+#  |    +-------+-------+    |  2001:db8::254/24
+#  |            |            |
+#  +-------------------------+
+#               |
+#  +-------------------------+
+#  |            |            |  Client
+#  |            +            |  192.0.2.10/24
+#  |          eth0           |  2001:db8::10/24
+#  +-------------------------+
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+s_ns="s-$(mktemp -u XXXXXX)"
+c_ns="c-$(mktemp -u XXXXXX)"
+g_ns="g-$(mktemp -u XXXXXX)"
+s_ip4="192.0.2.1"
+c_ip4="192.0.2.10"
+g_ip4="192.0.2.254"
+s_ip6="2001:db8::1"
+c_ip6="2001:db8::10"
+g_ip6="2001:db8::254"
+mac[0]="00:0a:0b:0c:0d:01"
+mac[1]="00:0a:0b:0c:0d:02"
+
+gateway_create()
+{
+	ip netns add ${g_ns}
+	ip -n ${g_ns} link add br0 type bridge
+	ip -n ${g_ns} link set br0 up
+	ip -n ${g_ns} addr add ${g_ip4}/24 dev br0
+	ip -n ${g_ns} addr add ${g_ip6}/24 dev br0
+}
+
+gateway_destroy()
+{
+	ip -n ${g_ns} link del br0
+	ip netns del ${g_ns}
+}
+
+server_create()
+{
+	ip netns add ${s_ns}
+	ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100
+
+	for i in $(seq 0 1); do
+		ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+		ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}"
+
+		ip -n ${g_ns} link set s${i} up
+		ip -n ${g_ns} link set s${i} master br0
+		ip -n ${s_ns} link set eth${i} master bond0
+
+		tc -n ${g_ns} qdisc add dev s${i} clsact
+	done
+
+	ip -n ${s_ns} link set bond0 up
+	ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+	ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+}
+
+# Reset bond with new mode and options
+bond_reset()
+{
+	# Count the eth link number in real-time as this function
+	# maybe called from other topologies.
+	local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth")
+	local param="$1"
+	link_num=$((link_num -1))
+
+	ip -n ${s_ns} link set bond0 down
+	ip -n ${s_ns} link del bond0
+
+	ip -n ${s_ns} link add bond0 type bond $param
+	for i in $(seq 0 ${link_num}); do
+		ip -n ${s_ns} link set eth$i master bond0
+	done
+
+	ip -n ${s_ns} link set bond0 up
+	ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+	ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+	# Wait for IPv6 address ready as it needs DAD
+	slowwait 2 ip netns exec ${s_ns} ping6 ${c_ip6} -c 1 -W 0.1 &> /dev/null
+}
+
+server_destroy()
+{
+	# Count the eth link number in real-time as this function
+	# maybe called from other topologies.
+	local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth")
+	link_num=$((link_num -1))
+	for i in $(seq 0 ${link_num}); do
+		ip -n ${s_ns} link del eth${i}
+	done
+	ip netns del ${s_ns}
+}
+
+client_create()
+{
+	ip netns add ${c_ns}
+	ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns}
+
+	ip -n ${g_ns} link set c0 up
+	ip -n ${g_ns} link set c0 master br0
+
+	ip -n ${c_ns} link set eth0 up
+	ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0
+	ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0
+}
+
+client_destroy()
+{
+	ip -n ${c_ns} link del eth0
+	ip netns del ${c_ns}
+}
+
+setup_prepare()
+{
+	gateway_create
+	server_create
+	client_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	client_destroy
+	server_destroy
+	gateway_destroy
+}
+
+bond_check_connection()
+{
+	local msg=${1:-"check connection"}
+
+	slowwait 2 ip netns exec ${s_ns} ping ${c_ip4} -c 1 -W 0.1 &> /dev/null
+	ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
+	check_err $? "${msg}: ping failed"
+	ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
+	check_err $? "${msg}: ping6 failed"
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
new file mode 100644
index 000000000000..23a2932301cc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Topology for Bond mode 1,5,6 testing
+#
+#  +-------------------------------------+
+#  |                bond0                |
+#  |                  +                  |  Server
+#  |      eth0        | eth1   eth2      |  192.0.2.1/24
+#  |        +-------------------+        |  2001:db8::1/24
+#  |        |         |         |        |
+#  +-------------------------------------+
+#           |         |         |
+#  +-------------------------------------+
+#  |        |         |         |        |
+#  |    +---+---------+---------+---+    |  Gateway
+#  |    |            br0            |    |  192.0.2.254/24
+#  |    +-------------+-------------+    |  2001:db8::254/24
+#  |                  |                  |
+#  +-------------------------------------+
+#                     |
+#  +-------------------------------------+
+#  |                  |                  |  Client
+#  |                  +                  |  192.0.2.10/24
+#  |                eth0                 |  2001:db8::10/24
+#  +-------------------------------------+
+
+source bond_topo_2d1c.sh
+mac[2]="00:0a:0b:0c:0d:03"
+
+setup_prepare()
+{
+	gateway_create
+	server_create
+	client_create
+
+	# Add the extra device as we use 3 down links for bond0
+	local i=2
+	ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+	ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}"
+	ip -n ${g_ns} link set s${i} up
+	ip -n ${g_ns} link set s${i} master br0
+	ip -n ${s_ns} link set eth${i} master bond0
+	tc -n ${g_ns} qdisc add dev s${i} clsact
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
new file mode 100644
index 000000000000..991494376223
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/config
@@ -0,0 +1,21 @@
+CONFIG_BONDING=y
+CONFIG_BRIDGE=y
+CONFIG_CONFIGFS_FS=y
+CONFIG_DUMMY=y
+CONFIG_INET_ESP=y
+CONFIG_INET_ESP_OFFLOAD=y
+CONFIG_IPV6=y
+CONFIG_IPVLAN=y
+CONFIG_MACVLAN=y
+CONFIG_NET_ACT_GACT=y
+CONFIG_NET_CLS_FLOWER=y
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETCONSOLE_EXTENDED_LOG=y
+CONFIG_NETDEVSIM=m
+CONFIG_NET_SCH_INGRESS=y
+CONFIG_NLMON=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh
new file mode 100755
index 000000000000..e6fa24eded5b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bond device handling of addr lists (dev->uc, mc)
+#
+
+ALL_TESTS="
+	bond_cleanup_mode1
+	bond_cleanup_mode4
+	bond_listen_lacpdu_multicast_case_down
+	bond_listen_lacpdu_multicast_case_up
+"
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+source "$lib_dir"/lag_lib.sh
+
+
+destroy()
+{
+	local ifnames=(dummy1 dummy2 bond1 mv0)
+	local ifname
+
+	for ifname in "${ifnames[@]}"; do
+		ip link del "$ifname" &>/dev/null
+	done
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	destroy
+}
+
+
+# bond driver control paths vary between modes that have a primary slave
+# (bond_uses_primary()) and others. Test both kinds of modes.
+
+bond_cleanup_mode1()
+{
+	RET=0
+
+	test_LAG_cleanup "bonding" "active-backup"
+}
+
+bond_cleanup_mode4() {
+	RET=0
+
+	test_LAG_cleanup "bonding" "802.3ad"
+}
+
+bond_listen_lacpdu_multicast()
+{
+	# Initial state of bond device, up | down
+	local init_state=$1
+	local lacpdu_mc="01:80:c2:00:00:02"
+
+	ip link add dummy1 type dummy
+	ip link add bond1 "$init_state" type bond mode 802.3ad
+	ip link set dev dummy1 master bond1
+	if [ "$init_state" = "down" ]; then
+		ip link set dev bond1 up
+	fi
+
+	grep_bridge_fdb "$lacpdu_mc" bridge fdb show brport dummy1 >/dev/null
+	check_err $? "LACPDU multicast address not present on slave (1)"
+
+	ip link set dev bond1 down
+
+	not grep_bridge_fdb "$lacpdu_mc" bridge fdb show brport dummy1 >/dev/null
+	check_err $? "LACPDU multicast address still present on slave"
+
+	ip link set dev bond1 up
+
+	grep_bridge_fdb "$lacpdu_mc" bridge fdb show brport dummy1 >/dev/null
+	check_err $? "LACPDU multicast address not present on slave (2)"
+
+	cleanup
+
+	log_test "bonding LACPDU multicast address to slave (from bond $init_state)"
+}
+
+# The LACPDU mc addr is added by different paths depending on the initial state
+# of the bond when enslaving a device. Test both cases.
+
+bond_listen_lacpdu_multicast_case_down()
+{
+	RET=0
+
+	bond_listen_lacpdu_multicast "down"
+}
+
+bond_listen_lacpdu_multicast_case_up()
+{
+	RET=0
+
+	bond_listen_lacpdu_multicast "up"
+}
+
+
+trap cleanup EXIT
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
new file mode 100644
index 000000000000..bf9bcd1b5ec0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
@@ -0,0 +1,177 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NAMESPACES=""
+
+# Test that a link aggregation device (bonding, team) removes the hardware
+# addresses that it adds on its underlying devices.
+test_LAG_cleanup()
+{
+	local driver=$1
+	local mode=$2
+	local ucaddr="02:00:00:12:34:56"
+	local addr6="fe80::78:9abc/64"
+	local mcaddr="33:33:ff:78:9a:bc"
+	local name
+
+	ip link add dummy1 type dummy
+	ip link add dummy2 type dummy
+	if [ "$driver" = "bonding" ]; then
+		name="bond1"
+		ip link add "$name" up type bond mode "$mode"
+		ip link set dev dummy1 master "$name"
+		ip link set dev dummy2 master "$name"
+	elif [ "$driver" = "team" ]; then
+		name="team0"
+		teamd -d -c '
+			{
+				"device": "'"$name"'",
+				"runner": {
+					"name": "'"$mode"'"
+				},
+				"ports": {
+					"dummy1":
+						{},
+					"dummy2":
+						{}
+				}
+			}
+		'
+		ip link set dev "$name" up
+	else
+		check_err 1
+		log_test test_LAG_cleanup ": unknown driver \"$driver\""
+		return
+	fi
+
+	# Used to test dev->uc handling
+	ip link add mv0 link "$name" up address "$ucaddr" type macvlan
+	# Used to test dev->mc handling
+	ip address add "$addr6" dev "$name"
+
+	# Check that addresses were added as expected
+	(grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy1 ||
+		grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy2) >/dev/null
+	check_err $? "macvlan unicast address not found on a slave"
+
+	# mcaddr is added asynchronously by addrconf_dad_work(), use busywait
+	(busywait 10000 grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy1 ||
+		grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy2) >/dev/null
+	check_err $? "IPv6 solicited-node multicast mac address not found on a slave"
+
+	ip link set dev "$name" down
+	ip link del "$name"
+
+	not grep_bridge_fdb "$ucaddr" bridge fdb show >/dev/null
+	check_err $? "macvlan unicast address still present on a slave"
+
+	not grep_bridge_fdb "$mcaddr" bridge fdb show >/dev/null
+	check_err $? "IPv6 solicited-node multicast mac address still present on a slave"
+
+	cleanup
+
+	log_test "$driver cleanup mode $mode"
+}
+
+# Build a generic 2 node net namespace with 2 connections
+# between the namespaces
+#
+#  +-----------+       +-----------+
+#  | node1     |       | node2     |
+#  |           |       |           |
+#  |           |       |           |
+#  |      eth0 +-------+ eth0      |
+#  |           |       |           |
+#  |      eth1 +-------+ eth1      |
+#  |           |       |           |
+#  +-----------+       +-----------+
+lag_setup2x2()
+{
+	local state=${1:-down}
+	local namespaces="lag_node1 lag_node2"
+
+	# create namespaces
+	for n in ${namespaces}; do
+		ip netns add ${n}
+	done
+
+	# wire up namespaces
+	ip link add name lag1 type veth peer name lag1-end
+	ip link set dev lag1 netns lag_node1 $state name eth0
+	ip link set dev lag1-end netns lag_node2 $state name eth0
+
+	ip link add name lag1 type veth peer name lag1-end
+	ip link set dev lag1 netns lag_node1 $state name eth1
+	ip link set dev lag1-end netns lag_node2 $state name eth1
+
+	NAMESPACES="${namespaces}"
+}
+
+# cleanup all lag related namespaces
+lag_cleanup()
+{
+	for n in ${NAMESPACES}; do
+		ip netns delete ${n} >/dev/null 2>&1 || true
+	done
+}
+
+SWITCH="lag_node1"
+CLIENT="lag_node2"
+CLIENTIP="172.20.2.1"
+SWITCHIP="172.20.2.2"
+
+lag_setup_network()
+{
+	lag_setup2x2 "down"
+
+	# create switch
+	ip netns exec ${SWITCH} ip link add br0 up type bridge
+	ip netns exec ${SWITCH} ip link set eth0 master br0 up
+	ip netns exec ${SWITCH} ip link set eth1 master br0 up
+	ip netns exec ${SWITCH} ip addr add ${SWITCHIP}/24 dev br0
+}
+
+lag_reset_network()
+{
+	ip netns exec ${CLIENT} ip link del bond0
+	ip netns exec ${SWITCH} ip link set eth0 up
+	ip netns exec ${SWITCH} ip link set eth1 up
+}
+
+create_bond()
+{
+	# create client
+	ip netns exec ${CLIENT} ip link set eth0 down
+	ip netns exec ${CLIENT} ip link set eth1 down
+
+	ip netns exec ${CLIENT} ip link add bond0 type bond $@
+	ip netns exec ${CLIENT} ip link set eth0 master bond0
+	ip netns exec ${CLIENT} ip link set eth1 master bond0
+	ip netns exec ${CLIENT} ip link set bond0 up
+	ip netns exec ${CLIENT} ip addr add ${CLIENTIP}/24 dev bond0
+}
+
+test_bond_recovery()
+{
+	RET=0
+
+	create_bond $@
+
+	# verify connectivity
+	slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null
+	check_err $? "No connectivity"
+
+	# force the links of the bond down
+	ip netns exec ${SWITCH} ip link set eth0 down
+	sleep 2
+	ip netns exec ${SWITCH} ip link set eth0 up
+	ip netns exec ${SWITCH} ip link set eth1 down
+
+	# re-verify connectivity
+	slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null
+
+	local rc=$?
+	check_err $rc "Bond failed to recover"
+	log_test "$1 ($2) bond recovery"
+	lag_reset_network
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
new file mode 100755
index 000000000000..9d26ab4cad0b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+#  When the bond is configured with down/updelay and the link state of
+#  slave members flaps if there are no remaining members up the bond
+#  should immediately select a member to bring up. (from bonding.txt
+#  section 13.1 paragraph 4)
+#
+#  +-------------+       +-----------+
+#  | client      |       | switch    |
+#  |             |       |           |
+#  |    +--------| link1 |-----+     |
+#  |    |        +-------+     |     |
+#  |    |        |       |     |     |
+#  |    |        +-------+     |     |
+#  |    | bond   | link2 | Br0 |     |
+#  +-------------+       +-----------+
+#     172.20.2.1           172.20.2.2
+
+
+REQUIRE_MZ=no
+REQUIRE_JQ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/lag_lib.sh
+
+cleanup()
+{
+	lag_cleanup
+}
+
+trap cleanup 0 1 2
+
+lag_setup_network
+test_bond_recovery mode 1 miimon 100 updelay 0
+test_bond_recovery mode 1 miimon 100 updelay 200
+test_bond_recovery mode 1 miimon 100 updelay 500
+test_bond_recovery mode 1 miimon 100 updelay 1000
+test_bond_recovery mode 1 miimon 100 updelay 2000
+test_bond_recovery mode 1 miimon 100 updelay 5000
+test_bond_recovery mode 1 miimon 100 updelay 10000
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
new file mode 100755
index 000000000000..2d275b3e47dd
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+#  When the bond is configured with down/updelay and the link state of
+#  slave members flaps if there are no remaining members up the bond
+#  should immediately select a member to bring up. (from bonding.txt
+#  section 13.1 paragraph 4)
+#
+#  +-------------+       +-----------+
+#  | client      |       | switch    |
+#  |             |       |           |
+#  |    +--------| link1 |-----+     |
+#  |    |        +-------+     |     |
+#  |    |        |       |     |     |
+#  |    |        +-------+     |     |
+#  |    | bond   | link2 | Br0 |     |
+#  +-------------+       +-----------+
+#     172.20.2.1           172.20.2.2
+
+
+REQUIRE_MZ=no
+REQUIRE_JQ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/lag_lib.sh
+
+cleanup()
+{
+	lag_cleanup
+}
+
+trap cleanup 0 1 2
+
+lag_setup_network
+test_bond_recovery mode 2 miimon 100 updelay 0
+test_bond_recovery mode 2 miimon 100 updelay 200
+test_bond_recovery mode 2 miimon 100 updelay 500
+test_bond_recovery mode 2 miimon 100 updelay 1000
+test_bond_recovery mode 2 miimon 100 updelay 2000
+test_bond_recovery mode 2 miimon 100 updelay 5000
+test_bond_recovery mode 2 miimon 100 updelay 10000
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh
new file mode 100755
index 000000000000..477cc9379500
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh
@@ -0,0 +1,361 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This selftest exercises trying to have multiple netpoll users at the same
+# time.
+#
+# This selftest has multiple smalls test inside, and the goal is to
+# get interfaces with bonding and netconsole in different orders in order
+# to catch any possible issue.
+#
+# The main test composes of four interfaces being created using netdevsim; two
+# of them are bonded to serve as the netconsole's transmit interface. The
+# remaining two interfaces are similarly bonded and assigned to a separate
+# network namespace, which acts as the receive interface, where socat monitors
+# for incoming messages.
+#
+# A netconsole message is then sent to ensure it is properly received across
+# this configuration.
+#
+# Later, run a few other tests, to make sure that bonding and netconsole
+# cannot coexist.
+#
+# The test's objective is to exercise netpoll usage when managed simultaneously
+# by multiple subsystems (netconsole and bonding).
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+modprobe bonding 2> /dev/null || true
+modprobe veth 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup_bond EXIT
+
+FORMAT="extended"
+IP_VERSION="ipv4"
+VETH0="veth"$(( RANDOM % 256))
+VETH1="veth"$((256 +  RANDOM % 256))
+TXNS=""
+RXNS=""
+
+# Create "bond_tx_XX" and "bond_rx_XX" interfaces, and set DSTIF and SRCIF with
+# the bonding interfaces
+function setup_bonding_ifaces() {
+	local RAND=$(( RANDOM % 100 ))
+	BOND_TX_MAIN_IF="bond_tx_$RAND"
+	BOND_RX_MAIN_IF="bond_rx_$RAND"
+
+	# Setup TX
+	if ! ip -n "${TXNS}" link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr
+	then
+		echo "Failed to create bond TX interface. Is CONFIG_BONDING set?" >&2
+		# only clean nsim ifaces and namespace. Nothing else has been
+		# initialized
+		cleanup_bond_nsim
+		trap - EXIT
+		exit "${ksft_skip}"
+	fi
+
+	# create_netdevsim() got the interface up, but it needs to be down
+	# before being enslaved.
+	ip -n "${TXNS}" \
+		link set "${BOND_TX1_SLAVE_IF}" down
+	ip -n "${TXNS}" \
+		link set "${BOND_TX2_SLAVE_IF}" down
+	ip -n "${TXNS}" \
+		link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+	ip -n "${TXNS}" \
+		link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+	ip -n "${TXNS}" \
+		link set "${BOND_TX_MAIN_IF}" up
+
+	# Setup RX
+	ip -n "${RXNS}" \
+		link add "${BOND_RX_MAIN_IF}" type bond mode balance-rr
+	ip -n "${RXNS}" \
+		link set "${BOND_RX1_SLAVE_IF}" down
+	ip -n "${RXNS}" \
+		link set "${BOND_RX2_SLAVE_IF}" down
+	ip -n "${RXNS}" \
+		link set "${BOND_RX1_SLAVE_IF}" master "${BOND_RX_MAIN_IF}"
+	ip -n "${RXNS}" \
+		link set "${BOND_RX2_SLAVE_IF}" master "${BOND_RX_MAIN_IF}"
+	ip -n "${RXNS}" \
+		link set "${BOND_RX_MAIN_IF}" up
+
+	export DSTIF="${BOND_RX_MAIN_IF}"
+	export SRCIF="${BOND_TX_MAIN_IF}"
+}
+
+# Create 4 netdevsim interfaces. Two of them will be bound to TX bonding iface
+# and the other two will be bond to the RX interface (on the other namespace)
+function create_ifaces_bond() {
+	BOND_TX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_1}" "${TXNS}")
+	BOND_TX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_2}" "${TXNS}")
+	BOND_RX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_1}" "${RXNS}")
+	BOND_RX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_2}" "${RXNS}")
+}
+
+# netdevsim link BOND_TX to BOND_RX interfaces
+function link_ifaces_bond() {
+	local BOND_TX1_SLAVE_IFIDX
+	local BOND_TX2_SLAVE_IFIDX
+	local BOND_RX1_SLAVE_IFIDX
+	local BOND_RX2_SLAVE_IFIDX
+	local TXNS_FD
+	local RXNS_FD
+
+	BOND_TX1_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \
+				cat /sys/class/net/"$BOND_TX1_SLAVE_IF"/ifindex)
+	BOND_TX2_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \
+				cat /sys/class/net/"$BOND_TX2_SLAVE_IF"/ifindex)
+	BOND_RX1_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \
+				cat /sys/class/net/"$BOND_RX1_SLAVE_IF"/ifindex)
+	BOND_RX2_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \
+				cat /sys/class/net/"$BOND_RX2_SLAVE_IF"/ifindex)
+
+	exec {TXNS_FD}</var/run/netns/"${TXNS}"
+	exec {RXNS_FD}</var/run/netns/"${RXNS}"
+
+	# Linking TX ifaces to the RX ones (on the other namespace)
+	echo "${TXNS_FD}:$BOND_TX1_SLAVE_IFIDX $RXNS_FD:$BOND_RX1_SLAVE_IFIDX"  \
+		> "$NSIM_DEV_SYS_LINK"
+	echo "${TXNS_FD}:$BOND_TX2_SLAVE_IFIDX $RXNS_FD:$BOND_RX2_SLAVE_IFIDX"  \
+		> "$NSIM_DEV_SYS_LINK"
+
+	exec {TXNS_FD}<&-
+	exec {RXNS_FD}<&-
+}
+
+function create_all_ifaces() {
+	# setup_ns function is coming from lib.sh
+	setup_ns TXNS RXNS
+	export NAMESPACE="${RXNS}"
+
+	# Create two interfaces for RX and two for TX
+	create_ifaces_bond
+	# Link netlink ifaces
+	link_ifaces_bond
+}
+
+# configure DSTIF and SRCIF IPs
+function configure_ifaces_ips() {
+	local IP_VERSION=${1:-"ipv4"}
+	select_ipv4_or_ipv6 "${IP_VERSION}"
+
+	ip -n "${RXNS}" addr add "${DSTIP}"/24 dev "${DSTIF}"
+	ip -n "${RXNS}" link set "${DSTIF}" up
+
+	ip -n "${TXNS}" addr add "${SRCIP}"/24 dev "${SRCIF}"
+	ip -n "${TXNS}" link set "${SRCIF}" up
+}
+
+function test_enable_netpoll_on_enslaved_iface() {
+	echo 0 > "${NETCONS_PATH}"/enabled
+
+	# At this stage, BOND_TX1_SLAVE_IF is enslaved to BOND_TX_MAIN_IF, and
+	# linked to BOND_RX1_SLAVE_IF inside the namespace.
+	echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name
+
+	# This should fail with the following message in dmesg:
+	# netpoll: netconsole: ethX is a slave device, aborting
+	set +e
+	enable_netcons_ns 2> /dev/null
+	set -e
+
+	if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]]
+	then
+		echo "test failed: Bonding and netpoll cannot co-exists." >&2
+		exit "${ksft_fail}"
+	fi
+}
+
+function test_delete_bond_and_reenable_target() {
+	ip -n "${TXNS}" \
+		link delete "${BOND_TX_MAIN_IF}" type bond
+
+	# BOND_TX1_SLAVE_IF is not attached to a bond interface anymore
+	# netpoll can be plugged in there
+	echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name
+
+	# this should work, since the interface is not enslaved
+	enable_netcons_ns
+
+	if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+	then
+		echo "test failed: Unable to start netpoll on an unbond iface." >&2
+		exit "${ksft_fail}"
+	fi
+}
+
+# Send a netconsole message to the netconsole target
+function test_send_netcons_msg_through_bond_iface() {
+	# Listen for netconsole port inside the namespace and
+	# destination interface
+	listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" &
+	# Wait for socat to start and listen to the port.
+	wait_for_port "${RXNS}" "${PORT}" "${IP_VERSION}"
+	# Send the message
+	echo "${MSG}: ${TARGET}" > /dev/kmsg
+	# Wait until socat saves the file to disk
+	busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+	# Make sure the message was received in the dst part
+	# and exit
+	validate_result "${OUTPUT_FILE}" "${FORMAT}"
+	# kill socat in case it is still running
+	pkill_socat
+}
+
+# BOND_TX1_SLAVE_IF has netconsole enabled on it, bind it to BOND_TX_MAIN_IF.
+# Given BOND_TX_MAIN_IF was deleted, recreate it first
+function test_enslave_netcons_enabled_iface {
+	# netconsole got disabled while the interface was down
+	if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+	then
+		echo "test failed: netconsole expected to be enabled against BOND_TX1_SLAVE_IF" >&2
+		exit "${ksft_fail}"
+	fi
+
+	# recreate the bonding iface. it got deleted by previous
+	# test (test_delete_bond_and_reenable_target)
+	ip -n "${TXNS}" \
+		link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr
+
+	# sub-interface need to be down before attaching to bonding
+	# This will also disable netconsole.
+	ip -n "${TXNS}" \
+		link set "${BOND_TX1_SLAVE_IF}" down
+	ip -n "${TXNS}" \
+		link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+	ip -n "${TXNS}" \
+		link set "${BOND_TX_MAIN_IF}" up
+
+	# netconsole got disabled while the interface was down
+	if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]]
+	then
+		echo "test failed: Device is part of a bond iface, cannot have netcons enabled" >&2
+		exit "${ksft_fail}"
+	fi
+}
+
+# Get netconsole enabled on a bonding interface and attach a second
+# sub-interface.
+function test_enslave_iface_to_bond {
+	# BOND_TX_MAIN_IF has only BOND_TX1_SLAVE_IF right now
+	echo "${BOND_TX_MAIN_IF}" > "${NETCONS_PATH}"/dev_name
+	enable_netcons_ns
+
+	# netcons is attached to bond0 and BOND_TX1_SLAVE_IF is
+	# part of BOND_TX_MAIN_IF. Attach BOND_TX2_SLAVE_IF to BOND_TX_MAIN_IF.
+	ip -n "${TXNS}" \
+		link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+	if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+	then
+		echo "test failed: Netconsole should be enabled on bonding interface. Failed" >&2
+		exit "${ksft_fail}"
+	fi
+}
+
+function test_enslave_iff_disabled_netpoll_iface {
+	local ret
+
+	# Create two interfaces. veth interfaces it known to have
+	# IFF_DISABLE_NETPOLL set
+	if ! ip link add "${VETH0}" type veth peer name "${VETH1}"
+	then
+		echo "Failed to create veth TX interface. Is CONFIG_VETH set?" >&2
+		exit "${ksft_skip}"
+	fi
+	set +e
+	# This will print RTNETLINK answers: Device or resource busy
+	ip link set "${VETH0}" master "${BOND_TX_MAIN_IF}" 2> /dev/null
+	ret=$?
+	set -e
+	if [[ $ret -eq 0 ]]
+	then
+		echo "test failed: veth interface could not be enslaved"
+		exit "${ksft_fail}"
+	fi
+}
+
+# Given that netconsole picks the current net namespace, we need to enable it
+# from inside the TXNS namespace
+function enable_netcons_ns() {
+	ip netns exec "${TXNS}" sh -c \
+		"mount -t configfs configfs /sys/kernel/config && echo 1 > $NETCONS_PATH/enabled"
+}
+
+####################
+# Tests start here #
+####################
+
+# Create regular interfaces using netdevsim and link them
+create_all_ifaces
+
+# Setup the bonding interfaces
+# BOND_RX_MAIN_IF has BOND_RX{1,2}_SLAVE_IF
+# BOND_TX_MAIN_IF has BOND_TX{1,2}_SLAVE_IF
+setup_bonding_ifaces
+
+# Configure the ips as BOND_RX1_SLAVE_IF and BOND_TX1_SLAVE_IF
+configure_ifaces_ips "${IP_VERSION}"
+
+_create_dynamic_target "${FORMAT}" "${NETCONS_PATH}"
+enable_netcons_ns
+set_user_data
+
+# Test #1 : Create an bonding interface and attach netpoll into
+# the bonding interface. Netconsole/netpoll should work on
+# the bonding interface.
+test_send_netcons_msg_through_bond_iface
+echo "test #1: netpoll on bonding interface worked. Test passed" >&2
+
+# Test #2: Attach netpoll to an enslaved interface
+# Try to attach netpoll to an enslaved sub-interface (while still being part of
+# a bonding interface), which shouldn't be allowed
+test_enable_netpoll_on_enslaved_iface
+echo "test #2: netpoll correctly rejected enslaved interface (expected behavior). Test passed." >&2
+
+# Test #3: Unplug the sub-interface from bond and enable netconsole
+# Detach the interface from a bonding interface and attach netpoll again
+test_delete_bond_and_reenable_target
+echo "test #3: Able to attach to an unbound interface. Test passed." >&2
+
+# Test #4: Enslave a sub-interface that had netconsole enabled
+# Try to enslave an interface that has netconsole/netpoll enabled.
+# Previous test has netconsole enabled in BOND_TX1_SLAVE_IF, try to enslave it
+test_enslave_netcons_enabled_iface
+echo "test #4: Enslaving an interface with netpoll attached. Test passed." >&2
+
+# Test #5: Enslave a sub-interface to a bonding interface
+# Enslave an interface to a bond interface that has netpoll attached
+# At this stage, BOND_TX_MAIN_IF is created and BOND_TX1_SLAVE_IF is part of
+# it. Netconsole is currently disabled
+test_enslave_iface_to_bond
+echo "test #5: Enslaving an interface to bond+netpoll. Test passed." >&2
+
+# Test #6: Enslave a IFF_DISABLE_NETPOLL sub-interface to a bonding interface
+# At this stage, BOND_TX_MAIN_IF has both sub interface and netconsole is
+# enabled. This test will try to enslave an a veth (IFF_DISABLE_NETPOLL) interface
+# and it should fail, with netpoll: veth0 doesn't support polling
+test_enslave_iff_disabled_netpoll_iface
+echo "test #6: Enslaving IFF_DISABLE_NETPOLL ifaces to bond iface is not supported. Test passed." >&2
+
+cleanup_bond
+trap - EXIT
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings
new file mode 100644
index 000000000000..79b65bdf05db
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/settings
@@ -0,0 +1 @@
+timeout=1200
diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config
new file mode 100644
index 000000000000..77ccf83d87e0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/config
@@ -0,0 +1,10 @@
+CONFIG_CONFIGFS_FS=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_BTF_MODULES=n
+CONFIG_INET_PSP=y
+CONFIG_IPV6=y
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETCONSOLE_EXTENDED_LOG=y
+CONFIG_NETDEVSIM=m
+CONFIG_XDP_SOCKETS=y
diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile
new file mode 100644
index 000000000000..7994bd0e5c44
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/Makefile
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS := \
+	bridge_locked_port.sh \
+	bridge_mdb.sh \
+	bridge_mld.sh \
+	bridge_vlan_aware.sh \
+	bridge_vlan_mcast.sh \
+	bridge_vlan_unaware.sh \
+	local_termination.sh \
+	no_forwarding.sh \
+	tc_actions.sh \
+	test_bridge_fdb_stress.sh \
+# end of TEST_PROGS
+
+TEST_FILES := \
+	forwarding.config \
+	run_net_forwarding_test.sh \
+# end of TEST_FILES
+
+TEST_INCLUDES := \
+	../../../net/forwarding/bridge_locked_port.sh \
+	../../../net/forwarding/bridge_mdb.sh \
+	../../../net/forwarding/bridge_mld.sh \
+	../../../net/forwarding/bridge_vlan_aware.sh \
+	../../../net/forwarding/bridge_vlan_mcast.sh \
+	../../../net/forwarding/bridge_vlan_unaware.sh \
+	../../../net/forwarding/lib.sh \
+	../../../net/forwarding/local_termination.sh \
+	../../../net/forwarding/no_forwarding.sh \
+	../../../net/forwarding/tc_actions.sh \
+	../../../net/forwarding/tc_common.sh \
+	../../../net/lib.sh \
+# end of TEST_INCLUDES
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/forwarding.config b/tools/testing/selftests/drivers/net/dsa/forwarding.config
new file mode 100644
index 000000000000..7adc1396fae0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/forwarding.config
@@ -0,0 +1,2 @@
+NETIF_CREATE=no
+STABLE_MAC_ADDRS=yes
diff --git a/tools/testing/selftests/drivers/net/dsa/local_termination.sh b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh
new file mode 100755
index 000000000000..4106c0a102ea
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+libdir=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")
+testname=$(basename "${BASH_SOURCE[0]}")
+
+source "$libdir"/forwarding.config
+cd "$libdir"/../../../net/forwarding/ || exit 1
+source "./$testname" "$@"
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_actions.sh b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
new file mode 100755
index 000000000000..74682151d04d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Bridge FDB entries can be offloaded to DSA switches without holding the
+# rtnl_mutex. Traditionally this mutex has conferred drivers implicit
+# serialization, which means their code paths are not well tested in the
+# presence of concurrency.
+# This test creates a background task that stresses the FDB by adding and
+# deleting an entry many times in a row without the rtnl_mutex held.
+# It then tests the driver resistance to concurrency by calling .ndo_fdb_dump
+# (with rtnl_mutex held) from a foreground task.
+# Since either the FDB dump or the additions/removals can fail, but the
+# additions and removals are performed in deferred as opposed to process
+# context, we cannot simply check for user space error codes.
+
+WAIT_TIME=1
+NUM_NETIFS=1
+REQUIRE_JQ="no"
+REQUIRE_MZ="no"
+NETIF_CREATE="no"
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+cleanup() {
+	echo "Cleaning up"
+	kill $pid && wait $pid &> /dev/null
+	ip link del br0
+	echo "Please check kernel log for errors"
+}
+trap 'cleanup' EXIT
+
+eth=${NETIFS[p1]}
+
+ip link del br0 2>&1 >/dev/null || :
+ip link add br0 type bridge && ip link set $eth master br0
+
+(while :; do
+	bridge fdb add 00:01:02:03:04:05 dev $eth master static
+	bridge fdb del 00:01:02:03:04:05 dev $eth master static
+done) &
+pid=$!
+
+for i in $(seq 1 50); do
+	bridge fdb show > /dev/null
+	sleep 3
+	echo "$((${i} * 2))% complete..."
+done
diff --git a/tools/testing/selftests/drivers/net/gro.c b/tools/testing/selftests/drivers/net/gro.c
new file mode 100644
index 000000000000..e894037d2e3e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/gro.c
@@ -0,0 +1,1369 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ *  Data packets of the same size and same header setup with correct
+ *  sequence numbers coalesce. The one exception being the last data
+ *  packet coalesced: it can be smaller than the rest and coalesced
+ *  as long as it is in the same flow.
+ * 2.ack
+ *  Pure ACK does not coalesce.
+ * 3.flags
+ *  Specific test cases: no packets with PSH, SYN, URG, RST set will
+ *  be coalesced.
+ * 4.tcp
+ *  Packets with incorrect checksum, non-consecutive seqno and
+ *  different TCP header options shouldn't coalesce. Nit: given that
+ *  some extension headers have paddings, such as timestamp, headers
+ *  that are padding differently would not be coalesced.
+ * 5.ip:
+ *  Packets with different (ECN, TTL, TOS) header, ip options or
+ *  ip fragments (ipv6) shouldn't coalesce.
+ * 6.large:
+ *  Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (i.e. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+#include "../../net/lib/ksft.h"
+
+#define DPORT 8000
+#define SPORT 1500
+#define PAYLOAD_LEN 100
+#define NUM_PACKETS 4
+#define START_SEQ 100
+#define START_ACK 100
+#define ETH_P_NONE 0
+#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
+#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MIN_EXTHDR_SIZE 8
+#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00"
+#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11"
+
+#define ipv6_optlen(p)  (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+static const char *addr6_src = "fdaa::2";
+static const char *addr6_dst = "fdaa::1";
+static const char *addr4_src = "192.168.1.200";
+static const char *addr4_dst = "192.168.1.100";
+static int proto = -1;
+static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
+static char *testname = "data";
+static char *ifname = "eth0";
+static char *smac = "aa:00:00:00:00:02";
+static char *dmac = "aa:00:00:00:00:01";
+static bool verbose;
+static bool tx_socket = true;
+static int tcp_offset = -1;
+static int total_hdr_len = -1;
+static int ethhdr_proto = -1;
+static bool ipip;
+static const int num_flush_id_cases = 6;
+
+static void vlog(const char *fmt, ...)
+{
+	va_list args;
+
+	if (verbose) {
+		va_start(args, fmt);
+		vfprintf(stderr, fmt, args);
+		va_end(args);
+	}
+}
+
+static void setup_sock_filter(int fd)
+{
+	const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
+	const int ethproto_off = offsetof(struct ethhdr, h_proto);
+	int optlen = 0;
+	int ipproto_off, opt_ipproto_off;
+	int next_off;
+
+	if (ipip)
+		next_off = sizeof(struct iphdr) + offsetof(struct iphdr, protocol);
+	else if (proto == PF_INET)
+		next_off = offsetof(struct iphdr, protocol);
+	else
+		next_off = offsetof(struct ipv6hdr, nexthdr);
+	ipproto_off = ETH_HLEN + next_off;
+
+	/* Overridden later if exthdrs are used: */
+	opt_ipproto_off = ipproto_off;
+
+	if (strcmp(testname, "ip") == 0) {
+		if (proto == PF_INET)
+			optlen = sizeof(struct ip_timestamp);
+		else {
+			BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
+			BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
+			BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
+
+			/* same size for HBH and Fragment extension header types */
+			optlen = MIN_EXTHDR_SIZE;
+			opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
+				+ offsetof(struct ip6_ext, ip6e_nxt);
+		}
+	}
+
+	/* this filter validates the following:
+	 *	- packet is IPv4/IPv6 according to the running test.
+	 *	- packet is TCP. Also handles the case of one extension header and then TCP.
+	 *	- checks the packet tcp dport equals to DPORT. Also handles the case of one
+	 *	  extension header and then TCP.
+	 */
+	struct sock_filter filter[] = {
+			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, ethproto_off),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9),
+			BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, ipproto_off),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0),
+			BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, opt_ipproto_off),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
+			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
+			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off + optlen),
+			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
+			BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
+			BPF_STMT(BPF_RET + BPF_K, 0),
+	};
+
+	struct sock_fprog bpf = {
+		.len = ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
+		error(1, errno, "error setting filter");
+}
+
+static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
+{
+	uint16_t *words = data;
+	int i;
+
+	for (i = 0; i < len / 2; i++)
+		sum += words[i];
+	if (len & 1)
+		sum += ((char *)data)[len - 1];
+	return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+	sum = checksum_nofold(data, len, sum);
+	while (sum > 0xFFFF)
+		sum = (sum & 0xFFFF) + (sum >> 16);
+	return ~sum;
+}
+
+static uint16_t tcp_checksum(void *buf, int payload_len)
+{
+	struct pseudo_header6 {
+		struct in6_addr saddr;
+		struct in6_addr daddr;
+		uint16_t protocol;
+		uint16_t payload_len;
+	} ph6;
+	struct pseudo_header4 {
+		struct in_addr saddr;
+		struct in_addr daddr;
+		uint16_t protocol;
+		uint16_t payload_len;
+	} ph4;
+	uint32_t sum = 0;
+
+	if (proto == PF_INET6) {
+		if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1)
+			error(1, errno, "inet_pton6 source ip pseudo");
+		if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1)
+			error(1, errno, "inet_pton6 dest ip pseudo");
+		ph6.protocol = htons(IPPROTO_TCP);
+		ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+		sum = checksum_nofold(&ph6, sizeof(ph6), 0);
+	} else if (proto == PF_INET) {
+		if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1)
+			error(1, errno, "inet_pton source ip pseudo");
+		if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1)
+			error(1, errno, "inet_pton dest ip pseudo");
+		ph4.protocol = htons(IPPROTO_TCP);
+		ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+		sum = checksum_nofold(&ph4, sizeof(ph4), 0);
+	}
+
+	return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
+}
+
+static void read_MAC(uint8_t *mac_addr, char *mac)
+{
+	if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+		   &mac_addr[0], &mac_addr[1], &mac_addr[2],
+		   &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
+		error(1, 0, "sscanf");
+}
+
+static void fill_datalinklayer(void *buf)
+{
+	struct ethhdr *eth = buf;
+
+	memcpy(eth->h_dest, dst_mac, ETH_ALEN);
+	memcpy(eth->h_source, src_mac, ETH_ALEN);
+	eth->h_proto = ethhdr_proto;
+}
+
+static void fill_networklayer(void *buf, int payload_len, int protocol)
+{
+	struct ipv6hdr *ip6h = buf;
+	struct iphdr *iph = buf;
+
+	if (proto == PF_INET6) {
+		memset(ip6h, 0, sizeof(*ip6h));
+
+		ip6h->version = 6;
+		ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
+		ip6h->nexthdr = protocol;
+		ip6h->hop_limit = 8;
+		if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1)
+			error(1, errno, "inet_pton source ip6");
+		if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1)
+			error(1, errno, "inet_pton dest ip6");
+	} else if (proto == PF_INET) {
+		memset(iph, 0, sizeof(*iph));
+
+		iph->version = 4;
+		iph->ihl = 5;
+		iph->ttl = 8;
+		iph->protocol	= protocol;
+		iph->tot_len = htons(sizeof(struct tcphdr) +
+				payload_len + sizeof(struct iphdr));
+		iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
+		if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1)
+			error(1, errno, "inet_pton source ip");
+		if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1)
+			error(1, errno, "inet_pton dest ip");
+		iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
+	}
+}
+
+static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
+				int payload_len, int fin)
+{
+	struct tcphdr *tcph = buf;
+
+	memset(tcph, 0, sizeof(*tcph));
+
+	tcph->source = htons(SPORT);
+	tcph->dest = htons(DPORT);
+	tcph->seq = ntohl(START_SEQ + seq_offset);
+	tcph->ack_seq = ntohl(START_ACK + ack_offset);
+	tcph->ack = 1;
+	tcph->fin = fin;
+	tcph->doff = 5;
+	tcph->window = htons(TCP_MAXWIN);
+	tcph->urg_ptr = 0;
+	tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
+{
+	int ret = -1;
+
+	ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+	if (ret == -1)
+		error(1, errno, "sendto failure");
+	if (ret != len)
+		error(1, errno, "sendto wrong length");
+}
+
+static void create_packet(void *buf, int seq_offset, int ack_offset,
+			  int payload_len, int fin)
+{
+	memset(buf, 0, total_hdr_len);
+	memset(buf + total_hdr_len, 'a', payload_len);
+
+	fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
+			    payload_len, fin);
+
+	if (ipip) {
+		fill_networklayer(buf + ETH_HLEN, payload_len + sizeof(struct iphdr),
+				  IPPROTO_IPIP);
+		fill_networklayer(buf + ETH_HLEN + sizeof(struct iphdr),
+				  payload_len, IPPROTO_TCP);
+	} else {
+		fill_networklayer(buf + ETH_HLEN, payload_len, IPPROTO_TCP);
+	}
+
+	fill_datalinklayer(buf);
+}
+
+/* send one extra flag, not first and not last pkt */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+		       int rst, int urg)
+{
+	static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	int payload_len, pkt_size, flag, i;
+	struct tcphdr *tcph;
+
+	payload_len = PAYLOAD_LEN * psh;
+	pkt_size = total_hdr_len + payload_len;
+	flag = NUM_PACKETS / 2;
+
+	create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+
+	tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+	tcph->psh = psh;
+	tcph->syn = syn;
+	tcph->rst = rst;
+	tcph->urg = urg;
+	tcph->check = 0;
+	tcph->check = tcp_checksum(tcph, payload_len);
+
+	for (i = 0; i < NUM_PACKETS + 1; i++) {
+		if (i == flag) {
+			write_packet(fd, flag_buf, pkt_size, daddr);
+			continue;
+		}
+		create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+		write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+	}
+}
+
+/* Test for data of same length, smaller than previous
+ * and of different lengths
+ */
+static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
+			   int payload_len1, int payload_len2)
+{
+	static char buf[ETH_HLEN + IP_MAXPACKET];
+
+	create_packet(buf, 0, 0, payload_len1, 0);
+	write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
+	create_packet(buf, payload_len1, 0, payload_len2, 0);
+	write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
+}
+
+/* If incoming segments make tracked segment length exceed
+ * legal IP datagram length, do not coalesce
+ */
+static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
+{
+	static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
+	static char last[TOTAL_HDR_LEN + MSS];
+	static char new_seg[TOTAL_HDR_LEN + MSS];
+	int i;
+
+	for (i = 0; i < NUM_LARGE_PKT; i++)
+		create_packet(pkts[i], i * MSS, 0, MSS, 0);
+	create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
+	create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+
+	for (i = 0; i < NUM_LARGE_PKT; i++)
+		write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+	write_packet(fd, last, total_hdr_len + remainder, daddr);
+	write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
+}
+
+/* Pure acks and dup acks don't coalesce */
+static void send_ack(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN];
+
+	create_packet(buf, 0, 0, 0, 0);
+	write_packet(fd, buf, total_hdr_len, daddr);
+	write_packet(fd, buf, total_hdr_len, daddr);
+	create_packet(buf, 0, 1, 0, 0);
+	write_packet(fd, buf, total_hdr_len, daddr);
+}
+
+static void recompute_packet(char *buf, char *no_ext, int extlen)
+{
+	struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+	memmove(buf, no_ext, total_hdr_len);
+	memmove(buf + total_hdr_len + extlen,
+		no_ext + total_hdr_len, PAYLOAD_LEN);
+
+	tcphdr->doff = tcphdr->doff + (extlen / 4);
+	tcphdr->check = 0;
+	tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
+	if (proto == PF_INET) {
+		iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+		iph->check = 0;
+		iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+
+		if (ipip) {
+			iph += 1;
+			iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+			iph->check = 0;
+			iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+		}
+	} else {
+		ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+	}
+}
+
+static void tcp_write_options(char *buf, int kind, int ts)
+{
+	struct tcp_option_ts {
+		uint8_t kind;
+		uint8_t len;
+		uint32_t tsval;
+		uint32_t tsecr;
+	} *opt_ts = (void *)buf;
+	struct tcp_option_window {
+		uint8_t kind;
+		uint8_t len;
+		uint8_t shift;
+	} *opt_window = (void *)buf;
+
+	switch (kind) {
+	case TCPOPT_NOP:
+		buf[0] = TCPOPT_NOP;
+		break;
+	case TCPOPT_WINDOW:
+		memset(opt_window, 0, sizeof(struct tcp_option_window));
+		opt_window->kind = TCPOPT_WINDOW;
+		opt_window->len = TCPOLEN_WINDOW;
+		opt_window->shift = 0;
+		break;
+	case TCPOPT_TIMESTAMP:
+		memset(opt_ts, 0, sizeof(struct tcp_option_ts));
+		opt_ts->kind = TCPOPT_TIMESTAMP;
+		opt_ts->len = TCPOLEN_TIMESTAMP;
+		opt_ts->tsval = ts;
+		opt_ts->tsecr = 0;
+		break;
+	default:
+		error(1, 0, "unimplemented TCP option");
+		break;
+	}
+}
+
+/* TCP with options is always a permutation of {TS, NOP, NOP}.
+ * Implement different orders to verify coalescing stops.
+ */
+static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
+{
+	switch (order) {
+	case 0:
+		tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+		tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
+		tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
+				  TCPOPT_TIMESTAMP, ts);
+		break;
+	case 1:
+		tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+		tcp_write_options(buf + total_hdr_len + 1,
+				  TCPOPT_TIMESTAMP, ts);
+		tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
+				  TCPOPT_NOP, 0);
+		break;
+	case 2:
+		tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
+		tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
+				  TCPOPT_NOP, 0);
+		tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
+				  TCPOPT_NOP, 0);
+		break;
+	default:
+		error(1, 0, "unknown order");
+		break;
+	}
+	recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
+}
+
+/* Packets with invalid checksum don't coalesce. */
+static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	tcph->check = tcph->check - 1;
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packets with non-consecutive sequence number don't coalesce.*/
+static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	tcph->seq = ntohl(htonl(tcph->seq) + 1);
+	tcph->check = 0;
+	tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packet with different timestamp option or different timestamps
+  * don't coalesce.
+  */
+static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+	int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 0, 0);
+	write_packet(fd, extpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 0, 0);
+	write_packet(fd, extpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 100, 0);
+	write_packet(fd, extpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 100, 1);
+	write_packet(fd, extpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt, buf, 100, 2);
+	write_packet(fd, extpkt, pkt_size, daddr);
+}
+
+/* Packet with different tcp options don't coalesce. */
+static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+	static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
+	int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+	int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt1, buf, 0, 0);
+	write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	add_standard_tcp_options(extpkt1, buf, 0, 0);
+	write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
+	tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
+	recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
+	write_packet(fd, extpkt2, extpkt2_size, daddr);
+}
+
+static void add_ipv4_ts_option(void *buf, void *optpkt)
+{
+	struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
+	int optlen = sizeof(struct ip_timestamp);
+	struct iphdr *iph;
+
+	if (optlen % 4)
+		error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
+
+	ts->ipt_code = IPOPT_TS;
+	ts->ipt_len = optlen;
+	ts->ipt_ptr = 5;
+	ts->ipt_flg = IPOPT_TS_TSONLY;
+
+	memcpy(optpkt, buf, tcp_offset);
+	memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
+	       sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+	iph = (struct iphdr *)(optpkt + ETH_HLEN);
+	iph->ihl = 5 + (optlen / 4);
+	iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
+	iph->check = 0;
+	iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
+}
+
+static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload)
+{
+	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset);
+	struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN);
+	char *exthdr_payload_start = (char *)(exthdr + 1);
+
+	exthdr->hdrlen = 0;
+	exthdr->nexthdr = IPPROTO_TCP;
+
+	memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr));
+
+	memcpy(optpkt, buf, tcp_offset);
+	memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset,
+		sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+	iph->nexthdr = exthdr_type;
+	iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
+}
+
+static void fix_ip4_checksum(struct iphdr *iph)
+{
+	iph->check = 0;
+	iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+}
+
+static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
+{
+	static char buf1[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char buf2[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char buf3[MAX_HDR_LEN + PAYLOAD_LEN];
+	bool send_three = false;
+	struct iphdr *iph1;
+	struct iphdr *iph2;
+	struct iphdr *iph3;
+
+	iph1 = (struct iphdr *)(buf1 + ETH_HLEN);
+	iph2 = (struct iphdr *)(buf2 + ETH_HLEN);
+	iph3 = (struct iphdr *)(buf3 + ETH_HLEN);
+
+	create_packet(buf1, 0, 0, PAYLOAD_LEN, 0);
+	create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+
+	switch (tcase) {
+	case 0: /* DF=1, Incrementing - should coalesce */
+		iph1->frag_off |= htons(IP_DF);
+		iph1->id = htons(8);
+
+		iph2->frag_off |= htons(IP_DF);
+		iph2->id = htons(9);
+		break;
+
+	case 1: /* DF=1, Fixed - should coalesce */
+		iph1->frag_off |= htons(IP_DF);
+		iph1->id = htons(8);
+
+		iph2->frag_off |= htons(IP_DF);
+		iph2->id = htons(8);
+		break;
+
+	case 2: /* DF=0, Incrementing - should coalesce */
+		iph1->frag_off &= ~htons(IP_DF);
+		iph1->id = htons(8);
+
+		iph2->frag_off &= ~htons(IP_DF);
+		iph2->id = htons(9);
+		break;
+
+	case 3: /* DF=0, Fixed - should coalesce */
+		iph1->frag_off &= ~htons(IP_DF);
+		iph1->id = htons(8);
+
+		iph2->frag_off &= ~htons(IP_DF);
+		iph2->id = htons(8);
+		break;
+
+	case 4: /* DF=1, two packets incrementing, and one fixed - should
+		 * coalesce only the first two packets
+		 */
+		iph1->frag_off |= htons(IP_DF);
+		iph1->id = htons(8);
+
+		iph2->frag_off |= htons(IP_DF);
+		iph2->id = htons(9);
+
+		iph3->frag_off |= htons(IP_DF);
+		iph3->id = htons(9);
+		send_three = true;
+		break;
+
+	case 5: /* DF=1, two packets fixed, and one incrementing - should
+		 * coalesce only the first two packets
+		 */
+		iph1->frag_off |= htons(IP_DF);
+		iph1->id = htons(8);
+
+		iph2->frag_off |= htons(IP_DF);
+		iph2->id = htons(8);
+
+		iph3->frag_off |= htons(IP_DF);
+		iph3->id = htons(9);
+		send_three = true;
+		break;
+	}
+
+	fix_ip4_checksum(iph1);
+	fix_ip4_checksum(iph2);
+	write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr);
+	write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr);
+
+	if (send_three) {
+		fix_ip4_checksum(iph3);
+		write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr);
+	}
+}
+
+static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt)
+{
+	for (int i = 0; i < num_flush_id_cases; i++) {
+		sleep(1);
+		send_flush_id_case(fd, daddr, i);
+		sleep(1);
+		write_packet(fd, fin_pkt, total_hdr_len, daddr);
+	}
+}
+
+static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1);
+	write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2);
+	write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
+}
+
+/* IPv4 options shouldn't coalesce */
+static void send_ip_options(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
+	int optlen = sizeof(struct ip_timestamp);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+	add_ipv4_ts_option(buf, optpkt);
+	write_packet(fd, optpkt, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+}
+
+/*  IPv4 fragments shouldn't coalesce */
+static void send_fragment4(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[IP_MAXPACKET];
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	/* Once fragmented, packet would retain the total_len.
+	 * Tcp header is prepared as if rest of data is in follow-up frags,
+	 * but follow up frags aren't actually sent.
+	 */
+	memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
+	fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
+	fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN, IPPROTO_TCP);
+	fill_datalinklayer(buf);
+
+	iph->frag_off = htons(0x6000); // DF = 1, MF = 1
+	iph->check = 0;
+	iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv4 packets with different ttl don't coalesce.*/
+static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
+{
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	iph->ttl = 7;
+	iph->check = 0;
+	iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different tos don't coalesce.*/
+static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
+{
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	if (proto == PF_INET) {
+		iph->tos = 1;
+		iph->check = 0;
+		iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	} else if (proto == PF_INET6) {
+		ip6h->priority = 0xf;
+	}
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different ECN don't coalesce.*/
+static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
+{
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	if (proto == PF_INET) {
+		buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
+		iph->check = 0;
+		iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+	} else {
+		buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
+	}
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv6 fragments and packets with extensions don't coalesce.*/
+static void send_fragment6(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
+			   sizeof(struct ip6_frag)];
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+	struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
+	int extlen = sizeof(struct ip6_frag);
+	int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
+	int extpkt_len = bufpkt_len + extlen;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
+		write_packet(fd, buf, bufpkt_len, daddr);
+	}
+	sleep(1);
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	memset(extpkt, 0, extpkt_len);
+
+	ip6h->nexthdr = IPPROTO_FRAGMENT;
+	ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+	frag->ip6f_nxt = IPPROTO_TCP;
+
+	memcpy(extpkt, buf, tcp_offset);
+	memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
+	       sizeof(struct tcphdr) + PAYLOAD_LEN);
+	write_packet(fd, extpkt, extpkt_len, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, bufpkt_len, daddr);
+}
+
+static void bind_packetsocket(int fd)
+{
+	struct sockaddr_ll daddr = {};
+
+	daddr.sll_family = AF_PACKET;
+	daddr.sll_protocol = ethhdr_proto;
+	daddr.sll_ifindex = if_nametoindex(ifname);
+	if (daddr.sll_ifindex == 0)
+		error(1, errno, "if_nametoindex");
+
+	if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
+		error(1, errno, "could not bind socket");
+}
+
+static void set_timeout(int fd)
+{
+	struct timeval timeout;
+
+	timeout.tv_sec = 3;
+	timeout.tv_usec = 0;
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+		       sizeof(timeout)) < 0)
+		error(1, errno, "cannot set timeout, setsockopt failed");
+}
+
+static void check_recv_pkts(int fd, int *correct_payload,
+			    int correct_num_pkts)
+{
+	static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+	struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+	struct tcphdr *tcph;
+	bool bad_packet = false;
+	int tcp_ext_len = 0;
+	int ip_ext_len = 0;
+	int pkt_size = -1;
+	int data_len = 0;
+	int num_pkt = 0;
+	int i;
+
+	vlog("Expected {");
+	for (i = 0; i < correct_num_pkts; i++)
+		vlog("%d ", correct_payload[i]);
+	vlog("}, Total %d packets\nReceived {", correct_num_pkts);
+
+	while (1) {
+		ip_ext_len = 0;
+		pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
+		if (pkt_size < 0)
+			error(1, errno, "could not receive");
+
+		if (iph->version == 4)
+			ip_ext_len = (iph->ihl - 5) * 4;
+		else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+			ip_ext_len = MIN_EXTHDR_SIZE;
+
+		tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+		if (tcph->fin)
+			break;
+
+		tcp_ext_len = (tcph->doff - 5) * 4;
+		data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
+		/* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
+		 * Ipv4/tcp packets without at least 6 bytes of data will be padded.
+		 * Packet sockets are protocol agnostic, and will not trim the padding.
+		 */
+		if (pkt_size == ETH_ZLEN && iph->version == 4) {
+			data_len = ntohs(iph->tot_len)
+				- sizeof(struct tcphdr) - sizeof(struct iphdr);
+		}
+		vlog("%d ", data_len);
+		if (data_len != correct_payload[num_pkt]) {
+			vlog("[!=%d]", correct_payload[num_pkt]);
+			bad_packet = true;
+		}
+		num_pkt++;
+	}
+	vlog("}, Total %d packets.\n", num_pkt);
+	if (num_pkt != correct_num_pkts)
+		error(1, 0, "incorrect number of packets");
+	if (bad_packet)
+		error(1, 0, "incorrect packet geometry");
+
+	printf("Test succeeded\n\n");
+}
+
+static void gro_sender(void)
+{
+	const int fin_delay_us = 100 * 1000;
+	static char fin_pkt[MAX_HDR_LEN];
+	struct sockaddr_ll daddr = {};
+	int txfd = -1;
+
+	txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
+	if (txfd < 0)
+		error(1, errno, "socket creation");
+
+	memset(&daddr, 0, sizeof(daddr));
+	daddr.sll_ifindex = if_nametoindex(ifname);
+	if (daddr.sll_ifindex == 0)
+		error(1, errno, "if_nametoindex");
+	daddr.sll_family = AF_PACKET;
+	memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
+	daddr.sll_halen = ETH_ALEN;
+	create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
+
+	if (strcmp(testname, "data") == 0) {
+		send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ack") == 0) {
+		send_ack(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "flags") == 0) {
+		send_flags(txfd, &daddr, 1, 0, 0, 0);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_flags(txfd, &daddr, 0, 1, 0, 0);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_flags(txfd, &daddr, 0, 0, 1, 0);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_flags(txfd, &daddr, 0, 0, 0, 1);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "tcp") == 0) {
+		send_changed_checksum(txfd, &daddr);
+		/* Adding sleep before sending FIN so that it is not
+		 * received prior to other packets.
+		 */
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_changed_seq(txfd, &daddr);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_changed_ts(txfd, &daddr);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_diff_opt(txfd, &daddr);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip") == 0) {
+		send_changed_ECN(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_changed_tos(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+		if (proto == PF_INET) {
+			/* Modified packets may be received out of order.
+			 * Sleep function added to enforce test boundaries
+			 * so that fin pkts are not received prior to other pkts.
+			 */
+			sleep(1);
+			send_changed_ttl(txfd, &daddr);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+			sleep(1);
+			send_ip_options(txfd, &daddr);
+			sleep(1);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+			sleep(1);
+			send_fragment4(txfd, &daddr);
+			sleep(1);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+			test_flush_id(txfd, &daddr, fin_pkt);
+		} else if (proto == PF_INET6) {
+			sleep(1);
+			send_fragment6(txfd, &daddr);
+			sleep(1);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+			sleep(1);
+			/* send IPv6 packets with ext header with same payload */
+			send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
+			sleep(1);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+			sleep(1);
+			/* send IPv6 packets with ext header with different payload */
+			send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
+			sleep(1);
+			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+		}
+	} else if (strcmp(testname, "large") == 0) {
+		/* 20 is the difference between min iphdr size
+		 * and min ipv6hdr size. Like MAX_HDR_SIZE,
+		 * MAX_PAYLOAD is defined with the larger header of the two.
+		 */
+		int offset = (proto == PF_INET && !ipip) ? 20 : 0;
+		int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+		send_large(txfd, &daddr, remainder);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+		send_large(txfd, &daddr, remainder + 1);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else {
+		error(1, 0, "Unknown testcase");
+	}
+
+	if (close(txfd))
+		error(1, errno, "socket close");
+}
+
+static void gro_receiver(void)
+{
+	static int correct_payload[NUM_PACKETS];
+	int rxfd = -1;
+
+	rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
+	if (rxfd < 0)
+		error(1, 0, "socket creation");
+	setup_sock_filter(rxfd);
+	set_timeout(rxfd);
+	bind_packetsocket(rxfd);
+
+	ksft_ready();
+
+	memset(correct_payload, 0, sizeof(correct_payload));
+
+	if (strcmp(testname, "data") == 0) {
+		printf("pure data packet of same size: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 1);
+
+		printf("large data packets followed by a smaller one: ");
+		correct_payload[0] = PAYLOAD_LEN * 1.5;
+		check_recv_pkts(rxfd, correct_payload, 1);
+
+		printf("small data packets followed by a larger one: ");
+		correct_payload[0] = PAYLOAD_LEN / 2;
+		correct_payload[1] = PAYLOAD_LEN;
+		check_recv_pkts(rxfd, correct_payload, 2);
+	} else if (strcmp(testname, "ack") == 0) {
+		printf("duplicate ack and pure ack: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else if (strcmp(testname, "flags") == 0) {
+		correct_payload[0] = PAYLOAD_LEN * 3;
+		correct_payload[1] = PAYLOAD_LEN * 2;
+
+		printf("psh flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		correct_payload[1] = 0;
+		correct_payload[2] = PAYLOAD_LEN * 2;
+		printf("syn flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+
+		printf("rst flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+
+		printf("urg flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else if (strcmp(testname, "tcp") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
+		correct_payload[2] = PAYLOAD_LEN;
+		correct_payload[3] = PAYLOAD_LEN;
+
+		printf("changed checksum does not coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		printf("Wrong Seq number doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		printf("Different timestamp doesn't coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 4);
+
+		printf("Different options doesn't coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 2);
+	} else if (strcmp(testname, "ip") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
+
+		printf("different ECN doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		printf("different tos doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		if (proto == PF_INET) {
+			printf("different ttl doesn't coalesce: ");
+			check_recv_pkts(rxfd, correct_payload, 2);
+
+			printf("ip options doesn't coalesce: ");
+			correct_payload[2] = PAYLOAD_LEN;
+			check_recv_pkts(rxfd, correct_payload, 3);
+
+			printf("fragmented ip4 doesn't coalesce: ");
+			check_recv_pkts(rxfd, correct_payload, 2);
+
+			/* is_atomic checks */
+			printf("DF=1, Incrementing - should coalesce: ");
+			correct_payload[0] = PAYLOAD_LEN * 2;
+			check_recv_pkts(rxfd, correct_payload, 1);
+
+			printf("DF=1, Fixed - should coalesce: ");
+			correct_payload[0] = PAYLOAD_LEN * 2;
+			check_recv_pkts(rxfd, correct_payload, 1);
+
+			printf("DF=0, Incrementing - should coalesce: ");
+			correct_payload[0] = PAYLOAD_LEN * 2;
+			check_recv_pkts(rxfd, correct_payload, 1);
+
+			printf("DF=0, Fixed - should coalesce: ");
+			correct_payload[0] = PAYLOAD_LEN * 2;
+			check_recv_pkts(rxfd, correct_payload, 1);
+
+			printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
+			correct_payload[0] = PAYLOAD_LEN * 2;
+			correct_payload[1] = PAYLOAD_LEN;
+			check_recv_pkts(rxfd, correct_payload, 2);
+
+			printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
+			correct_payload[0] = PAYLOAD_LEN * 2;
+			correct_payload[1] = PAYLOAD_LEN;
+			check_recv_pkts(rxfd, correct_payload, 2);
+		} else if (proto == PF_INET6) {
+			/* GRO doesn't check for ipv6 hop limit when flushing.
+			 * Hence no corresponding test to the ipv4 case.
+			 */
+			printf("fragmented ip6 doesn't coalesce: ");
+			correct_payload[0] = PAYLOAD_LEN * 2;
+			correct_payload[1] = PAYLOAD_LEN;
+			correct_payload[2] = PAYLOAD_LEN;
+			check_recv_pkts(rxfd, correct_payload, 3);
+
+			printf("ipv6 with ext header does coalesce: ");
+			correct_payload[0] = PAYLOAD_LEN * 2;
+			check_recv_pkts(rxfd, correct_payload, 1);
+
+			printf("ipv6 with ext header with different payloads doesn't coalesce: ");
+			correct_payload[0] = PAYLOAD_LEN;
+			correct_payload[1] = PAYLOAD_LEN;
+			check_recv_pkts(rxfd, correct_payload, 2);
+		}
+	} else if (strcmp(testname, "large") == 0) {
+		int offset = (proto == PF_INET && !ipip) ? 20 : 0;
+		int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+		correct_payload[0] = (MAX_PAYLOAD + offset);
+		correct_payload[1] = remainder;
+		printf("Shouldn't coalesce if exceed IP max pkt size: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+		/* last segment sent individually, doesn't start new segment */
+		correct_payload[0] = correct_payload[0] - remainder;
+		correct_payload[1] = remainder + 1;
+		correct_payload[2] = remainder + 1;
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else {
+		error(1, 0, "Test case error, should never trigger");
+	}
+
+	if (close(rxfd))
+		error(1, 0, "socket close");
+}
+
+static void parse_args(int argc, char **argv)
+{
+	static const struct option opts[] = {
+		{ "daddr", required_argument, NULL, 'd' },
+		{ "dmac", required_argument, NULL, 'D' },
+		{ "iface", required_argument, NULL, 'i' },
+		{ "ipv4", no_argument, NULL, '4' },
+		{ "ipv6", no_argument, NULL, '6' },
+		{ "ipip", no_argument, NULL, 'e' },
+		{ "rx", no_argument, NULL, 'r' },
+		{ "saddr", required_argument, NULL, 's' },
+		{ "smac", required_argument, NULL, 'S' },
+		{ "test", required_argument, NULL, 't' },
+		{ "verbose", no_argument, NULL, 'v' },
+		{ 0, 0, 0, 0 }
+	};
+	int c;
+
+	while ((c = getopt_long(argc, argv, "46d:D:ei:rs:S:t:v", opts, NULL)) != -1) {
+		switch (c) {
+		case '4':
+			proto = PF_INET;
+			ethhdr_proto = htons(ETH_P_IP);
+			break;
+		case '6':
+			proto = PF_INET6;
+			ethhdr_proto = htons(ETH_P_IPV6);
+			break;
+		case 'e':
+			ipip = true;
+			proto = PF_INET;
+			ethhdr_proto = htons(ETH_P_IP);
+			break;
+		case 'd':
+			addr4_dst = addr6_dst = optarg;
+			break;
+		case 'D':
+			dmac = optarg;
+			break;
+		case 'i':
+			ifname = optarg;
+			break;
+		case 'r':
+			tx_socket = false;
+			break;
+		case 's':
+			addr4_src = addr6_src = optarg;
+			break;
+		case 'S':
+			smac = optarg;
+			break;
+		case 't':
+			testname = optarg;
+			break;
+		case 'v':
+			verbose = true;
+			break;
+		default:
+			error(1, 0, "%s invalid option %c\n", __func__, c);
+			break;
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_args(argc, argv);
+
+	if (ipip) {
+		tcp_offset = ETH_HLEN + sizeof(struct iphdr) * 2;
+		total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+	} else if (proto == PF_INET) {
+		tcp_offset = ETH_HLEN + sizeof(struct iphdr);
+		total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+	} else if (proto == PF_INET6) {
+		tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
+		total_hdr_len = MAX_HDR_LEN;
+	} else {
+		error(1, 0, "Protocol family is not ipv4 or ipv6");
+	}
+
+	read_MAC(src_mac, smac);
+	read_MAC(dst_mac, dmac);
+
+	if (tx_socket) {
+		gro_sender();
+	} else {
+		/* Only the receiver exit status determines test success. */
+		gro_receiver();
+		fprintf(stderr, "Gro::%s test passed.\n", testname);
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py
new file mode 100755
index 000000000000..ba83713bf7b5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/gro.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+GRO (Generic Receive Offload) conformance tests.
+
+Validates that GRO coalescing works correctly by running the gro
+binary in different configurations and checking for correct packet
+coalescing behavior.
+
+Test cases:
+  - data: Data packets with same size/headers and correct seq numbers coalesce
+  - ack: Pure ACK packets do not coalesce
+  - flags: Packets with PSH, SYN, URG, RST flags do not coalesce
+  - tcp: Packets with incorrect checksum, non-consecutive seqno don't coalesce
+  - ip: Packets with different ECN, TTL, TOS, or IP options don't coalesce
+  - large: Packets larger than GRO_MAX_SIZE don't coalesce
+"""
+
+import os
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import NetDrvEpEnv, KsftXfailEx
+from lib.py import cmd, defer, bkg, ip
+from lib.py import ksft_variants
+
+
+def _resolve_dmac(cfg, ipver):
+    """
+    Find the destination MAC address remote host should use to send packets
+    towards the local host. It may be a router / gateway address.
+    """
+
+    attr = "dmac" + ipver
+    # Cache the response across test cases
+    if hasattr(cfg, attr):
+        return getattr(cfg, attr)
+
+    route = ip(f"-{ipver} route get {cfg.addr_v[ipver]}",
+               json=True, host=cfg.remote)[0]
+    gw = route.get("gateway")
+    # Local L2 segment, address directly
+    if not gw:
+        setattr(cfg, attr, cfg.dev['address'])
+        return getattr(cfg, attr)
+
+    # ping to make sure neighbor is resolved,
+    # bind to an interface, for v6 the GW is likely link local
+    cmd(f"ping -c1 -W0 -I{cfg.remote_ifname} {gw}", host=cfg.remote)
+
+    neigh = ip(f"neigh get {gw} dev {cfg.remote_ifname}",
+               json=True, host=cfg.remote)[0]
+    setattr(cfg, attr, neigh['lladdr'])
+    return getattr(cfg, attr)
+
+
+def _write_defer_restore(cfg, path, val, defer_undo=False):
+    with open(path, "r", encoding="utf-8") as fp:
+        orig_val = fp.read().strip()
+        if str(val) == orig_val:
+            return
+    with open(path, "w", encoding="utf-8") as fp:
+        fp.write(val)
+    if defer_undo:
+        defer(_write_defer_restore, cfg, path, orig_val)
+
+
+def _set_mtu_restore(dev, mtu, host):
+    if dev['mtu'] < mtu:
+        ip(f"link set dev {dev['ifname']} mtu {mtu}", host=host)
+        defer(ip, f"link set dev {dev['ifname']} mtu {dev['mtu']}", host=host)
+
+
+def _setup(cfg, test_name):
+    """ Setup hardware loopback mode for GRO testing. """
+
+    if not hasattr(cfg, "bin_remote"):
+        cfg.bin_local = cfg.test_dir / "gro"
+        cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+    # "large" test needs at least 4k MTU
+    if test_name == "large":
+        _set_mtu_restore(cfg.dev, 4096, None)
+        _set_mtu_restore(cfg.remote_dev, 4096, cfg.remote)
+
+    flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout"
+    irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs"
+
+    _write_defer_restore(cfg, flush_path, "200000", defer_undo=True)
+    _write_defer_restore(cfg, irq_path, "10", defer_undo=True)
+
+    try:
+        # Disable TSO for local tests
+        cfg.require_nsim()  # will raise KsftXfailEx if not running on nsim
+
+        cmd(f"ethtool -K {cfg.ifname} gro on tso off")
+        cmd(f"ethtool -K {cfg.remote_ifname} gro on tso off", host=cfg.remote)
+    except KsftXfailEx:
+        pass
+
+def _gro_variants():
+    """Generator that yields all combinations of protocol and test types."""
+
+    for protocol in ["ipv4", "ipv6", "ipip"]:
+        for test_name in ["data", "ack", "flags", "tcp", "ip", "large"]:
+            yield protocol, test_name
+
+
+@ksft_variants(_gro_variants())
+def test(cfg, protocol, test_name):
+    """Run a single GRO test with retries."""
+
+    ipver = "6" if protocol[-1] == "6" else "4"
+    cfg.require_ipver(ipver)
+
+    _setup(cfg, test_name)
+
+    base_cmd_args = [
+        f"--{protocol}",
+        f"--dmac {_resolve_dmac(cfg, ipver)}",
+        f"--smac {cfg.remote_dev['address']}",
+        f"--daddr {cfg.addr_v[ipver]}",
+        f"--saddr {cfg.remote_addr_v[ipver]}",
+        f"--test {test_name}",
+        "--verbose"
+    ]
+    base_args = " ".join(base_cmd_args)
+
+    # Each test is run 6 times to deflake, because given the receive timing,
+    # not all packets that should coalesce will be considered in the same flow
+    # on every try.
+    max_retries = 6
+    for attempt in range(max_retries):
+        rx_cmd = f"{cfg.bin_local} {base_args} --rx --iface {cfg.ifname}"
+        tx_cmd = f"{cfg.bin_remote} {base_args} --iface {cfg.remote_ifname}"
+
+        fail_now = attempt >= max_retries - 1
+
+        with bkg(rx_cmd, ksft_ready=True, exit_wait=True,
+                 fail=fail_now) as rx_proc:
+            cmd(tx_cmd, host=cfg.remote)
+
+        if rx_proc.ret == 0:
+            return
+
+        ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# '))
+        ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# '))
+
+        if test_name == "large" and os.environ.get("KSFT_MACHINE_SLOW"):
+            ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment")
+            return
+
+        ksft_pr(f"Attempt {attempt + 1}/{max_retries} failed, retrying...")
+
+
+def main() -> None:
+    """ Ksft boiler plate main """
+
+    with NetDrvEpEnv(__file__) as cfg:
+        ksft_run(cases=[test], args=(cfg,))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py
new file mode 100755
index 000000000000..c4fe049e9baa
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hds.py
@@ -0,0 +1,329 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import errno
+import os
+import random
+from typing import Union
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx
+from lib.py import CmdExitFailure, EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+from lib.py import defer, ethtool, ip
+
+
+def _get_hds_mode(cfg, netnl) -> str:
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'tcp-data-split' not in rings:
+        raise KsftSkipEx('tcp-data-split not supported by device')
+    return rings['tcp-data-split']
+
+
+def _xdp_onoff(cfg):
+    prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+    ip("link set dev %s xdp obj %s sec xdp" %
+       (cfg.ifname, prog))
+    ip("link set dev %s xdp off" % cfg.ifname)
+
+
+def _ioctl_ringparam_modify(cfg, netnl) -> None:
+    """
+    Helper for performing a hopefully unimportant IOCTL SET.
+    IOCTL does not support HDS, so it should not affect the HDS config.
+    """
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+
+    if 'tx' not in rings:
+        raise KsftSkipEx('setting Tx ring size not supported')
+
+    try:
+        ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] // 2}")
+    except CmdExitFailure as e:
+        ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] * 2}")
+    defer(ethtool, f"-G {cfg.ifname} tx {rings['tx']}")
+
+
+def get_hds(cfg, netnl) -> None:
+    _get_hds_mode(cfg, netnl)
+
+
+def get_hds_thresh(cfg, netnl) -> None:
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'hds-thresh' not in rings:
+        raise KsftSkipEx('hds-thresh not supported by device')
+
+
+def _hds_reset(cfg, netnl, rings) -> None:
+    cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+
+    arg = {'header': {'dev-index': cfg.ifindex}}
+    if cur.get('tcp-data-split') != rings.get('tcp-data-split'):
+        # Try to reset to "unknown" first, we don't know if the setting
+        # was the default or user chose it. Default seems more likely.
+        arg['tcp-data-split'] = "unknown"
+        netnl.rings_set(arg)
+        cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+        if cur['tcp-data-split'] == rings['tcp-data-split']:
+            del arg['tcp-data-split']
+        else:
+            # Try the explicit setting
+            arg['tcp-data-split'] = rings['tcp-data-split']
+    if cur.get('hds-thresh') != rings.get('hds-thresh'):
+        arg['hds-thresh'] = rings['hds-thresh']
+    if len(arg) > 1:
+        netnl.rings_set(arg)
+
+
+def _defer_reset_hds(cfg, netnl) -> Union[dict, None]:
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+        if 'hds-thresh' in rings or 'tcp-data-split' in rings:
+            defer(_hds_reset, cfg, netnl, rings)
+    except NlError as e:
+        pass
+
+
+def set_hds_enable(cfg, netnl) -> None:
+    _defer_reset_hds(cfg, netnl)
+    try:
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'})
+    except NlError as e:
+        if e.error == errno.EINVAL:
+            raise KsftSkipEx("disabling of HDS not supported by the device")
+        elif e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("ring-set not supported by the device")
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'tcp-data-split' not in rings:
+        raise KsftSkipEx('tcp-data-split not supported by device')
+
+    ksft_eq('enabled', rings['tcp-data-split'])
+
+def set_hds_disable(cfg, netnl) -> None:
+    _defer_reset_hds(cfg, netnl)
+    try:
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'})
+    except NlError as e:
+        if e.error == errno.EINVAL:
+            raise KsftSkipEx("disabling of HDS not supported by the device")
+        elif e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("ring-set not supported by the device")
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'tcp-data-split' not in rings:
+        raise KsftSkipEx('tcp-data-split not supported by device')
+
+    ksft_eq('disabled', rings['tcp-data-split'])
+
+def set_hds_thresh_zero(cfg, netnl) -> None:
+    _defer_reset_hds(cfg, netnl)
+    try:
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0})
+    except NlError as e:
+        if e.error == errno.EINVAL:
+            raise KsftSkipEx("hds-thresh-set not supported by the device")
+        elif e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("ring-set not supported by the device")
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'hds-thresh' not in rings:
+        raise KsftSkipEx('hds-thresh not supported by device')
+
+    ksft_eq(0, rings['hds-thresh'])
+
+def set_hds_thresh_random(cfg, netnl) -> None:
+    _defer_reset_hds(cfg, netnl)
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'hds-thresh' not in rings:
+        raise KsftSkipEx('hds-thresh not supported by device')
+    if 'hds-thresh-max' not in rings:
+        raise KsftSkipEx('hds-thresh-max not defined by device')
+
+    if rings['hds-thresh-max'] < 2:
+        raise KsftSkipEx('hds-thresh-max is too small')
+    elif rings['hds-thresh-max'] == 2:
+        hds_thresh = 1
+    else:
+        while True:
+            hds_thresh = random.randint(1, rings['hds-thresh-max'] - 1)
+            if hds_thresh != rings['hds-thresh']:
+                break
+
+    try:
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_thresh})
+    except NlError as e:
+        if e.error == errno.EINVAL:
+            raise KsftSkipEx("hds-thresh-set not supported by the device")
+        elif e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("ring-set not supported by the device")
+    rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    ksft_eq(hds_thresh, rings['hds-thresh'])
+
+def set_hds_thresh_max(cfg, netnl) -> None:
+    _defer_reset_hds(cfg, netnl)
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'hds-thresh' not in rings:
+        raise KsftSkipEx('hds-thresh not supported by device')
+    try:
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': rings['hds-thresh-max']})
+    except NlError as e:
+        if e.error == errno.EINVAL:
+            raise KsftSkipEx("hds-thresh-set not supported by the device")
+        elif e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("ring-set not supported by the device")
+    rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    ksft_eq(rings['hds-thresh'], rings['hds-thresh-max'])
+
+def set_hds_thresh_gt(cfg, netnl) -> None:
+    _defer_reset_hds(cfg, netnl)
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'hds-thresh' not in rings:
+        raise KsftSkipEx('hds-thresh not supported by device')
+    if 'hds-thresh-max' not in rings:
+        raise KsftSkipEx('hds-thresh-max not defined by device')
+    hds_gt = rings['hds-thresh-max'] + 1
+    with ksft_raises(NlError) as e:
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt})
+    ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+
+def set_xdp(cfg, netnl) -> None:
+    """
+    Enable single-buffer XDP on the device.
+    When HDS is in "auto" / UNKNOWN mode, XDP installation should work.
+    """
+    mode = _get_hds_mode(cfg, netnl)
+    if mode == 'enabled':
+        _defer_reset_hds(cfg, netnl)
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+                         'tcp-data-split': 'unknown'})
+
+    _xdp_onoff(cfg)
+
+
+def enabled_set_xdp(cfg, netnl) -> None:
+    """
+    Enable single-buffer XDP on the device.
+    When HDS is in "enabled" mode, XDP installation should not work.
+    """
+    _get_hds_mode(cfg, netnl)
+    netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+                     'tcp-data-split': 'enabled'})
+
+    defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+                            'tcp-data-split': 'unknown'})
+
+    with ksft_raises(CmdExitFailure) as e:
+        _xdp_onoff(cfg)
+
+
+def set_xdp(cfg, netnl) -> None:
+    """
+    Enable single-buffer XDP on the device.
+    When HDS is in "auto" / UNKNOWN mode, XDP installation should work.
+    """
+    mode = _get_hds_mode(cfg, netnl)
+    if mode == 'enabled':
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+                         'tcp-data-split': 'unknown'})
+
+    _xdp_onoff(cfg)
+
+
+def enabled_set_xdp(cfg, netnl) -> None:
+    """
+    Enable single-buffer XDP on the device.
+    When HDS is in "enabled" mode, XDP installation should not work.
+    """
+    _get_hds_mode(cfg, netnl)  # Trigger skip if not supported
+
+    netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+                     'tcp-data-split': 'enabled'})
+    defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+                            'tcp-data-split': 'unknown'})
+
+    with ksft_raises(CmdExitFailure) as e:
+        _xdp_onoff(cfg)
+
+
+def ioctl(cfg, netnl) -> None:
+    mode1 = _get_hds_mode(cfg, netnl)
+    _ioctl_ringparam_modify(cfg, netnl)
+    mode2 = _get_hds_mode(cfg, netnl)
+
+    ksft_eq(mode1, mode2)
+
+
+def ioctl_set_xdp(cfg, netnl) -> None:
+    """
+    Like set_xdp(), but we perturb the settings via the legacy ioctl.
+    """
+    mode = _get_hds_mode(cfg, netnl)
+    if mode == 'enabled':
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+                         'tcp-data-split': 'unknown'})
+
+    _ioctl_ringparam_modify(cfg, netnl)
+
+    _xdp_onoff(cfg)
+
+
+def ioctl_enabled_set_xdp(cfg, netnl) -> None:
+    """
+    Enable single-buffer XDP on the device.
+    When HDS is in "enabled" mode, XDP installation should not work.
+    """
+    _get_hds_mode(cfg, netnl)  # Trigger skip if not supported
+
+    netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+                     'tcp-data-split': 'enabled'})
+    defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+                            'tcp-data-split': 'unknown'})
+
+    with ksft_raises(CmdExitFailure) as e:
+        _xdp_onoff(cfg)
+
+
+def main() -> None:
+    with NetDrvEnv(__file__, queue_count=3) as cfg:
+        ksft_run([get_hds,
+                  get_hds_thresh,
+                  set_hds_disable,
+                  set_hds_enable,
+                  set_hds_thresh_random,
+                  set_hds_thresh_zero,
+                  set_hds_thresh_max,
+                  set_hds_thresh_gt,
+                  set_xdp,
+                  enabled_set_xdp,
+                  ioctl,
+                  ioctl_set_xdp,
+                  ioctl_enabled_set_xdp],
+                 args=(cfg, EthtoolFamily()))
+    ksft_exit()
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore
new file mode 100644
index 000000000000..46540468a775
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+iou-zcrx
+ncdevmem
+toeplitz
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
new file mode 100644
index 000000000000..9c163ba6feee
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+# Check if io_uring supports zero-copy receive
+HAS_IOURING_ZCRX := $(shell \
+	echo -e '#include <liburing.h>\n' \
+	     'void *func = (void *)io_uring_register_ifq;\n' \
+	     'int main() {return 0;}' | \
+	$(CC) -luring -x c - -o /dev/null 2>&1 && echo y)
+
+ifeq ($(HAS_IOURING_ZCRX),y)
+COND_GEN_FILES += iou-zcrx
+else
+$(warning excluding iouring tests, liburing not installed or too old)
+endif
+
+TEST_GEN_FILES := \
+	$(COND_GEN_FILES) \
+# end of TEST_GEN_FILES
+
+TEST_PROGS = \
+	csum.py \
+	devlink_port_split.py \
+	devlink_rate_tc_bw.py \
+	devmem.py \
+	ethtool.sh \
+	ethtool_extended_state.sh \
+	ethtool_mm.sh \
+	ethtool_rmon.sh \
+	hw_stats_l3.sh \
+	hw_stats_l3_gre.sh \
+	iou-zcrx.py \
+	irq.py \
+	loopback.sh \
+	nic_timestamp.py \
+	pp_alloc_fail.py \
+	rss_api.py \
+	rss_ctx.py \
+	rss_flow_label.py \
+	rss_input_xfrm.py \
+	toeplitz.py \
+	tso.py \
+	xsk_reconfig.py \
+	#
+
+TEST_FILES := \
+	ethtool_lib.sh \
+	#
+
+TEST_INCLUDES := \
+	$(wildcard lib/py/*.py ../lib/py/*.py) \
+	../../../net/lib.sh \
+	../../../net/forwarding/ipip_lib.sh \
+	../../../net/forwarding/lib.sh \
+	../../../net/forwarding/tc_common.sh \
+	#
+
+# YNL files, must be before "include ..lib.mk"
+YNL_GEN_FILES := \
+	ncdevmem \
+	toeplitz \
+# end of YNL_GEN_FILES
+TEST_GEN_FILES += $(YNL_GEN_FILES)
+TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
+
+include ../../../lib.mk
+
+# YNL build
+YNL_GENS := \
+	ethtool \
+	netdev \
+# end of YNL_GENS
+
+include ../../../net/ynl.mk
+
+include ../../../net/bpf.mk
+
+ifeq ($(HAS_IOURING_ZCRX),y)
+$(OUTPUT)/iou-zcrx: LDLIBS += -luring
+endif
diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config
new file mode 100644
index 000000000000..2307aa001be1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/config
@@ -0,0 +1,11 @@
+CONFIG_FAIL_FUNCTION=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FUNCTION_ERROR_INJECTION=y
+CONFIG_IO_URING=y
+CONFIG_IPV6=y
+CONFIG_IPV6_GRE=y
+CONFIG_NET_IPGRE=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_UDMABUF=y
+CONFIG_VXLAN=y
diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py
new file mode 100755
index 000000000000..3e3a89a34afe
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/csum.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Run the tools/testing/selftests/net/csum testsuite."""
+
+from os import path
+
+from lib.py import ksft_run, ksft_exit, KsftSkipEx
+from lib.py import EthtoolFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, wait_port_listen
+
+def test_receive(cfg, ipver="6", extra_args=None):
+    """Test local nic checksum receive. Remote host sends crafted packets."""
+    if not cfg.have_rx_csum:
+        raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}")
+
+    ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}"
+
+    rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}"
+    tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}"
+
+    with bkg(rx_cmd, exit_wait=True):
+        wait_port_listen(34000, proto="udp")
+        cmd(tx_cmd, host=cfg.remote)
+
+
+def test_transmit(cfg, ipver="6", extra_args=None):
+    """Test local nic checksum transmit. Remote host verifies packets."""
+    if (not cfg.have_tx_csum_generic and
+        not (cfg.have_tx_csum_ipv4 and ipver == "4") and
+        not (cfg.have_tx_csum_ipv6 and ipver == "6")):
+        raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}")
+
+    ip_args = f"-{ipver} -S {cfg.addr_v[ipver]} -D {cfg.remote_addr_v[ipver]}"
+
+    # Cannot randomize input when calculating zero checksum
+    if extra_args != "-U -Z":
+        extra_args += " -r 1"
+
+    rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}"
+    tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}"
+
+    with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+        wait_port_listen(34000, proto="udp", host=cfg.remote)
+        cmd(tx_cmd)
+
+
+def test_builder(name, cfg, ipver="6", tx=False, extra_args=""):
+    """Construct specific tests from the common template.
+
+       Most tests follow the same basic pattern, differing only in
+       Direction of the test and optional flags passed to csum."""
+    def f(cfg):
+        cfg.require_ipver(ipver)
+
+        if tx:
+            test_transmit(cfg, ipver, extra_args)
+        else:
+            test_receive(cfg, ipver, extra_args)
+
+    f.__name__ = f"ipv{ipver}_" + name
+    return f
+
+
+def check_nic_features(cfg) -> None:
+    """Test whether Tx and Rx checksum offload are enabled.
+
+       If the device under test has either off, then skip the relevant tests."""
+    cfg.have_tx_csum_generic = False
+    cfg.have_tx_csum_ipv4 = False
+    cfg.have_tx_csum_ipv6 = False
+    cfg.have_rx_csum = False
+
+    ethnl = EthtoolFamily()
+    features = ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+    for f in features["active"]["bits"]["bit"]:
+        if f["name"] == "tx-checksum-ip-generic":
+            cfg.have_tx_csum_generic = True
+        elif f["name"] == "tx-checksum-ipv4":
+            cfg.have_tx_csum_ipv4 = True
+        elif f["name"] == "tx-checksum-ipv6":
+            cfg.have_tx_csum_ipv6 = True
+        elif f["name"] == "rx-checksum":
+            cfg.have_rx_csum = True
+
+
+def main() -> None:
+    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+        check_nic_features(cfg)
+
+        cfg.bin_local = cfg.net_lib_dir / "csum"
+        cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+        cases = []
+        for ipver in ["4", "6"]:
+            cases.append(test_builder("rx_tcp", cfg, ipver, False, "-t"))
+            cases.append(test_builder("rx_tcp_invalid", cfg, ipver, False, "-t -E"))
+
+            cases.append(test_builder("rx_udp", cfg, ipver, False, ""))
+            cases.append(test_builder("rx_udp_invalid", cfg, ipver, False, "-E"))
+
+            cases.append(test_builder("tx_udp_csum_offload", cfg, ipver, True, "-U"))
+            cases.append(test_builder("tx_udp_zero_checksum", cfg, ipver, True, "-U -Z"))
+
+        ksft_run(cases=cases, args=(cfg, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/devlink_port_split.py b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py
new file mode 100755
index 000000000000..2d84c7a0be6b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from subprocess import PIPE, Popen
+import json
+import time
+import argparse
+import collections
+import sys
+
+#
+# Test port split configuration using devlink-port lanes attribute.
+# The test is skipped in case the attribute is not available.
+#
+# First, check that all the ports with 1 lane fail to split.
+# Second, check that all the ports with more than 1 lane can be split
+# to all valid configurations (e.g., split to 2, split to 4 etc.)
+#
+
+
+# Kselftest framework requirement - SKIP code is 4
+KSFT_SKIP=4
+Port = collections.namedtuple('Port', 'bus_info name')
+
+
+def run_command(cmd, should_fail=False):
+    """
+    Run a command in subprocess.
+    Return: Tuple of (stdout, stderr).
+    """
+
+    p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
+    stdout, stderr = p.communicate()
+    stdout, stderr = stdout.decode(), stderr.decode()
+
+    if stderr != "" and not should_fail:
+        print("Error sending command: %s" % cmd)
+        print(stdout)
+        print(stderr)
+    return stdout, stderr
+
+
+class devlink_ports(object):
+    """
+    Class that holds information on the devlink ports, required to the tests;
+    if_names: A list of interfaces in the devlink ports.
+    """
+
+    def get_if_names(dev):
+        """
+        Get a list of physical devlink ports.
+        Return: Array of tuples (bus_info/port, if_name).
+        """
+
+        arr = []
+
+        cmd = "devlink -j port show"
+        stdout, stderr = run_command(cmd)
+        assert stderr == ""
+        ports = json.loads(stdout)['port']
+
+        validate_devlink_output(ports, 'flavour')
+
+        for port in ports:
+            if dev in port:
+                if ports[port]['flavour'] == 'physical':
+                    arr.append(Port(bus_info=port, name=ports[port]['netdev']))
+
+        return arr
+
+    def __init__(self, dev):
+        self.if_names = devlink_ports.get_if_names(dev)
+
+
+def get_max_lanes(port):
+    """
+    Get the $port's maximum number of lanes.
+    Return: number of lanes, e.g. 1, 2, 4 and 8.
+    """
+
+    cmd = "devlink -j port show %s" % port
+    stdout, stderr = run_command(cmd)
+    assert stderr == ""
+    values = list(json.loads(stdout)['port'].values())[0]
+
+    if 'lanes' in values:
+        lanes = values['lanes']
+    else:
+        lanes = 0
+    return lanes
+
+
+def get_split_ability(port):
+    """
+    Get the $port split ability.
+    Return: split ability, true or false.
+    """
+
+    cmd = "devlink -j port show %s" % port.name
+    stdout, stderr = run_command(cmd)
+    assert stderr == ""
+    values = list(json.loads(stdout)['port'].values())[0]
+
+    return values['splittable']
+
+
+def split(k, port, should_fail=False):
+    """
+    Split $port into $k ports.
+    If should_fail == True, the split should fail. Otherwise, should pass.
+    Return: Array of sub ports after splitting.
+            If the $port wasn't split, the array will be empty.
+    """
+
+    cmd = "devlink port split %s count %s" % (port.bus_info, k)
+    stdout, stderr = run_command(cmd, should_fail=should_fail)
+
+    if should_fail:
+        if not test(stderr != "", "%s is unsplittable" % port.name):
+            print("split an unsplittable port %s" % port.name)
+            return create_split_group(port, k)
+    else:
+        if stderr == "":
+            return create_split_group(port, k)
+        print("didn't split a splittable port %s" % port.name)
+
+    return []
+
+
+def unsplit(port):
+    """
+    Unsplit $port.
+    """
+
+    cmd = "devlink port unsplit %s" % port
+    stdout, stderr = run_command(cmd)
+    test(stderr == "", "Unsplit port %s" % port)
+
+
+def exists(port, dev):
+    """
+    Check if $port exists in the devlink ports.
+    Return: True is so, False otherwise.
+    """
+
+    return any(dev_port.name == port
+               for dev_port in devlink_ports.get_if_names(dev))
+
+
+def exists_and_lanes(ports, lanes, dev):
+    """
+    Check if every port in the list $ports exists in the devlink ports and has
+    $lanes number of lanes after splitting.
+    Return: True if both are True, False otherwise.
+    """
+
+    for port in ports:
+        max_lanes = get_max_lanes(port)
+        if not exists(port, dev):
+            print("port %s doesn't exist in devlink ports" % port)
+            return False
+        if max_lanes != lanes:
+            print("port %s has %d lanes, but %s were expected"
+                  % (port, lanes, max_lanes))
+            return False
+    return True
+
+
+def test(cond, msg):
+    """
+    Check $cond and print a message accordingly.
+    Return: True is pass, False otherwise.
+    """
+
+    if cond:
+        print("TEST: %-60s [ OK ]" % msg)
+    else:
+        print("TEST: %-60s [FAIL]" % msg)
+
+    return cond
+
+
+def create_split_group(port, k):
+    """
+    Create the split group for $port.
+    Return: Array with $k elements, which are the split port group.
+    """
+
+    return list(port.name + "s" + str(i) for i in range(k))
+
+
+def split_unsplittable_port(port, k):
+    """
+    Test that splitting of unsplittable port fails.
+    """
+
+    # split to max
+    new_split_group = split(k, port, should_fail=True)
+
+    if new_split_group != []:
+        unsplit(port.bus_info)
+
+
+def split_splittable_port(port, k, lanes, dev):
+    """
+    Test that splitting of splittable port passes correctly.
+    """
+
+    new_split_group = split(k, port)
+
+    # Once the split command ends, it takes some time to the sub ifaces'
+    # to get their names. Use udevadm to continue only when all current udev
+    # events are handled.
+    cmd = "udevadm settle"
+    stdout, stderr = run_command(cmd)
+    assert stderr == ""
+
+    if new_split_group != []:
+        test(exists_and_lanes(new_split_group, lanes/k, dev),
+             "split port %s into %s" % (port.name, k))
+
+    unsplit(port.bus_info)
+
+
+def validate_devlink_output(devlink_data, target_property=None):
+    """
+    Determine if test should be skipped by checking:
+      1. devlink_data contains values
+      2. The target_property exist in devlink_data
+    """
+    skip_reason = None
+    if any(devlink_data.values()):
+        if target_property:
+            skip_reason = "{} not found in devlink output, test skipped".format(target_property)
+            for key in devlink_data:
+                if target_property in devlink_data[key]:
+                    skip_reason = None
+    else:
+        skip_reason = 'devlink output is empty, test skipped'
+
+    if skip_reason:
+        print(skip_reason)
+        sys.exit(KSFT_SKIP)
+
+
+def make_parser():
+    parser = argparse.ArgumentParser(description='A test for port splitting.')
+    parser.add_argument('--dev',
+                        help='The devlink handle of the device under test. ' +
+                             'The default is the first registered devlink ' +
+                             'handle.')
+
+    return parser
+
+
+def main(cmdline=None):
+    parser = make_parser()
+    args = parser.parse_args(cmdline)
+
+    dev = args.dev
+    if not dev:
+        cmd = "devlink -j dev show"
+        stdout, stderr = run_command(cmd)
+        assert stderr == ""
+
+        validate_devlink_output(json.loads(stdout))
+        devs = json.loads(stdout)['dev']
+        dev = list(devs.keys())[0]
+
+    cmd = "devlink dev show %s" % dev
+    stdout, stderr = run_command(cmd)
+    if stderr != "":
+        print("devlink device %s can not be found" % dev)
+        sys.exit(1)
+
+    ports = devlink_ports(dev)
+
+    found_max_lanes = False
+    for port in ports.if_names:
+        max_lanes = get_max_lanes(port.name)
+
+        # If max lanes is 0, do not test port splitting at all
+        if max_lanes == 0:
+            continue
+
+        # If 1 lane, shouldn't be able to split
+        elif max_lanes == 1:
+            test(not get_split_ability(port),
+                 "%s should not be able to split" % port.name)
+            split_unsplittable_port(port, max_lanes)
+
+        # Else, splitting should pass and all the split ports should exist.
+        else:
+            lane = max_lanes
+            test(get_split_ability(port),
+                 "%s should be able to split" % port.name)
+            while lane > 1:
+                split_splittable_port(port, lane, max_lanes, dev)
+
+                lane //= 2
+        found_max_lanes = True
+
+    if not found_max_lanes:
+        print(f"Test not started, no port of device {dev} reports max_lanes")
+        sys.exit(KSFT_SKIP)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
new file mode 100755
index 000000000000..4e4faa9275bb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
@@ -0,0 +1,439 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Devlink Rate TC Bandwidth Test Suite
+===================================
+
+This test suite verifies the functionality of devlink-rate traffic class (TC)
+bandwidth distribution in a virtualized environment. The tests validate that
+bandwidth can be properly allocated between different traffic classes and
+that TC mapping works as expected.
+
+Test Environment:
+----------------
+- Creates 1 VF
+- Establishes a bridge connecting the VF representor and the uplink representor
+- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102)
+- Configures different traffic classes (TC3 and TC4) for each VLAN
+
+Test Cases:
+----------
+1. test_no_tc_mapping_bandwidth:
+   - Verifies that without TC mapping, bandwidth is NOT distributed according to
+     the configured 20/80 split between TC3 and TC4
+   - This test should fail if bandwidth matches the 20/80 split without TC
+     mapping
+   - Expected: Bandwidth should NOT be distributed as 20/80
+
+2. test_tc_mapping_bandwidth:
+   - Configures TC mapping using mqprio qdisc
+   - Verifies that with TC mapping, bandwidth IS distributed according to the
+     configured 20/80 split between TC3 and TC4
+   - Expected: Bandwidth should be distributed as 20/80
+
+Bandwidth Distribution:
+----------------------
+- TC3 (VLAN 101): Configured for 20% of total bandwidth
+- TC4 (VLAN 102): Configured for 80% of total bandwidth
+- Total bandwidth: 1Gbps
+- Tolerance: +-12%
+
+Hardware-Specific Behavior (mlx5):
+--------------------------
+mlx5 hardware enforces traffic class separation by ensuring that each transmit
+queue (SQ) is associated with a single TC. If a packet is sent on a queue that
+doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set
+mapping), the hardware moves the queue to the correct TC scheduler to preserve
+traffic isolation.
+
+This behavior means that even without explicit TC-to-queue mapping, bandwidth
+enforcement may still appear to work—because the hardware dynamically adjusts
+the scheduling context. However, this can lead to performance issues in high
+rates and HOL blocking if traffic from different TCs is mixed on the same queue.
+"""
+
+import json
+import os
+import subprocess
+import threading
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit
+from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+from lib.py import NetDrvEpEnv, DevlinkFamily
+from lib.py import NlError
+from lib.py import cmd, defer, ethtool, ip
+from lib.py import Iperf3Runner
+
+
+class BandwidthValidator:
+    """
+    Validates total bandwidth and individual shares with tolerance
+    relative to the overall total.
+    """
+
+    def __init__(self, shares):
+        self.tolerance_percent = 12
+        self.expected_total = sum(shares.values())
+        self.bounds = {}
+
+        for name, exp in shares.items():
+            self.bounds[name] = (self.min_expected(exp), self.max_expected(exp))
+
+    def min_expected(self, value):
+        """Calculates the minimum acceptable value based on tolerance."""
+        return value - (self.expected_total * self.tolerance_percent / 100)
+
+    def max_expected(self, value):
+        """Calculates the maximum acceptable value based on tolerance."""
+        return value + (self.expected_total * self.tolerance_percent / 100)
+
+    def bound(self, values):
+        """
+        Return True if all given values fall within tolerance.
+        """
+        for name, value in values.items():
+            low, high = self.bounds[name]
+            if not low <= value <= high:
+                return False
+        return True
+
+
+def setup_vf(cfg, set_tc_mapping=True):
+    """
+    Sets up a VF on the given network interface.
+
+    Enables SR-IOV and switchdev mode, brings the VF interface up,
+    and optionally configures TC mapping using mqprio.
+    """
+    try:
+        cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev")
+        defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy")
+    except Exception as exc:
+        raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc
+    try:
+        cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
+        defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
+    except Exception as exc:
+        raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc
+
+    time.sleep(2)
+    vf_ifc = (os.listdir(
+        f"/sys/class/net/{cfg.ifname}/device/virtfn0/net") or [None])[0]
+    if vf_ifc:
+        ip(f"link set dev {vf_ifc} up")
+    else:
+        raise KsftSkipEx("VF interface not found")
+    if set_tc_mapping:
+        cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8")
+
+    return vf_ifc
+
+
+def setup_vlans_on_vf(vf_ifc):
+    """
+    Sets up two VLAN interfaces on the given VF, each mapped to a different TC.
+    """
+    vlan_configs = [
+        {"vlan_id": 101, "tc": 3, "ip": "198.51.100.1"},
+        {"vlan_id": 102, "tc": 4, "ip": "198.51.100.9"},
+    ]
+
+    for config in vlan_configs:
+        vlan_dev = f"{vf_ifc}.{config['vlan_id']}"
+        ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {config['vlan_id']}")
+        ip(f"addr add {config['ip']}/29 dev {vlan_dev}")
+        ip(f"link set dev {vlan_dev} up")
+        ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{config['tc']}")
+        ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {config['tc']} and IP {config['ip']}")
+
+
+def get_vf_info(cfg):
+    """
+    Finds the VF representor interface and devlink port index
+    for the given PCI device used in the test environment.
+    """
+    cfg.vf_representor = None
+    cfg.vf_port_index = None
+    out = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8")
+    ports = json.loads(out)["port"]
+
+    for port_name, props in ports.items():
+        netdev = props.get("netdev")
+
+        if (port_name.startswith(f"pci/{cfg.pci}/") and
+            props.get("vfnum") == 0):
+            cfg.vf_representor = netdev
+            cfg.vf_port_index = int(port_name.split("/")[-1])
+            break
+
+
+def setup_bridge(cfg):
+    """
+    Creates and configures a Linux bridge, with both the uplink
+    and VF representor interfaces attached to it.
+    """
+    bridge_name = f"br_{os.getpid()}"
+    ip(f"link add name {bridge_name} type bridge")
+    defer(cmd, f"ip link del name {bridge_name} type bridge")
+
+    ip(f"link set dev {cfg.ifname} master {bridge_name}")
+
+    rep_name = cfg.vf_representor
+    if rep_name:
+        ip(f"link set dev {rep_name} master {bridge_name}")
+        ip(f"link set dev {rep_name} up")
+        ksft_pr(f"Set representor {rep_name} up and added to bridge")
+    else:
+        raise KsftSkipEx("Could not find representor for the VF")
+
+    ip(f"link set dev {bridge_name} up")
+
+
+def setup_devlink_rate(cfg):
+    """
+    Configures devlink rate tx_max and traffic class bandwidth for the VF.
+    """
+    port_index = cfg.vf_port_index
+    if port_index is None:
+        raise KsftSkipEx("Could not find VF port index")
+    try:
+        cfg.devnl.rate_set({
+            "bus-name": "pci",
+            "dev-name": cfg.pci,
+            "port-index": port_index,
+            "rate-tx-max": 125000000,
+            "rate-tc-bws": [
+                {"index": 0, "bw": 0},
+                {"index": 1, "bw": 0},
+                {"index": 2, "bw": 0},
+                {"index": 3, "bw": 20},
+                {"index": 4, "bw": 80},
+                {"index": 5, "bw": 0},
+                {"index": 6, "bw": 0},
+                {"index": 7, "bw": 0},
+            ]
+        })
+    except NlError as exc:
+        if exc.error == 95:  # EOPNOTSUPP
+            raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc
+        raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc
+
+
+def setup_remote_vlans(cfg):
+    """
+    Sets up VLAN interfaces on the remote side.
+    """
+    remote_dev = cfg.remote_ifname
+    vlan_ids = [101, 102]
+    remote_ips = ["198.51.100.2", "198.51.100.10"]
+
+    for vlan_id, ip_addr in zip(vlan_ids, remote_ips):
+        vlan_dev = f"{remote_dev}.{vlan_id}"
+        cmd(f"ip link add link {remote_dev} name {vlan_dev} "
+            f"type vlan id {vlan_id}", host=cfg.remote)
+        cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote)
+        cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote)
+        defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote)
+
+
+def setup_test_environment(cfg, set_tc_mapping=True):
+    """
+    Sets up the complete test environment including VF creation, VLANs,
+    bridge configuration and devlink rate setup.
+    """
+    vf_ifc = setup_vf(cfg, set_tc_mapping)
+    ksft_pr(f"Created VF interface: {vf_ifc}")
+
+    setup_vlans_on_vf(vf_ifc)
+
+    get_vf_info(cfg)
+    setup_bridge(cfg)
+
+    setup_devlink_rate(cfg)
+    setup_remote_vlans(cfg)
+
+
+def measure_bandwidth(cfg, server_ip, client_ip, barrier):
+    """
+    Synchronizes with peers and runs an iperf3-based bandwidth measurement
+    between the given endpoints. Returns average Gbps.
+    """
+    runner = Iperf3Runner(cfg, server_ip=server_ip, client_ip=client_ip)
+    try:
+        barrier.wait(timeout=10)
+    except Exception as exc:
+        raise KsftFailEx("iperf3 barrier wait timed") from exc
+
+    try:
+        bw_gbps = runner.measure_bandwidth(reverse=True)
+    except Exception as exc:
+        raise KsftFailEx("iperf3 bandwidth measurement failed") from exc
+
+    return bw_gbps
+
+
+def run_bandwidth_test(cfg):
+    """
+    Runs parallel bandwidth measurements for each VLAN/TC pair and collects results.
+    """
+    def _run_measure_bandwidth_thread(local_ip, remote_ip, results, barrier, tc_ix):
+        results[tc_ix] = measure_bandwidth(cfg, local_ip, remote_ip, barrier)
+
+    vf_vlan_data = [
+        # (local_ip, remote_ip, TC)
+        ("198.51.100.1",  "198.51.100.2", 3),
+        ("198.51.100.9", "198.51.100.10", 4),
+    ]
+
+    results = {}
+    threads = []
+    start_barrier = threading.Barrier(len(vf_vlan_data))
+
+    for local_ip, remote_ip, tc_ix in vf_vlan_data:
+        thread = threading.Thread(
+            target=_run_measure_bandwidth_thread,
+            args=(local_ip, remote_ip, results, start_barrier, tc_ix)
+        )
+        thread.start()
+        threads.append(thread)
+
+    for thread in threads:
+        thread.join()
+
+    for tc_ix, tc_bw in results.items():
+        if tc_bw is None:
+            raise KsftFailEx("iperf3 failed; cannot evaluate bandwidth")
+
+    return results
+
+
+def calculate_bandwidth_percentages(results):
+    """
+    Calculates the percentage of total bandwidth received by TC3 and TC4.
+    """
+    if 3 not in results or 4 not in results:
+        raise KsftFailEx(f"Missing expected TC results in {results}")
+
+    tc3_bw = results[3]
+    tc4_bw = results[4]
+    total_bw = tc3_bw + tc4_bw
+    tc3_percentage = (tc3_bw / total_bw) * 100
+    tc4_percentage = (tc4_bw / total_bw) * 100
+
+    return {
+        'tc3_bw': tc3_bw,
+        'tc4_bw': tc4_bw,
+        'tc3_percentage': tc3_percentage,
+        'tc4_percentage': tc4_percentage,
+        'total_bw': total_bw
+    }
+
+
+def print_bandwidth_results(bw_data, test_name):
+    """
+    Prints bandwidth measurements and TC usage summary for a given test.
+    """
+    ksft_pr(f"Bandwidth check results {test_name}:")
+    ksft_pr(f"TC 3: {bw_data['tc3_bw']:.2f} Gbits/sec")
+    ksft_pr(f"TC 4: {bw_data['tc4_bw']:.2f} Gbits/sec")
+    ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec")
+    ksft_pr(f"TC 3 percentage: {bw_data['tc3_percentage']:.1f}%")
+    ksft_pr(f"TC 4 percentage: {bw_data['tc4_percentage']:.1f}%")
+
+
+def verify_total_bandwidth(bw_data, validator):
+    """
+    Ensures the total measured bandwidth falls within the acceptable tolerance.
+    """
+    total = bw_data['total_bw']
+
+    if validator.bound({"total": total}):
+        return
+
+    low, high = validator.bounds["total"]
+
+    if total < low:
+        raise KsftSkipEx(
+            f"Total bandwidth {total:.2f} Gbps < minimum "
+            f"{low:.2f} Gbps; "
+            f"parent tx_max ({validator.expected_total:.1f} G) "
+            f"not reached, cannot validate share"
+        )
+
+    raise KsftFailEx(
+        f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling "
+        f"{high:.2f} Gbps "
+        f"(VF tx_max set to {validator.expected_total:.1f} G)"
+    )
+
+
+def run_bandwidth_distribution_test(cfg, set_tc_mapping):
+    """
+    Runs parallel bandwidth measurements for both TCs and collects results.
+    """
+    setup_test_environment(cfg, set_tc_mapping)
+    bandwidths = run_bandwidth_test(cfg)
+    bw_data = calculate_bandwidth_percentages(bandwidths)
+    test_name = "with TC mapping" if set_tc_mapping else "without TC mapping"
+    print_bandwidth_results(bw_data, test_name)
+
+    verify_total_bandwidth(bw_data, cfg.traffic_bw_validator)
+
+    return cfg.tc_bw_validator.bound({"tc3": bw_data['tc3_percentage'],
+                                     "tc4": bw_data['tc4_percentage']})
+
+
+def test_no_tc_mapping_bandwidth(cfg):
+    """
+    Verifies that bandwidth is not split 20/80 without traffic class mapping.
+    """
+    pass_bw_msg = "Bandwidth is NOT distributed as 20/80 without TC mapping"
+    fail_bw_msg = "Bandwidth matched 20/80 split without TC mapping"
+    is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout
+
+    if run_bandwidth_distribution_test(cfg, set_tc_mapping=False):
+        if is_mlx5:
+            raise KsftXfailEx(fail_bw_msg)
+        raise KsftFailEx(fail_bw_msg)
+    if is_mlx5:
+        raise KsftFailEx("mlx5 behavior changed:" + pass_bw_msg)
+    ksft_pr(pass_bw_msg)
+
+
+def test_tc_mapping_bandwidth(cfg):
+    """
+    Verifies that bandwidth is correctly split 20/80 between TC3 and TC4
+    when traffic class mapping is set.
+    """
+    if run_bandwidth_distribution_test(cfg, set_tc_mapping=True):
+        ksft_pr("Bandwidth is distributed as 20/80 with TC mapping")
+    else:
+        raise KsftFailEx("Bandwidth did not match 20/80 split with TC mapping")
+
+
+def main() -> None:
+    """
+    Main entry point for running the test cases.
+    """
+    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+        cfg.devnl = DevlinkFamily()
+
+        cfg.pci = os.path.basename(
+            os.path.realpath(f"/sys/class/net/{cfg.ifname}/device")
+        )
+        if not cfg.pci:
+            raise KsftSkipEx("Could not get PCI address of the interface")
+
+        cfg.traffic_bw_validator = BandwidthValidator({"total": 1})
+        cfg.tc_bw_validator = BandwidthValidator({"tc3": 20, "tc4": 80})
+
+        cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth]
+
+        ksft_run(cases=cases, args=(cfg,))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
new file mode 100755
index 000000000000..45c2d49d55b6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from os import path
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, KsftSkipEx
+from lib.py import NetDrvEpEnv
+from lib.py import bkg, cmd, rand_port, wait_port_listen
+from lib.py import ksft_disruptive
+
+
+def require_devmem(cfg):
+    if not hasattr(cfg, "_devmem_probed"):
+        probe_command = f"{cfg.bin_local} -f {cfg.ifname}"
+        cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0
+        cfg._devmem_probed = True
+
+    if not cfg._devmem_supported:
+        raise KsftSkipEx("Test requires devmem support")
+
+
+@ksft_disruptive
+def check_rx(cfg) -> None:
+    require_devmem(cfg)
+
+    port = rand_port()
+    socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}"
+    listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7"
+
+    with bkg(listen_cmd, exit_wait=True) as ncdevmem:
+        wait_port_listen(port)
+        cmd(f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | \
+            head -c 1K | {socat}", host=cfg.remote, shell=True)
+
+    ksft_eq(ncdevmem.ret, 0)
+
+
+@ksft_disruptive
+def check_tx(cfg) -> None:
+    require_devmem(cfg)
+
+    port = rand_port()
+    listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
+
+    with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat:
+        wait_port_listen(port, host=cfg.remote)
+        cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True)
+
+    ksft_eq(socat.stdout.strip(), "hello\nworld")
+
+
+@ksft_disruptive
+def check_tx_chunks(cfg) -> None:
+    require_devmem(cfg)
+
+    port = rand_port()
+    listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
+
+    with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat:
+        wait_port_listen(port, host=cfg.remote)
+        cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True)
+
+    ksft_eq(socat.stdout.strip(), "hello\nworld")
+
+
+def main() -> None:
+    with NetDrvEpEnv(__file__) as cfg:
+        cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem")
+        cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+        ksft_run([check_rx, check_tx, check_tx_chunks],
+                 args=(cfg, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/ethtool.sh b/tools/testing/selftests/drivers/net/hw/ethtool.sh
new file mode 100755
index 000000000000..fa6953de6b6d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ethtool.sh
@@ -0,0 +1,297 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	same_speeds_autoneg_off
+	different_speeds_autoneg_off
+	combination_of_neg_on_and_off
+	advertise_subset_of_speeds
+	check_highest_speed_is_chosen
+	different_speeds_autoneg_on
+"
+NUM_NETIFS=2
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source ethtool_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/24
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+}
+
+same_speeds_autoneg_off()
+{
+	# Check that when each of the reported speeds is forced, the links come
+	# up and are operational.
+	local -a speeds_arr=($(common_speeds_get $h1 $h2 0 0))
+
+	for speed in "${speeds_arr[@]}"; do
+		RET=0
+		ethtool_set $h1 speed $speed autoneg off
+		ethtool_set $h2 speed $speed autoneg off
+
+		setup_wait_dev_with_timeout $h1
+		setup_wait_dev_with_timeout $h2
+		ping_do $h1 192.0.2.2
+		check_err $? "ping with speed $speed autoneg off"
+		log_test "force speed $speed on both ends"
+	done
+
+	ethtool -s $h2 autoneg on
+	ethtool -s $h1 autoneg on
+}
+
+different_speeds_autoneg_off()
+{
+	# Test that when we force different speeds, links are not up and ping
+	# fails.
+	RET=0
+
+	local -a speeds_arr=($(different_speeds_get $h1 $h2 0 0))
+	local speed1=${speeds_arr[0]}
+	local speed2=${speeds_arr[1]}
+
+	ethtool_set $h1 speed $speed1 autoneg off
+	ethtool_set $h2 speed $speed2 autoneg off
+
+	setup_wait_dev_with_timeout $h1
+	setup_wait_dev_with_timeout $h2
+	ping_do $h1 192.0.2.2
+	check_fail $? "ping with different speeds"
+
+	log_test "force of different speeds autoneg off"
+
+	ethtool -s $h2 autoneg on
+	ethtool -s $h1 autoneg on
+}
+
+combination_of_neg_on_and_off()
+{
+	# Test that when one device is forced to a speed supported by both
+	# endpoints and the other device is configured to autoneg on, the links
+	# are up and ping passes.
+	local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1))
+
+	for speed in "${speeds_arr[@]}"; do
+		RET=0
+		ethtool_set $h1 speed $speed autoneg off
+
+		setup_wait_dev_with_timeout $h1
+		setup_wait_dev_with_timeout $h2
+		ping_do $h1 192.0.2.2
+		check_err $? "ping with h1-speed=$speed autoneg off, h2 autoneg on"
+		log_test "force speed $speed vs. autoneg"
+	done
+
+	ethtool -s $h1 autoneg on
+}
+
+hex_speed_value_get()
+{
+	local speed=$1; shift
+
+	local shift_size=${speed_values[$speed]}
+	speed=$((0x1 << $"shift_size"))
+	printf "%#x" "$speed"
+}
+
+subset_of_common_speeds_get()
+{
+	local dev1=$1; shift
+	local dev2=$1; shift
+	local adver=$1; shift
+
+	local -a speeds_arr=($(common_speeds_get $dev1 $dev2 0 $adver))
+	local speed_to_advertise=0
+	local speed_to_remove=${speeds_arr[0]}
+	speed_to_remove+='base'
+
+	local -a speeds_mode_arr=($(common_speeds_get $dev1 $dev2 1 $adver))
+
+	for speed in ${speeds_mode_arr[@]}; do
+		if [[ $speed != $speed_to_remove* ]]; then
+			speed=$(hex_speed_value_get $speed)
+			speed_to_advertise=$(($speed_to_advertise | \
+						$speed))
+		fi
+
+	done
+
+	# Convert to hex.
+	printf "%#x" "$speed_to_advertise"
+}
+
+speed_to_advertise_get()
+{
+	# The function returns the hex number that is composed by OR-ing all
+	# the modes corresponding to the provided speed.
+	local speed_without_mode=$1; shift
+	local supported_speeds=("$@"); shift
+	local speed_to_advertise=0
+
+	speed_without_mode+='base'
+
+	for speed in ${supported_speeds[@]}; do
+		if [[ $speed == $speed_without_mode* ]]; then
+			speed=$(hex_speed_value_get $speed)
+			speed_to_advertise=$(($speed_to_advertise | \
+						$speed))
+		fi
+
+	done
+
+	# Convert to hex.
+	printf "%#x" "$speed_to_advertise"
+}
+
+advertise_subset_of_speeds()
+{
+	# Test that when one device advertises a subset of speeds and another
+	# advertises a specific speed (but all modes of this speed), the links
+	# are up and ping passes.
+	RET=0
+
+	local speed_1_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1)
+	ethtool_set $h1 advertise $speed_1_to_advertise
+
+	if [ $RET != 0 ]; then
+		log_test "advertise subset of speeds"
+		return
+	fi
+
+	local -a speeds_arr_without_mode=($(common_speeds_get $h1 $h2 0 1))
+	# Check only speeds that h1 advertised. Remove the first speed.
+	unset speeds_arr_without_mode[0]
+	local -a speeds_arr_with_mode=($(common_speeds_get $h1 $h2 1 1))
+
+	for speed_value in ${speeds_arr_without_mode[@]}; do
+		RET=0
+		local speed_2_to_advertise=$(speed_to_advertise_get $speed_value \
+			"${speeds_arr_with_mode[@]}")
+		ethtool_set $h2 advertise $speed_2_to_advertise
+
+		setup_wait_dev_with_timeout $h1
+		setup_wait_dev_with_timeout $h2
+		ping_do $h1 192.0.2.2
+		check_err $? "ping with h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)"
+
+		log_test "advertise $speed_1_to_advertise vs. $speed_2_to_advertise"
+	done
+
+	ethtool -s $h2 autoneg on
+	ethtool -s $h1 autoneg on
+}
+
+check_highest_speed_is_chosen()
+{
+	# Test that when one device advertises a subset of speeds, the other
+	# chooses the highest speed. This test checks configuration without
+	# traffic.
+	RET=0
+
+	local max_speed
+	local chosen_speed
+	local speed_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1)
+
+	ethtool_set $h1 advertise $speed_to_advertise
+
+	if [ $RET != 0 ]; then
+		log_test "check highest speed"
+		return
+	fi
+
+	local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1))
+
+	max_speed=${speeds_arr[0]}
+	for current in ${speeds_arr[@]}; do
+		if [[ $current -gt $max_speed ]]; then
+			max_speed=$current
+		fi
+	done
+
+	setup_wait_dev_with_timeout $h1
+	setup_wait_dev_with_timeout $h2
+	chosen_speed=$(ethtool $h1 | grep 'Speed:')
+	chosen_speed=${chosen_speed%"Mb/s"*}
+	chosen_speed=${chosen_speed#*"Speed: "}
+	((chosen_speed == max_speed))
+	check_err $? "h1 advertise $speed_to_advertise, h2 sync to speed $chosen_speed"
+
+	log_test "check highest speed"
+
+	ethtool -s $h2 autoneg on
+	ethtool -s $h1 autoneg on
+}
+
+different_speeds_autoneg_on()
+{
+	# Test that when we configure links to advertise different speeds,
+	# links are not up and ping fails.
+	RET=0
+
+	local -a speeds=($(different_speeds_get $h1 $h2 1 1))
+	local speed1=${speeds[0]}
+	local speed2=${speeds[1]}
+
+	speed1=$(hex_speed_value_get $speed1)
+	speed2=$(hex_speed_value_get $speed2)
+
+	ethtool_set $h1 advertise $speed1
+	ethtool_set $h2 advertise $speed2
+
+	if (($RET)); then
+		setup_wait_dev_with_timeout $h1
+		setup_wait_dev_with_timeout $h2
+		ping_do $h1 192.0.2.2
+		check_fail $? "ping with different speeds autoneg on"
+	fi
+
+	log_test "advertise different speeds autoneg on"
+
+	ethtool -s $h2 autoneg on
+	ethtool -s $h1 autoneg on
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+declare -gA speed_values
+eval "speed_values=($(speeds_arr_get))"
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh
new file mode 100755
index 000000000000..a7584448416e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	autoneg
+	autoneg_force_mode
+	no_cable
+"
+
+NUM_NETIFS=2
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source ethtool_lib.sh
+
+TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+	swp3=$NETIF_NO_CABLE
+}
+
+ethtool_ext_state()
+{
+	local dev=$1; shift
+	local expected_ext_state=$1; shift
+	local expected_ext_substate=${1:-""}; shift
+
+	local ext_state=$(ethtool $dev | grep "Link detected" \
+		| cut -d "(" -f2 | cut -d ")" -f1)
+	local ext_substate=$(echo $ext_state | cut -sd "," -f2 \
+		| sed -e 's/^[[:space:]]*//')
+	ext_state=$(echo $ext_state | cut -d "," -f1)
+
+	if [[ $ext_state != $expected_ext_state ]]; then
+		echo "Expected \"$expected_ext_state\", got \"$ext_state\""
+		return 1
+	fi
+	if [[ $ext_substate != $expected_ext_substate ]]; then
+		echo "Expected \"$expected_ext_substate\", got \"$ext_substate\""
+		return 1
+	fi
+}
+
+autoneg()
+{
+	local msg
+
+	RET=0
+
+	ip link set dev $swp1 up
+
+	msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
+			"Autoneg" "No partner detected")
+	check_err $? "$msg"
+
+	log_test "Autoneg, No partner detected"
+
+	ip link set dev $swp1 down
+}
+
+autoneg_force_mode()
+{
+	local msg
+
+	RET=0
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0))
+	local speed1=${speeds_arr[0]}
+	local speed2=${speeds_arr[1]}
+
+	ethtool_set $swp1 speed $speed1 autoneg off
+	ethtool_set $swp2 speed $speed2 autoneg off
+
+	msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
+			"Autoneg" "No partner detected during force mode")
+	check_err $? "$msg"
+
+	msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \
+			"Autoneg" "No partner detected during force mode")
+	check_err $? "$msg"
+
+	log_test "Autoneg, No partner detected during force mode"
+
+	ethtool -s $swp2 autoneg on
+	ethtool -s $swp1 autoneg on
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+no_cable()
+{
+	local msg
+
+	RET=0
+
+	ip link set dev $swp3 up
+
+	msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable")
+	check_err $? "$msg"
+
+	log_test "No cable"
+
+	ip link set dev $swp3 down
+}
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh
new file mode 100644
index 000000000000..b9bfb45085af
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+speeds_arr_get()
+{
+	cmd='/ETHTOOL_LINK_MODE_[^[:space:]]*_BIT[[:space:]]+=[[:space:]]+/ \
+		{sub(/,$/, "") \
+		sub(/ETHTOOL_LINK_MODE_/,"") \
+		sub(/_BIT/,"") \
+		sub(/_Full/,"/Full") \
+		sub(/_Half/,"/Half");\
+		print "["$1"]="$3}'
+
+	awk "${cmd}" /usr/include/linux/ethtool.h
+}
+
+ethtool_set()
+{
+	local cmd="$@"
+	local out=$(ethtool -s $cmd 2>&1 | wc -l)
+
+	check_err $out "error in configuration. $cmd"
+}
+
+dev_linkmodes_params_get()
+{
+	local dev=$1; shift
+	local adver=$1; shift
+	local -a linkmodes_params
+	local param_count
+	local arr
+
+	if (($adver)); then
+		mode="Advertised link modes"
+	else
+		mode="Supported link modes"
+	fi
+
+	local -a dev_linkmodes=($(dev_speeds_get $dev 1 $adver))
+	for ((i=0; i<${#dev_linkmodes[@]}; i++)); do
+		linkmodes_params[$i]=$(echo -e "${dev_linkmodes[$i]}" | \
+			# Replaces all non numbers with spaces
+			sed -e 's/[^0-9]/ /g' | \
+			# Squeeze spaces in sequence to 1 space
+			tr -s ' ')
+		# Count how many numbers were found in the linkmode
+		param_count=$(echo "${linkmodes_params[$i]}" | wc -w)
+		if [[ $param_count -eq 1 ]]; then
+			linkmodes_params[$i]="${linkmodes_params[$i]} 1"
+		elif [[ $param_count -ge 3 ]]; then
+			arr=(${linkmodes_params[$i]})
+			# Take only first two params
+			linkmodes_params[$i]=$(echo "${arr[@]:0:2}")
+		fi
+	done
+	echo ${linkmodes_params[@]}
+}
+
+dev_speeds_get()
+{
+	local dev=$1; shift
+	local with_mode=$1; shift
+	local adver=$1; shift
+	local speeds_str
+
+	if (($adver)); then
+		mode="Advertised link modes"
+	else
+		mode="Supported link modes"
+	fi
+
+	speeds_str=$(ethtool "$dev" | \
+		# Snip everything before the link modes section.
+		sed -n '/'"$mode"':/,$p' | \
+		# Quit processing the rest at the start of the next section.
+		# When checking, skip the header of this section (hence the 2,).
+		sed -n '2,${/^[\t][^ \t]/q};p' | \
+		# Drop the section header of the current section.
+		cut -d':' -f2)
+
+	local -a speeds_arr=($speeds_str)
+	if [[ $with_mode -eq 0 ]]; then
+		for ((i=0; i<${#speeds_arr[@]}; i++)); do
+			speeds_arr[$i]=${speeds_arr[$i]%base*}
+		done
+	fi
+	echo ${speeds_arr[@]}
+}
+
+common_speeds_get()
+{
+	dev1=$1; shift
+	dev2=$1; shift
+	with_mode=$1; shift
+	adver=$1; shift
+
+	local -a dev1_speeds=($(dev_speeds_get $dev1 $with_mode $adver))
+	local -a dev2_speeds=($(dev_speeds_get $dev2 $with_mode $adver))
+
+	comm -12 \
+		<(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \
+		<(printf '%s\n' "${dev2_speeds[@]}" | sort -u)
+}
+
+different_speeds_get()
+{
+	local dev1=$1; shift
+	local dev2=$1; shift
+	local with_mode=$1; shift
+	local adver=$1; shift
+
+	local -a speeds_arr
+
+	speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
+	if [[ ${#speeds_arr[@]} < 2 ]]; then
+		check_err 1 "cannot check different speeds. There are not enough speeds"
+	fi
+
+	echo ${speeds_arr[0]} ${speeds_arr[1]}
+}
diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh
new file mode 100755
index 000000000000..c301e735c8ab
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh
@@ -0,0 +1,341 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	manual_with_verification_h1_to_h2
+	manual_with_verification_h2_to_h1
+	manual_without_verification_h1_to_h2
+	manual_without_verification_h2_to_h1
+	manual_failed_verification_h1_to_h2
+	manual_failed_verification_h2_to_h1
+	lldp
+"
+
+NUM_NETIFS=2
+REQUIRE_MZ=no
+PREEMPTIBLE_PRIO=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+traffic_test()
+{
+	local if=$1; shift
+	local src=$1; shift
+	local num_pkts=10000
+	local before=
+	local after=
+	local delta=
+
+	if [ ${has_pmac_stats[$if]} = false ]; then
+		src="aggregate"
+	fi
+
+	before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
+
+	$MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO
+
+	after=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
+
+	delta=$((after - before))
+
+	# Allow an extra 1% tolerance for random packets sent by the stack
+	[ $delta -ge $num_pkts ] && [ $delta -le $((num_pkts + 100)) ]
+}
+
+manual_with_verification()
+{
+	local tx=$1; shift
+	local rx=$1; shift
+
+	RET=0
+
+	# It isn't completely clear from IEEE 802.3-2018 Figure 99-5: Transmit
+	# Processing state diagram whether the "send_r" variable (send response
+	# to verification frame) should be taken into consideration while the
+	# MAC Merge TX direction is disabled. That being said, at least the
+	# NXP ENETC does not, and requires tx-enabled on in order to respond to
+	# the link partner's verification frames.
+	ethtool --set-mm $rx tx-enabled on
+	ethtool --set-mm $tx verify-enabled on tx-enabled on
+
+	# Wait for verification to finish
+	sleep 1
+
+	ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+		grep -q 'SUCCEEDED'
+	check_err "$?" "Verification did not succeed"
+
+	ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+	check_err "$?" "pMAC TX is not active"
+
+	traffic_test $tx "pmac"
+	check_err "$?" "Traffic did not get sent through $tx's pMAC"
+
+	ethtool --set-mm $tx verify-enabled off tx-enabled off
+	ethtool --set-mm $rx tx-enabled off
+
+	log_test "Manual configuration with verification: $tx to $rx"
+}
+
+manual_with_verification_h1_to_h2()
+{
+	manual_with_verification $h1 $h2
+}
+
+manual_with_verification_h2_to_h1()
+{
+	manual_with_verification $h2 $h1
+}
+
+manual_without_verification()
+{
+	local tx=$1; shift
+	local rx=$1; shift
+
+	RET=0
+
+	ethtool --set-mm $tx verify-enabled off tx-enabled on
+
+	ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+		grep -q 'DISABLED'
+	check_err "$?" "Verification is not disabled"
+
+	ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+	check_err "$?" "pMAC TX is not active"
+
+	traffic_test $tx "pmac"
+	check_err "$?" "Traffic did not get sent through $tx's pMAC"
+
+	ethtool --set-mm $tx verify-enabled off tx-enabled off
+
+	log_test "Manual configuration without verification: $tx to $rx"
+}
+
+manual_without_verification_h1_to_h2()
+{
+	manual_without_verification $h1 $h2
+}
+
+manual_without_verification_h2_to_h1()
+{
+	manual_without_verification $h2 $h1
+}
+
+manual_failed_verification()
+{
+	local tx=$1; shift
+	local rx=$1; shift
+
+	RET=0
+
+	ethtool --set-mm $rx pmac-enabled off
+	ethtool --set-mm $tx verify-enabled on tx-enabled on
+
+	# Wait for verification to time out
+	sleep 1
+
+	ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+		grep -q 'SUCCEEDED'
+	check_fail "$?" "Verification succeeded when it shouldn't have"
+
+	ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+	check_fail "$?" "pMAC TX is active when it shouldn't have"
+
+	traffic_test $tx "emac"
+	check_err "$?" "Traffic did not get sent through $tx's eMAC"
+
+	ethtool --set-mm $tx verify-enabled off tx-enabled off
+	ethtool --set-mm $rx pmac-enabled on
+
+	log_test "Manual configuration with failed verification: $tx to $rx"
+}
+
+manual_failed_verification_h1_to_h2()
+{
+	manual_failed_verification $h1 $h2
+}
+
+manual_failed_verification_h2_to_h1()
+{
+	manual_failed_verification $h2 $h1
+}
+
+smallest_supported_add_frag_size()
+{
+	local iface=$1
+	local rx_min_frag_size=
+
+	rx_min_frag_size=$(ethtool --json --show-mm $iface | \
+		jq '.[]."rx-min-frag-size"')
+
+	if [ $rx_min_frag_size -le 60 ]; then
+		echo 0
+	elif [ $rx_min_frag_size -le 124 ]; then
+		echo 1
+	elif [ $rx_min_frag_size -le 188 ]; then
+		echo 2
+	elif [ $rx_min_frag_size -le 252 ]; then
+		echo 3
+	else
+		echo "$iface: RX min frag size $rx_min_frag_size cannot be advertised over LLDP"
+		exit 1
+	fi
+}
+
+expected_add_frag_size()
+{
+	local iface=$1
+	local requested=$2
+	local min=$(smallest_supported_add_frag_size $iface)
+
+	[ $requested -le $min ] && echo $min || echo $requested
+}
+
+lldp_change_add_frag_size()
+{
+	local add_frag_size=$1
+	local pattern=
+
+	lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null
+	# Wait for TLVs to be received
+	sleep 2
+	pattern=$(printf "Additional fragment size: %d" \
+			 $(expected_add_frag_size $h1 $add_frag_size))
+	lldptool -i $h2 -t -n -V addEthCaps | grep -q "$pattern"
+}
+
+lldp()
+{
+	RET=0
+
+	systemctl start lldpad
+
+	# Configure the interfaces to receive and transmit LLDPDUs
+	lldptool -L -i $h1 adminStatus=rxtx >/dev/null
+	lldptool -L -i $h2 adminStatus=rxtx >/dev/null
+
+	# Enable the transmission of Additional Ethernet Capabilities TLV
+	lldptool -T -i $h1 -V addEthCaps enableTx=yes >/dev/null
+	lldptool -T -i $h2 -V addEthCaps enableTx=yes >/dev/null
+
+	# Wait for TLVs to be received
+	sleep 2
+
+	lldptool -i $h1 -t -n -V addEthCaps | \
+		grep -q "Preemption capability active"
+	check_err "$?" "$h1 pMAC TX is not active"
+
+	lldptool -i $h2 -t -n -V addEthCaps | \
+		grep -q "Preemption capability active"
+	check_err "$?" "$h2 pMAC TX is not active"
+
+	lldp_change_add_frag_size 3
+	check_err "$?" "addFragSize 3"
+
+	lldp_change_add_frag_size 2
+	check_err "$?" "addFragSize 2"
+
+	lldp_change_add_frag_size 1
+	check_err "$?" "addFragSize 1"
+
+	lldp_change_add_frag_size 0
+	check_err "$?" "addFragSize 0"
+
+	traffic_test $h1 "pmac"
+	check_err "$?" "Traffic did not get sent through $h1's pMAC"
+
+	traffic_test $h2 "pmac"
+	check_err "$?" "Traffic did not get sent through $h2's pMAC"
+
+	systemctl stop lldpad
+
+	log_test "LLDP"
+}
+
+h1_create()
+{
+	ip link set dev $h1 up
+
+	tc qdisc add dev $h1 root mqprio num_tc 4 map 0 1 2 3 \
+		queues 1@0 1@1 1@2 1@3 \
+		fp P E E E \
+		hw 1
+
+	ethtool --set-mm $h1 pmac-enabled on tx-enabled off verify-enabled off
+}
+
+h2_create()
+{
+	ip link set dev $h2 up
+
+	ethtool --set-mm $h2 pmac-enabled on tx-enabled off verify-enabled off
+
+	tc qdisc add dev $h2 root mqprio num_tc 4 map 0 1 2 3 \
+		queues 1@0 1@1 1@2 1@3 \
+		fp P E E E \
+		hw 1
+}
+
+h1_destroy()
+{
+	ethtool --set-mm $h1 pmac-enabled off tx-enabled off verify-enabled off
+
+	tc qdisc del dev $h1 root
+
+	ip link set dev $h1 down
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 root
+
+	ethtool --set-mm $h2 pmac-enabled off tx-enabled off verify-enabled off
+
+	ip link set dev $h2 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+}
+
+check_ethtool_mm_support
+check_tc_fp_support
+require_command lldptool
+bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
+
+for netif in ${NETIFS[@]}; do
+	ethtool --show-mm $netif 2>&1 &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: $netif does not support MAC Merge"
+		exit $ksft_skip
+	fi
+
+	if check_ethtool_pmac_std_stats_support $netif eth-mac; then
+		has_pmac_stats[$netif]=true
+	else
+		has_pmac_stats[$netif]=false
+		echo "$netif does not report pMAC statistics, falling back to aggregate"
+	fi
+done
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh
new file mode 100755
index 000000000000..8f60c1685ad4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	rmon_rx_histogram
+	rmon_tx_histogram
+"
+
+NUM_NETIFS=2
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+ETH_FCS_LEN=4
+ETH_HLEN=$((6+6+2))
+
+declare -A netif_mtu
+
+ensure_mtu()
+{
+	local iface=$1; shift
+	local len=$1; shift
+	local current=$(ip -j link show dev $iface | jq -r '.[0].mtu')
+	local required=$((len - ETH_HLEN - ETH_FCS_LEN))
+
+	if [ $current -lt $required ]; then
+		ip link set dev $iface mtu $required || return 1
+	fi
+}
+
+bucket_test()
+{
+	local iface=$1; shift
+	local neigh=$1; shift
+	local set=$1; shift
+	local bucket=$1; shift
+	local len=$1; shift
+	local num_rx=10000
+	local num_tx=20000
+	local expected=
+	local before=
+	local after=
+	local delta=
+
+	# Mausezahn does not include FCS bytes in its length - but the
+	# histogram counters do
+	len=$((len - ETH_FCS_LEN))
+	len=$((len > 0 ? len : 0))
+
+	before=$(ethtool --json -S $iface --groups rmon | \
+		jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
+
+	# Send 10k one way and 20k in the other, to detect counters
+	# mapped to the wrong direction
+	$MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us
+	$MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us
+
+	after=$(ethtool --json -S $iface --groups rmon | \
+		jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
+
+	delta=$((after - before))
+
+	expected=$([ $set = rx ] && echo $num_rx || echo $num_tx)
+
+	# Allow some extra tolerance for other packets sent by the stack
+	[ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ]
+}
+
+rmon_histogram()
+{
+	local iface=$1; shift
+	local neigh=$1; shift
+	local set=$1; shift
+	local nbuckets=0
+	local step=
+
+	RET=0
+
+	while read -r -a bucket; do
+		step="$set-pkts${bucket[0]}to${bucket[1]} on $iface"
+
+		for if in $iface $neigh; do
+			if ! ensure_mtu $if ${bucket[0]}; then
+				log_test_xfail "$if does not support the required MTU for $step"
+				return
+			fi
+		done
+
+		if ! bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then
+			check_err 1 "$step failed"
+			return 1
+		fi
+		log_test "$step"
+		nbuckets=$((nbuckets + 1))
+	done < <(ethtool --json -S $iface --groups rmon | \
+		jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null)
+
+	if [ $nbuckets -eq 0 ]; then
+		log_test_xfail "$iface does not support $set histogram counters"
+		return
+	fi
+}
+
+rmon_rx_histogram()
+{
+	rmon_histogram $h1 $h2 rx
+	rmon_histogram $h2 $h1 rx
+}
+
+rmon_tx_histogram()
+{
+	rmon_histogram $h1 $h2 tx
+	rmon_histogram $h2 $h1 tx
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	for iface in $h1 $h2; do
+		netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu')
+		ip link set dev $iface up
+	done
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	for iface in $h2 $h1; do
+		ip link set dev $iface \
+			mtu ${netif_mtu[$iface]} \
+			down
+	done
+}
+
+check_ethtool_counter_group_support
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh
new file mode 100755
index 000000000000..67fafefc80be
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh
@@ -0,0 +1,334 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+                     +----------------------+
+# | H1                 |                     |                   H2 |
+# |                    |                     |                      |
+# |          $h1.200 + |                     | + $h2.200            |
+# |     192.0.2.1/28 | |                     | | 192.0.2.18/28      |
+# | 2001:db8:1::1/64 | |                     | | 2001:db8:2::1/64   |
+# |                  | |                     | |                    |
+# |              $h1 + |                     | + $h2                |
+# |                  | |                     | |                    |
+# +------------------|-+                     +-|--------------------+
+#                    |                         |
+# +------------------|-------------------------|--------------------+
+# | SW               |                         |                    |
+# |                  |                         |                    |
+# |             $rp1 +                         + $rp2               |
+# |                  |                         |                    |
+# |         $rp1.200 +                         + $rp2.200           |
+# |     192.0.2.2/28                             192.0.2.17/28      |
+# | 2001:db8:1::2/64                             2001:db8:2::2/64   |
+# |                                                                 |
+# +-----------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	test_stats_rx_ipv4
+	test_stats_tx_ipv4
+	test_stats_rx_ipv6
+	test_stats_tx_ipv6
+	respin_enablement
+	test_stats_rx_ipv4
+	test_stats_tx_ipv4
+	test_stats_rx_ipv6
+	test_stats_tx_ipv6
+	reapply_config
+	ping_ipv4
+	ping_ipv6
+	test_stats_rx_ipv4
+	test_stats_tx_ipv4
+	test_stats_rx_ipv6
+	test_stats_tx_ipv6
+	test_stats_report_rx
+	test_stats_report_tx
+	test_destroy_enabled
+	test_double_enable
+"
+NUM_NETIFS=4
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64
+	ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+	ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+	vlan_destroy $h1 200
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64
+	ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+	ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+	ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+	vlan_destroy $h2 200
+	simple_if_fini $h2
+}
+
+router_rp1_200_create()
+{
+	ip link add name $rp1.200 link $rp1 type vlan id 200
+	ip link set dev $rp1.200 addrgenmode eui64
+	ip link set dev $rp1.200 up
+	ip address add dev $rp1.200 192.0.2.2/28
+	ip address add dev $rp1.200 2001:db8:1::2/64
+	ip stats set dev $rp1.200 l3_stats on
+}
+
+router_rp1_200_destroy()
+{
+	ip stats set dev $rp1.200 l3_stats off
+	ip address del dev $rp1.200 2001:db8:1::2/64
+	ip address del dev $rp1.200 192.0.2.2/28
+	ip link del dev $rp1.200
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	router_rp1_200_create
+
+	ip link set dev $rp2 up
+	vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+	vlan_destroy $rp2 200
+	ip link set dev $rp2 down
+
+	router_rp1_200_destroy
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1mac=$(mac_get $rp1)
+	rp2mac=$(mac_get $rp2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1.200 192.0.2.18 " IPv4"
+}
+
+ping_ipv6()
+{
+	ping_test $h1.200 2001:db8:2::1 " IPv6"
+}
+
+send_packets_rx_ipv4()
+{
+	# Send 21 packets instead of 20, because the first one might trap and go
+	# through the SW datapath, which might not bump the HW counter.
+	$MZ $h1.200 -c 21 -d 20msec -p 100 \
+	    -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+send_packets_rx_ipv6()
+{
+	$MZ $h1.200 -6 -c 21 -d 20msec -p 100 \
+	    -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv4()
+{
+	$MZ $h2.200 -c 21 -d 20msec -p 100 \
+	    -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv6()
+{
+	$MZ $h2.200 -6 -c 21 -d 20msec -p 100 \
+	    -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+___test_stats()
+{
+	local dir=$1; shift
+	local prot=$1; shift
+
+	local a
+	local b
+
+	a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
+	send_packets_${dir}_${prot}
+	"$@"
+	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+		       hw_stats_get l3_stats $rp1.200 ${dir} packets)
+	check_err $? "Traffic not reflected in the counter: $a -> $b"
+}
+
+__test_stats()
+{
+	local dir=$1; shift
+	local prot=$1; shift
+
+	RET=0
+	___test_stats "$dir" "$prot"
+	log_test "Test $dir packets: $prot"
+}
+
+test_stats_rx_ipv4()
+{
+	__test_stats rx ipv4
+}
+
+test_stats_tx_ipv4()
+{
+	__test_stats tx ipv4
+}
+
+test_stats_rx_ipv6()
+{
+	__test_stats rx ipv6
+}
+
+test_stats_tx_ipv6()
+{
+	__test_stats tx ipv6
+}
+
+# Make sure everything works well even after stats have been disabled and
+# reenabled on the same device without touching the L3 configuration.
+respin_enablement()
+{
+	log_info "Turning stats off and on again"
+	ip stats set dev $rp1.200 l3_stats off
+	ip stats set dev $rp1.200 l3_stats on
+}
+
+# For the initial run, l3_stats is enabled on a completely set up netdevice. Now
+# do it the other way around: enabling the L3 stats on an L2 netdevice, and only
+# then apply the L3 configuration.
+reapply_config()
+{
+	log_info "Reapplying configuration"
+
+	router_rp1_200_destroy
+
+	ip link add name $rp1.200 link $rp1 type vlan id 200
+	ip link set dev $rp1.200 addrgenmode none
+	ip stats set dev $rp1.200 l3_stats on
+	ip link set dev $rp1.200 addrgenmode eui64
+	ip link set dev $rp1.200 up
+	ip address add dev $rp1.200 192.0.2.2/28
+	ip address add dev $rp1.200 2001:db8:1::2/64
+}
+
+__test_stats_report()
+{
+	local dir=$1; shift
+	local prot=$1; shift
+
+	local a
+	local b
+
+	RET=0
+
+	a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
+	send_packets_${dir}_${prot}
+	ip address flush dev $rp1.200
+	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+		       hw_stats_get l3_stats $rp1.200 ${dir} packets)
+	check_err $? "Traffic not reflected in the counter: $a -> $b"
+	log_test "Test ${dir} packets: stats pushed on loss of L3"
+
+	ip stats set dev $rp1.200 l3_stats off
+	ip link del dev $rp1.200
+	router_rp1_200_create
+}
+
+test_stats_report_rx()
+{
+	__test_stats_report rx ipv4
+}
+
+test_stats_report_tx()
+{
+	__test_stats_report tx ipv4
+}
+
+test_destroy_enabled()
+{
+	RET=0
+
+	ip link del dev $rp1.200
+	router_rp1_200_create
+
+	log_test "Destroy l3_stats-enabled netdev"
+}
+
+test_double_enable()
+{
+	RET=0
+	___test_stats rx ipv4 \
+		ip stats set dev $rp1.200 l3_stats on
+	log_test "Test stat retention across a spurious enablement"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info |
+	   jq '.[].info.l3_stats.used')
+[[ $used = true ]]
+check_err $? "hw_stats_info.used=$used"
+log_test "l3_stats offloaded"
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh
new file mode 100755
index 000000000000..a94d92e1abce
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test L3 stats on IP-in-IP GRE tunnel without key.
+
+# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
+# details.
+
+ALL_TESTS="
+	ping_ipv4
+	test_stats_rx
+	test_stats_tx
+"
+NUM_NETIFS=6
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/../../../net/forwarding/ipip_lib.sh
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	ol1mac=$(mac_get $ol1)
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_flat_create gre $ol1 $ul1
+	sw2_flat_create gre $ol2 $ul2
+	ip stats set dev g1a l3_stats on
+	ip stats set dev g2a l3_stats on
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip stats set dev g1a l3_stats off
+	ip stats set dev g2a l3_stats off
+
+	sw2_flat_destroy $ol2 $ul2
+	sw1_flat_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+	forwarding_restore
+}
+
+ping_ipv4()
+{
+	RET=0
+
+	ping_test $h1 192.0.2.18 " gre flat"
+}
+
+send_packets_ipv4()
+{
+	# Send 21 packets instead of 20, because the first one might trap and go
+	# through the SW datapath, which might not bump the HW counter.
+	$MZ $h1 -c 21 -d 20msec -p 100 \
+	    -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+test_stats()
+{
+	local dev=$1; shift
+	local dir=$1; shift
+
+	local a
+	local b
+
+	RET=0
+
+	a=$(hw_stats_get l3_stats $dev $dir packets)
+	send_packets_ipv4
+	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+		     hw_stats_get l3_stats $dev $dir packets)
+	check_err $? "Traffic not reflected in the counter: $a -> $b"
+
+	log_test "Test $dir packets: $prot"
+}
+
+test_stats_tx()
+{
+	test_stats g1a tx
+}
+
+test_stats_rx()
+{
+	test_stats g2a rx
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
new file mode 100644
index 000000000000..62456df947bc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+
+#include <liburing.h>
+
+static long page_size;
+#define AREA_SIZE (8192 * page_size)
+#define SEND_SIZE (512 * 4096)
+#define min(a, b) \
+	({ \
+		typeof(a) _a = (a); \
+		typeof(b) _b = (b); \
+		_a < _b ? _a : _b; \
+	})
+#define min_t(t, a, b) \
+	({ \
+		t _ta = (a); \
+		t _tb = (b); \
+		min(_ta, _tb); \
+	})
+
+#define ALIGN_UP(v, align) (((v) + (align) - 1) & ~((align) - 1))
+
+static int cfg_server;
+static int cfg_client;
+static int cfg_port = 8000;
+static int cfg_payload_len;
+static const char *cfg_ifname;
+static int cfg_queue_id = -1;
+static bool cfg_oneshot;
+static int cfg_oneshot_recvs;
+static int cfg_send_size = SEND_SIZE;
+static struct sockaddr_in6 cfg_addr;
+
+static char *payload;
+static void *area_ptr;
+static void *ring_ptr;
+static size_t ring_size;
+static struct io_uring_zcrx_rq rq_ring;
+static unsigned long area_token;
+static int connfd;
+static bool stop;
+static size_t received;
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+	int ret;
+
+	sin6->sin6_family = AF_INET6;
+	sin6->sin6_port = htons(port);
+
+	ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+	if (ret != 1) {
+		/* fallback to plain IPv4 */
+		ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+		if (ret != 1)
+			return -1;
+
+		/* add ::ffff prefix */
+		sin6->sin6_addr.s6_addr32[0] = 0;
+		sin6->sin6_addr.s6_addr32[1] = 0;
+		sin6->sin6_addr.s6_addr16[4] = 0;
+		sin6->sin6_addr.s6_addr16[5] = 0xffff;
+	}
+
+	return 0;
+}
+
+static inline size_t get_refill_ring_size(unsigned int rq_entries)
+{
+	size_t size;
+
+	ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe);
+	/* add space for the header (head/tail/etc.) */
+	ring_size += page_size;
+	return ALIGN_UP(ring_size, page_size);
+}
+
+static void setup_zcrx(struct io_uring *ring)
+{
+	unsigned int ifindex;
+	unsigned int rq_entries = 4096;
+	int ret;
+
+	ifindex = if_nametoindex(cfg_ifname);
+	if (!ifindex)
+		error(1, 0, "bad interface name: %s", cfg_ifname);
+
+	area_ptr = mmap(NULL,
+			AREA_SIZE,
+			PROT_READ | PROT_WRITE,
+			MAP_ANONYMOUS | MAP_PRIVATE,
+			0,
+			0);
+	if (area_ptr == MAP_FAILED)
+		error(1, 0, "mmap(): zero copy area");
+
+	ring_size = get_refill_ring_size(rq_entries);
+	ring_ptr = mmap(NULL,
+			ring_size,
+			PROT_READ | PROT_WRITE,
+			MAP_ANONYMOUS | MAP_PRIVATE,
+			0,
+			0);
+
+	struct io_uring_region_desc region_reg = {
+		.size = ring_size,
+		.user_addr = (__u64)(unsigned long)ring_ptr,
+		.flags = IORING_MEM_REGION_TYPE_USER,
+	};
+
+	struct io_uring_zcrx_area_reg area_reg = {
+		.addr = (__u64)(unsigned long)area_ptr,
+		.len = AREA_SIZE,
+		.flags = 0,
+	};
+
+	struct io_uring_zcrx_ifq_reg reg = {
+		.if_idx = ifindex,
+		.if_rxq = cfg_queue_id,
+		.rq_entries = rq_entries,
+		.area_ptr = (__u64)(unsigned long)&area_reg,
+		.region_ptr = (__u64)(unsigned long)&region_reg,
+	};
+
+	ret = io_uring_register_ifq(ring, &reg);
+	if (ret)
+		error(1, 0, "io_uring_register_ifq(): %d", ret);
+
+	rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head);
+	rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail);
+	rq_ring.rqes = (struct io_uring_zcrx_rqe *)((char *)ring_ptr + reg.offsets.rqes);
+	rq_ring.rq_tail = 0;
+	rq_ring.ring_entries = reg.rq_entries;
+
+	area_token = area_reg.rq_area_token;
+}
+
+static void add_accept(struct io_uring *ring, int sockfd)
+{
+	struct io_uring_sqe *sqe;
+
+	sqe = io_uring_get_sqe(ring);
+
+	io_uring_prep_accept(sqe, sockfd, NULL, NULL, 0);
+	sqe->user_data = 1;
+}
+
+static void add_recvzc(struct io_uring *ring, int sockfd)
+{
+	struct io_uring_sqe *sqe;
+
+	sqe = io_uring_get_sqe(ring);
+
+	io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, 0, 0);
+	sqe->ioprio |= IORING_RECV_MULTISHOT;
+	sqe->user_data = 2;
+}
+
+static void add_recvzc_oneshot(struct io_uring *ring, int sockfd, size_t len)
+{
+	struct io_uring_sqe *sqe;
+
+	sqe = io_uring_get_sqe(ring);
+
+	io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, len, 0);
+	sqe->ioprio |= IORING_RECV_MULTISHOT;
+	sqe->user_data = 2;
+}
+
+static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+	if (cqe->res < 0)
+		error(1, 0, "accept()");
+	if (connfd)
+		error(1, 0, "Unexpected second connection");
+
+	connfd = cqe->res;
+	if (cfg_oneshot)
+		add_recvzc_oneshot(ring, connfd, page_size);
+	else
+		add_recvzc(ring, connfd);
+}
+
+static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+	unsigned rq_mask = rq_ring.ring_entries - 1;
+	struct io_uring_zcrx_cqe *rcqe;
+	struct io_uring_zcrx_rqe *rqe;
+	struct io_uring_sqe *sqe;
+	uint64_t mask;
+	char *data;
+	ssize_t n;
+	int i;
+
+	if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs == 0) {
+		stop = true;
+		return;
+	}
+
+	if (cqe->res < 0)
+		error(1, 0, "recvzc(): %d", cqe->res);
+
+	if (cfg_oneshot) {
+		if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) {
+			add_recvzc_oneshot(ring, connfd, page_size);
+			cfg_oneshot_recvs--;
+		}
+	} else if (!(cqe->flags & IORING_CQE_F_MORE)) {
+		add_recvzc(ring, connfd);
+	}
+
+	rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1);
+
+	n = cqe->res;
+	mask = (1ULL << IORING_ZCRX_AREA_SHIFT) - 1;
+	data = (char *)area_ptr + (rcqe->off & mask);
+
+	for (i = 0; i < n; i++) {
+		if (*(data + i) != payload[(received + i)])
+			error(1, 0, "payload mismatch at %d", i);
+	}
+	received += n;
+
+	rqe = &rq_ring.rqes[(rq_ring.rq_tail & rq_mask)];
+	rqe->off = (rcqe->off & ~IORING_ZCRX_AREA_MASK) | area_token;
+	rqe->len = cqe->res;
+	io_uring_smp_store_release(rq_ring.ktail, ++rq_ring.rq_tail);
+}
+
+static void server_loop(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	unsigned int count = 0;
+	unsigned int head;
+	int i, ret;
+
+	io_uring_submit_and_wait(ring, 1);
+
+	io_uring_for_each_cqe(ring, head, cqe) {
+		if (cqe->user_data == 1)
+			process_accept(ring, cqe);
+		else if (cqe->user_data == 2)
+			process_recvzc(ring, cqe);
+		else
+			error(1, 0, "unknown cqe");
+		count++;
+	}
+	io_uring_cq_advance(ring, count);
+}
+
+static void run_server(void)
+{
+	unsigned int flags = 0;
+	struct io_uring ring;
+	int fd, enable, ret;
+	uint64_t tstop;
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (fd == -1)
+		error(1, 0, "socket()");
+
+	enable = 1;
+	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int));
+	if (ret < 0)
+		error(1, 0, "setsockopt(SO_REUSEADDR)");
+
+	ret = bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+	if (ret < 0)
+		error(1, 0, "bind()");
+
+	if (listen(fd, 1024) < 0)
+		error(1, 0, "listen()");
+
+	flags |= IORING_SETUP_COOP_TASKRUN;
+	flags |= IORING_SETUP_SINGLE_ISSUER;
+	flags |= IORING_SETUP_DEFER_TASKRUN;
+	flags |= IORING_SETUP_SUBMIT_ALL;
+	flags |= IORING_SETUP_CQE32;
+
+	io_uring_queue_init(512, &ring, flags);
+
+	setup_zcrx(&ring);
+
+	add_accept(&ring, fd);
+
+	tstop = gettimeofday_ms() + 5000;
+	while (!stop && gettimeofday_ms() < tstop)
+		server_loop(&ring);
+
+	if (!stop)
+		error(1, 0, "test failed\n");
+}
+
+static void run_client(void)
+{
+	ssize_t to_send = cfg_send_size;
+	ssize_t sent = 0;
+	ssize_t chunk, res;
+	int fd;
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (fd == -1)
+		error(1, 0, "socket()");
+
+	if (connect(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)))
+		error(1, 0, "connect()");
+
+	while (to_send) {
+		void *src = &payload[sent];
+
+		chunk = min_t(ssize_t, cfg_payload_len, to_send);
+		res = send(fd, src, chunk, 0);
+		if (res < 0)
+			error(1, 0, "send(): %zd", sent);
+		sent += res;
+		to_send -= res;
+	}
+
+	close(fd);
+}
+
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s (-4|-6) (-s|-c) -h<server_ip> -p<port> "
+		    "-l<payload_size> -i<ifname> -q<rxq_id>", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	const int max_payload_len = SEND_SIZE -
+				    sizeof(struct ipv6hdr) -
+				    sizeof(struct tcphdr) -
+				    40 /* max tcp options */;
+	struct sockaddr_in6 *addr6 = (void *) &cfg_addr;
+	char *addr = NULL;
+	int ret;
+	int c;
+
+	if (argc <= 1)
+		usage(argv[0]);
+	cfg_payload_len = max_payload_len;
+
+	while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:")) != -1) {
+		switch (c) {
+		case 's':
+			if (cfg_client)
+				error(1, 0, "Pass one of -s or -c");
+			cfg_server = 1;
+			break;
+		case 'c':
+			if (cfg_server)
+				error(1, 0, "Pass one of -s or -c");
+			cfg_client = 1;
+			break;
+		case 'h':
+			addr = optarg;
+			break;
+		case 'p':
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 'l':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'i':
+			cfg_ifname = optarg;
+			break;
+		case 'q':
+			cfg_queue_id = strtoul(optarg, NULL, 0);
+			break;
+		case 'o': {
+			cfg_oneshot = true;
+			cfg_oneshot_recvs = strtoul(optarg, NULL, 0);
+			break;
+		}
+		case 'z':
+			cfg_send_size = strtoul(optarg, NULL, 0);
+			break;
+		}
+	}
+
+	if (cfg_server && addr)
+		error(1, 0, "Receiver cannot have -h specified");
+
+	memset(addr6, 0, sizeof(*addr6));
+	addr6->sin6_family = AF_INET6;
+	addr6->sin6_port = htons(cfg_port);
+	addr6->sin6_addr = in6addr_any;
+	if (addr) {
+		ret = parse_address(addr, cfg_port, addr6);
+		if (ret)
+			error(1, 0, "receiver address parse error: %s", addr);
+	}
+
+	if (cfg_payload_len > max_payload_len)
+		error(1, 0, "-l: payload exceeds max (%d)", max_payload_len);
+}
+
+int main(int argc, char **argv)
+{
+	const char *cfg_test = argv[argc - 1];
+	int i;
+
+	page_size = sysconf(_SC_PAGESIZE);
+	if (page_size < 0)
+		return 1;
+
+	if (posix_memalign((void **)&payload, page_size, SEND_SIZE))
+		return 1;
+
+	parse_opts(argc, argv);
+
+	for (i = 0; i < SEND_SIZE; i++)
+		payload[i] = 'a' + (i % 26);
+
+	if (cfg_server)
+		run_server();
+	else if (cfg_client)
+		run_client();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
new file mode 100755
index 000000000000..712c806508b5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import re
+from os import path
+from lib.py import ksft_run, ksft_exit, KsftSkipEx
+from lib.py import NetDrvEpEnv
+from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
+
+
+def _get_current_settings(cfg):
+    output = ethtool(f"-g {cfg.ifname}", json=True)[0]
+    return (output['rx'], output['hds-thresh'])
+
+
+def _get_combined_channels(cfg):
+    output = ethtool(f"-l {cfg.ifname}").stdout
+    values = re.findall(r'Combined:\s+(\d+)', output)
+    return int(values[1])
+
+
+def _create_rss_ctx(cfg, chan):
+    output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout
+    values = re.search(r'New RSS context is (\d+)', output).group(1)
+    ctx_id = int(values)
+    return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}"))
+
+
+def _set_flow_rule(cfg, port, chan):
+    output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout
+    values = re.search(r'ID (\d+)', output).group(1)
+    return int(values)
+
+
+def _set_flow_rule_rss(cfg, port, ctx_id):
+    output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout
+    values = re.search(r'ID (\d+)', output).group(1)
+    return int(values)
+
+
+def test_zcrx(cfg) -> None:
+    cfg.require_ipver('6')
+
+    combined_chans = _get_combined_channels(cfg)
+    if combined_chans < 2:
+        raise KsftSkipEx('at least 2 combined channels required')
+    (rx_ring, hds_thresh) = _get_current_settings(cfg)
+    port = rand_port()
+
+    ethtool(f"-G {cfg.ifname} tcp-data-split on")
+    defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+
+    ethtool(f"-G {cfg.ifname} hds-thresh 0")
+    defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+
+    ethtool(f"-G {cfg.ifname} rx 64")
+    defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+
+    ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+    defer(ethtool, f"-X {cfg.ifname} default")
+
+    flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+    defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+
+    rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+    tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+    with bkg(rx_cmd, exit_wait=True):
+        wait_port_listen(port, proto="tcp")
+        cmd(tx_cmd, host=cfg.remote)
+
+
+def test_zcrx_oneshot(cfg) -> None:
+    cfg.require_ipver('6')
+
+    combined_chans = _get_combined_channels(cfg)
+    if combined_chans < 2:
+        raise KsftSkipEx('at least 2 combined channels required')
+    (rx_ring, hds_thresh) = _get_current_settings(cfg)
+    port = rand_port()
+
+    ethtool(f"-G {cfg.ifname} tcp-data-split on")
+    defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+
+    ethtool(f"-G {cfg.ifname} hds-thresh 0")
+    defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+
+    ethtool(f"-G {cfg.ifname} rx 64")
+    defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+
+    ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+    defer(ethtool, f"-X {cfg.ifname} default")
+
+    flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+    defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+
+    rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
+    tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384"
+    with bkg(rx_cmd, exit_wait=True):
+        wait_port_listen(port, proto="tcp")
+        cmd(tx_cmd, host=cfg.remote)
+
+
+def test_zcrx_rss(cfg) -> None:
+    cfg.require_ipver('6')
+
+    combined_chans = _get_combined_channels(cfg)
+    if combined_chans < 2:
+        raise KsftSkipEx('at least 2 combined channels required')
+    (rx_ring, hds_thresh) = _get_current_settings(cfg)
+    port = rand_port()
+
+    ethtool(f"-G {cfg.ifname} tcp-data-split on")
+    defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+
+    ethtool(f"-G {cfg.ifname} hds-thresh 0")
+    defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+
+    ethtool(f"-G {cfg.ifname} rx 64")
+    defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+
+    ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+    defer(ethtool, f"-X {cfg.ifname} default")
+
+    (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1)
+    flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id)
+    defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+
+    rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+    tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+    with bkg(rx_cmd, exit_wait=True):
+        wait_port_listen(port, proto="tcp")
+        cmd(tx_cmd, host=cfg.remote)
+
+
+def main() -> None:
+    with NetDrvEpEnv(__file__) as cfg:
+        cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx")
+        cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+        ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/irq.py b/tools/testing/selftests/drivers/net/hw/irq.py
new file mode 100755
index 000000000000..0699d6a8b4e2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/irq.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_ge, ksft_eq
+from lib.py import KsftSkipEx
+from lib.py import ksft_disruptive
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import NetDrvEnv
+from lib.py import cmd, ip, defer
+
+
+def read_affinity(irq) -> str:
+    with open(f'/proc/irq/{irq}/smp_affinity', 'r') as fp:
+        return fp.read().lstrip("0,").strip()
+
+
+def write_affinity(irq, what) -> str:
+    if what != read_affinity(irq):
+        with open(f'/proc/irq/{irq}/smp_affinity', 'w') as fp:
+            fp.write(what)
+
+
+def check_irqs_reported(cfg) -> None:
+    """ Check that device reports IRQs for NAPI instances """
+    napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+    irqs = sum(['irq' in x for x in napis])
+
+    ksft_ge(irqs, 1)
+    ksft_eq(irqs, len(napis))
+
+
+def _check_reconfig(cfg, reconfig_cb) -> None:
+    napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+    for n in reversed(napis):
+        if 'irq' in n:
+            break
+    else:
+        raise KsftSkipEx(f"Device has no NAPI with IRQ attribute (#napis: {len(napis)}")
+
+    old = read_affinity(n['irq'])
+    # pick an affinity that's not the current one
+    new = "3" if old != "3" else "5"
+    write_affinity(n['irq'], new)
+    defer(write_affinity, n['irq'], old)
+
+    reconfig_cb(cfg)
+
+    ksft_eq(read_affinity(n['irq']), new, comment="IRQ affinity changed after reconfig")
+
+
+def check_reconfig_queues(cfg) -> None:
+    def reconfig(cfg) -> None:
+        channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+        if channels['combined-count'] == 0:
+            rx_type = 'rx'
+        else:
+            rx_type = 'combined'
+        cur_queue_cnt = channels[f'{rx_type}-count']
+        max_queue_cnt = channels[f'{rx_type}-max']
+
+        cmd(f"ethtool -L {cfg.ifname} {rx_type} 1")
+        cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
+        cmd(f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")
+
+    _check_reconfig(cfg, reconfig)
+
+
+def check_reconfig_xdp(cfg) -> None:
+    def reconfig(cfg) -> None:
+        ip(f"link set dev %s xdp obj %s sec xdp" %
+           (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
+        ip(f"link set dev %s xdp off" % cfg.ifname)
+
+    _check_reconfig(cfg, reconfig)
+
+
+@ksft_disruptive
+def check_down(cfg) -> None:
+    def reconfig(cfg) -> None:
+        ip("link set dev %s down" % cfg.ifname)
+        ip("link set dev %s up" % cfg.ifname)
+
+    _check_reconfig(cfg, reconfig)
+
+
+def main() -> None:
+    with NetDrvEnv(__file__, nsim_test=False) as cfg:
+        cfg.ethnl = EthtoolFamily()
+        cfg.netnl = NetdevFamily()
+
+        ksft_run([check_irqs_reported, check_reconfig_queues,
+                  check_reconfig_xdp, check_down],
+                 args=(cfg, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
new file mode 100644
index 000000000000..766bfc4ad842
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Driver test environment (hardware-only tests).
+NetDrvEnv and NetDrvEpEnv are the main environment classes.
+Former is for local host only tests, latter creates / connects
+to a remote endpoint. See NIPA wiki for more information about
+running and writing driver tests.
+"""
+
+import sys
+from pathlib import Path
+
+KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve()
+
+try:
+    sys.path.append(KSFT_DIR.as_posix())
+
+    # Import one by one to avoid pylint false positives
+    from net.lib.py import NetNS, NetNSEnter, NetdevSimDev
+    from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
+        NlError, RtnlFamily, DevlinkFamily, PSPFamily
+    from net.lib.py import CmdExitFailure
+    from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
+        fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
+    from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+    from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
+        ksft_setup, ksft_variants, KsftNamedVariant
+    from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
+        ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
+    from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner
+    from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv
+
+    __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev",
+               "EthtoolFamily", "NetdevFamily", "NetshaperFamily",
+               "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily",
+               "CmdExitFailure",
+               "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
+               "fd_read_timeout", "ip", "rand_port",
+               "wait_port_listen", "wait_file",
+               "KsftSkipEx", "KsftFailEx", "KsftXfailEx",
+               "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
+               "ksft_setup", "ksft_variants", "KsftNamedVariant",
+               "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
+               "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
+               "ksft_not_none", "ksft_not_none",
+               "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote",
+               "Iperf3Runner"]
+except ModuleNotFoundError as e:
+    print("Failed importing `net` library from kernel sources")
+    print(str(e))
+    sys.exit(4)
diff --git a/tools/testing/selftests/drivers/net/hw/loopback.sh b/tools/testing/selftests/drivers/net/hw/loopback.sh
new file mode 100755
index 000000000000..5acc3ff820aa
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/loopback.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+ALL_TESTS="loopback_test"
+NUM_NETIFS=2
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+	tc qdisc add dev $h1 clsact
+}
+
+h1_destroy()
+{
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2
+}
+
+loopback_test()
+{
+	RET=0
+
+	tc filter add dev $h1 ingress protocol arp pref 1 handle 101 flower \
+		skip_hw arp_op reply arp_tip 192.0.2.1 action drop
+
+	$MZ $h1 -c 1 -t arp -q
+
+	tc_check_packets "dev $h1 ingress" 101 1
+	check_fail $? "Matched on a filter without loopback setup"
+
+	ethtool -K $h1 loopback on
+	check_err $? "Failed to enable loopback"
+
+	setup_wait_dev $h1
+
+	$MZ $h1 -c 1 -t arp -q
+
+	tc_check_packets "dev $h1 ingress" 101 1
+	check_err $? "Did not match on filter with loopback"
+
+	ethtool -K $h1 loopback off
+	check_err $? "Failed to disable loopback"
+
+	$MZ $h1 -c 1 -t arp -q
+
+	tc_check_packets "dev $h1 ingress" 101 2
+	check_fail $? "Matched on a filter after loopback was removed"
+
+	tc filter del dev $h1 ingress protocol arp pref 1 handle 101 flower
+
+	log_test "loopback"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	if ethtool -k $h1 | grep loopback | grep -q fixed; then
+		log_test "SKIP: dev $h1 does not support loopback feature"
+		exit $ksft_skip
+	fi
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
new file mode 100644
index 000000000000..3288ed04ce08
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -0,0 +1,1524 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tcpdevmem netcat. Works similarly to netcat but does device memory TCP
+ * instead of regular TCP. Uses udmabuf to mock a dmabuf provider.
+ *
+ * Usage:
+ *
+ *     On server:
+ *     ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201
+ *
+ *     On client:
+ *     echo -n "hello\nworld" | \
+ *		ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
+ *
+ * Note this is compatible with regular netcat. i.e. the sender or receiver can
+ * be replaced with regular netcat to test the RX or TX path in isolation.
+ *
+ * Test data validation (devmem TCP on RX only):
+ *
+ *     On server:
+ *     ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7
+ *
+ *     On client:
+ *     yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
+ *             head -c 1G | \
+ *             nc <server IP> 5201 -p 5201
+ *
+ * Test data validation (devmem TCP on RX and TX, validation happens on RX):
+ *
+ *	On server:
+ *	ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1
+ *
+ *	On client:
+ *	yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \
+ *		head -c 1M | \
+ *		ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
+ */
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <linux/uio.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#define __iovec_defined
+#include <fcntl.h>
+#include <malloc.h>
+#include <error.h>
+#include <poll.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+
+#include <linux/memfd.h>
+#include <linux/dma-buf.h>
+#include <linux/errqueue.h>
+#include <linux/udmabuf.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+#include <linux/genetlink.h>
+#include <linux/netdev.h>
+#include <linux/ethtool_netlink.h>
+#include <time.h>
+#include <net/if.h>
+
+#include "netdev-user.h"
+#include "ethtool-user.h"
+#include <ynl.h>
+
+#define PAGE_SHIFT 12
+#define TEST_PREFIX "ncdevmem"
+#define NUM_PAGES 16000
+
+#ifndef MSG_SOCK_DEVMEM
+#define MSG_SOCK_DEVMEM 0x2000000
+#endif
+
+#define MAX_IOV 1024
+
+static size_t max_chunk;
+static char *server_ip;
+static char *client_ip;
+static char *port;
+static size_t do_validation;
+static int start_queue = -1;
+static int num_queues = -1;
+static char *ifname;
+static unsigned int ifindex;
+static unsigned int dmabuf_id;
+static uint32_t tx_dmabuf_id;
+static int waittime_ms = 500;
+
+/* System state loaded by current_config_load() */
+#define MAX_FLOWS	8
+static int ntuple_ids[MAX_FLOWS] = { -1, -1, -1, -1, -1, -1, -1, -1, };
+
+struct memory_buffer {
+	int fd;
+	size_t size;
+
+	int devfd;
+	int memfd;
+	char *buf_mem;
+};
+
+struct memory_provider {
+	struct memory_buffer *(*alloc)(size_t size);
+	void (*free)(struct memory_buffer *ctx);
+	void (*memcpy_to_device)(struct memory_buffer *dst, size_t off,
+				 void *src, int n);
+	void (*memcpy_from_device)(void *dst, struct memory_buffer *src,
+				   size_t off, int n);
+};
+
+static void pr_err(const char *fmt, ...)
+{
+	va_list args;
+
+	fprintf(stderr, "%s: ", TEST_PREFIX);
+
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+
+	if (errno != 0)
+		fprintf(stderr, ": %s", strerror(errno));
+	fprintf(stderr, "\n");
+}
+
+static struct memory_buffer *udmabuf_alloc(size_t size)
+{
+	struct udmabuf_create create;
+	struct memory_buffer *ctx;
+	int ret;
+
+	ctx = malloc(sizeof(*ctx));
+	if (!ctx)
+		return NULL;
+
+	ctx->size = size;
+
+	ctx->devfd = open("/dev/udmabuf", O_RDWR);
+	if (ctx->devfd < 0) {
+		pr_err("[skip,no-udmabuf: Unable to access DMA buffer device file]");
+		goto err_free_ctx;
+	}
+
+	ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
+	if (ctx->memfd < 0) {
+		pr_err("[skip,no-memfd]");
+		goto err_close_dev;
+	}
+
+	ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK);
+	if (ret < 0) {
+		pr_err("[skip,fcntl-add-seals]");
+		goto err_close_memfd;
+	}
+
+	ret = ftruncate(ctx->memfd, size);
+	if (ret == -1) {
+		pr_err("[FAIL,memfd-truncate]");
+		goto err_close_memfd;
+	}
+
+	memset(&create, 0, sizeof(create));
+
+	create.memfd = ctx->memfd;
+	create.offset = 0;
+	create.size = size;
+	ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create);
+	if (ctx->fd < 0) {
+		pr_err("[FAIL, create udmabuf]");
+		goto err_close_fd;
+	}
+
+	ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+			    ctx->fd, 0);
+	if (ctx->buf_mem == MAP_FAILED) {
+		pr_err("[FAIL, map udmabuf]");
+		goto err_close_fd;
+	}
+
+	return ctx;
+
+err_close_fd:
+	close(ctx->fd);
+err_close_memfd:
+	close(ctx->memfd);
+err_close_dev:
+	close(ctx->devfd);
+err_free_ctx:
+	free(ctx);
+	return NULL;
+}
+
+static void udmabuf_free(struct memory_buffer *ctx)
+{
+	munmap(ctx->buf_mem, ctx->size);
+	close(ctx->fd);
+	close(ctx->memfd);
+	close(ctx->devfd);
+	free(ctx);
+}
+
+static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off,
+				     void *src, int n)
+{
+	struct dma_buf_sync sync = {};
+
+	sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
+	ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+
+	memcpy(dst->buf_mem + off, src, n);
+
+	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
+	ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+}
+
+static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
+				       size_t off, int n)
+{
+	struct dma_buf_sync sync = {};
+
+	sync.flags = DMA_BUF_SYNC_START;
+	ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync);
+
+	memcpy(dst, src->buf_mem + off, n);
+
+	sync.flags = DMA_BUF_SYNC_END;
+	ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync);
+}
+
+static struct memory_provider udmabuf_memory_provider = {
+	.alloc = udmabuf_alloc,
+	.free = udmabuf_free,
+	.memcpy_to_device = udmabuf_memcpy_to_device,
+	.memcpy_from_device = udmabuf_memcpy_from_device,
+};
+
+static struct memory_provider *provider = &udmabuf_memory_provider;
+
+static void print_nonzero_bytes(void *ptr, size_t size)
+{
+	unsigned char *p = ptr;
+	unsigned int i;
+
+	for (i = 0; i < size; i++)
+		putchar(p[i]);
+}
+
+int validate_buffer(void *line, size_t size)
+{
+	static unsigned char seed = 1;
+	unsigned char *ptr = line;
+	unsigned char expected;
+	static int errors;
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		expected = seed ? seed : '\n';
+		if (ptr[i] != expected) {
+			fprintf(stderr,
+				"Failed validation: expected=%u, actual=%u, index=%lu\n",
+				expected, ptr[i], i);
+			errors++;
+			if (errors > 20) {
+				pr_err("validation failed");
+				return -1;
+			}
+		}
+		seed++;
+		if (seed == do_validation)
+			seed = 0;
+	}
+
+	fprintf(stdout, "Validated buffer\n");
+	return 0;
+}
+
+static int
+__run_command(char *out, size_t outlen, const char *cmd, va_list args)
+{
+	char command[256];
+	FILE *fp;
+
+	vsnprintf(command, sizeof(command), cmd, args);
+
+	fprintf(stderr, "Running: %s\n", command);
+	fp = popen(command, "r");
+	if (!fp)
+		return -1;
+	if (out) {
+		size_t len;
+
+		if (!fgets(out, outlen, fp))
+			return -1;
+
+		/* Remove trailing newline if present */
+		len = strlen(out);
+		if (len && out[len - 1] == '\n')
+			out[len - 1] = '\0';
+	}
+	return pclose(fp);
+}
+
+static int run_command(const char *cmd, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, cmd);
+	ret = __run_command(NULL, 0, cmd, args);
+	va_end(args);
+
+	return ret;
+}
+
+static int ethtool_add_flow(const char *format, ...)
+{
+	char local_output[256], cmd[256];
+	const char *id_start;
+	int flow_idx, ret;
+	char *endptr;
+	long flow_id;
+	va_list args;
+
+	for (flow_idx = 0; flow_idx < MAX_FLOWS; flow_idx++)
+		if (ntuple_ids[flow_idx] == -1)
+			break;
+	if (flow_idx == MAX_FLOWS) {
+		fprintf(stderr, "Error: too many flows\n");
+		return -1;
+	}
+
+	snprintf(cmd, sizeof(cmd), "ethtool -N %s %s", ifname, format);
+
+	va_start(args, format);
+	ret = __run_command(local_output, sizeof(local_output), cmd, args);
+	va_end(args);
+
+	if (ret != 0)
+		return ret;
+
+	/* Extract the ID from the output */
+	id_start = strstr(local_output, "Added rule with ID ");
+	if (!id_start)
+		return -1;
+	id_start += strlen("Added rule with ID ");
+
+	flow_id = strtol(id_start, &endptr, 10);
+	if (endptr == id_start || flow_id < 0 || flow_id > INT_MAX)
+		return -1;
+
+	fprintf(stderr, "Added flow rule with ID %ld\n", flow_id);
+	ntuple_ids[flow_idx] = flow_id;
+	return flow_id;
+}
+
+static int rxq_num(int ifindex)
+{
+	struct ethtool_channels_get_req *req;
+	struct ethtool_channels_get_rsp *rsp;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	int num = -1;
+
+	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return -1;
+	}
+
+	req = ethtool_channels_get_req_alloc();
+	ethtool_channels_get_req_set_header_dev_index(req, ifindex);
+	rsp = ethtool_channels_get(ys, req);
+	if (rsp)
+		num = rsp->rx_count + rsp->combined_count;
+	ethtool_channels_get_req_free(req);
+	ethtool_channels_get_rsp_free(rsp);
+
+	ynl_sock_destroy(ys);
+
+	return num;
+}
+
+static void reset_flow_steering(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_FLOWS; i++) {
+		if (ntuple_ids[i] == -1)
+			continue;
+		run_command("ethtool -N %s delete %d",
+			    ifname, ntuple_ids[i]);
+		ntuple_ids[i] = -1;
+	}
+}
+
+static const char *tcp_data_split_str(int val)
+{
+	switch (val) {
+	case 0:
+		return "off";
+	case 1:
+		return "auto";
+	case 2:
+		return "on";
+	default:
+		return "?";
+	}
+}
+
+static struct ethtool_rings_get_rsp *get_ring_config(void)
+{
+	struct ethtool_rings_get_req *get_req;
+	struct ethtool_rings_get_rsp *get_rsp;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+
+	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return NULL;
+	}
+
+	get_req = ethtool_rings_get_req_alloc();
+	ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+	get_rsp = ethtool_rings_get(ys, get_req);
+	ethtool_rings_get_req_free(get_req);
+
+	ynl_sock_destroy(ys);
+
+	return get_rsp;
+}
+
+static void restore_ring_config(const struct ethtool_rings_get_rsp *config)
+{
+	struct ethtool_rings_get_req *get_req;
+	struct ethtool_rings_get_rsp *get_rsp;
+	struct ethtool_rings_set_req *req;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	int ret;
+
+	if (!config)
+		return;
+
+	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return;
+	}
+
+	req = ethtool_rings_set_req_alloc();
+	ethtool_rings_set_req_set_header_dev_index(req, ifindex);
+	ethtool_rings_set_req_set_tcp_data_split(req,
+						ETHTOOL_TCP_DATA_SPLIT_UNKNOWN);
+	if (config->_present.hds_thresh)
+		ethtool_rings_set_req_set_hds_thresh(req, config->hds_thresh);
+
+	ret = ethtool_rings_set(ys, req);
+	if (ret < 0)
+		fprintf(stderr, "YNL restoring HDS cfg: %s\n", ys->err.msg);
+
+	get_req = ethtool_rings_get_req_alloc();
+	ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+	get_rsp = ethtool_rings_get(ys, get_req);
+	ethtool_rings_get_req_free(get_req);
+
+	/* use explicit value if UKNOWN didn't give us the previous */
+	if (get_rsp->tcp_data_split != config->tcp_data_split) {
+		ethtool_rings_set_req_set_tcp_data_split(req,
+							config->tcp_data_split);
+		ret = ethtool_rings_set(ys, req);
+		if (ret < 0)
+			fprintf(stderr, "YNL restoring expl HDS cfg: %s\n",
+				ys->err.msg);
+	}
+
+	ethtool_rings_get_rsp_free(get_rsp);
+	ethtool_rings_set_req_free(req);
+
+	ynl_sock_destroy(ys);
+}
+
+static int
+configure_headersplit(const struct ethtool_rings_get_rsp *old, bool on)
+{
+	struct ethtool_rings_get_req *get_req;
+	struct ethtool_rings_get_rsp *get_rsp;
+	struct ethtool_rings_set_req *req;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	int ret;
+
+	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return -1;
+	}
+
+	req = ethtool_rings_set_req_alloc();
+	ethtool_rings_set_req_set_header_dev_index(req, ifindex);
+	if (on) {
+		ethtool_rings_set_req_set_tcp_data_split(req,
+						ETHTOOL_TCP_DATA_SPLIT_ENABLED);
+		if (old->_present.hds_thresh)
+			ethtool_rings_set_req_set_hds_thresh(req, 0);
+	} else {
+		ethtool_rings_set_req_set_tcp_data_split(req,
+						ETHTOOL_TCP_DATA_SPLIT_UNKNOWN);
+	}
+	ret = ethtool_rings_set(ys, req);
+	if (ret < 0)
+		fprintf(stderr, "YNL failed: %s\n", ys->err.msg);
+	ethtool_rings_set_req_free(req);
+
+	if (ret == 0) {
+		get_req = ethtool_rings_get_req_alloc();
+		ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+		get_rsp = ethtool_rings_get(ys, get_req);
+		ethtool_rings_get_req_free(get_req);
+		if (get_rsp)
+			fprintf(stderr, "TCP header split: %s\n",
+				tcp_data_split_str(get_rsp->tcp_data_split));
+		ethtool_rings_get_rsp_free(get_rsp);
+	}
+
+	ynl_sock_destroy(ys);
+
+	return ret;
+}
+
+static int configure_rss(void)
+{
+	return run_command("ethtool -X %s equal %d >&2", ifname, start_queue);
+}
+
+static void reset_rss(void)
+{
+	run_command("ethtool -X %s default >&2", ifname, start_queue);
+}
+
+static int check_changing_channels(unsigned int rx, unsigned int tx)
+{
+	struct ethtool_channels_get_req *gchan;
+	struct ethtool_channels_set_req *schan;
+	struct ethtool_channels_get_rsp *chan;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	int ret;
+
+	fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx);
+
+	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return -1;
+	}
+
+	gchan = ethtool_channels_get_req_alloc();
+	if (!gchan) {
+		ret = -1;
+		goto exit_close_sock;
+	}
+
+	ethtool_channels_get_req_set_header_dev_index(gchan, ifindex);
+	chan = ethtool_channels_get(ys, gchan);
+	ethtool_channels_get_req_free(gchan);
+	if (!chan) {
+		fprintf(stderr, "YNL get channels: %s\n", ys->err.msg);
+		ret = -1;
+		goto exit_close_sock;
+	}
+
+	schan =	ethtool_channels_set_req_alloc();
+	if (!schan) {
+		ret = -1;
+		goto exit_free_chan;
+	}
+
+	ethtool_channels_set_req_set_header_dev_index(schan, ifindex);
+
+	if (chan->_present.combined_count) {
+		if (chan->_present.rx_count || chan->_present.tx_count) {
+			ethtool_channels_set_req_set_rx_count(schan, 0);
+			ethtool_channels_set_req_set_tx_count(schan, 0);
+		}
+
+		if (rx == tx) {
+			ethtool_channels_set_req_set_combined_count(schan, rx);
+		} else if (rx > tx) {
+			ethtool_channels_set_req_set_combined_count(schan, tx);
+			ethtool_channels_set_req_set_rx_count(schan, rx - tx);
+		} else {
+			ethtool_channels_set_req_set_combined_count(schan, rx);
+			ethtool_channels_set_req_set_tx_count(schan, tx - rx);
+		}
+
+	} else if (chan->_present.rx_count) {
+		ethtool_channels_set_req_set_rx_count(schan, rx);
+		ethtool_channels_set_req_set_tx_count(schan, tx);
+	} else {
+		fprintf(stderr, "Error: device has neither combined nor rx channels\n");
+		ret = -1;
+		goto exit_free_schan;
+	}
+
+	ret = ethtool_channels_set(ys, schan);
+	if (ret) {
+		fprintf(stderr, "YNL set channels: %s\n", ys->err.msg);
+	} else {
+		/* We were expecting a failure, go back to previous settings */
+		ethtool_channels_set_req_set_combined_count(schan,
+							    chan->combined_count);
+		ethtool_channels_set_req_set_rx_count(schan, chan->rx_count);
+		ethtool_channels_set_req_set_tx_count(schan, chan->tx_count);
+
+		ret = ethtool_channels_set(ys, schan);
+		if (ret)
+			fprintf(stderr, "YNL un-setting channels: %s\n",
+				ys->err.msg);
+	}
+
+exit_free_schan:
+	ethtool_channels_set_req_free(schan);
+exit_free_chan:
+	ethtool_channels_get_rsp_free(chan);
+exit_close_sock:
+	ynl_sock_destroy(ys);
+
+	return ret;
+}
+
+static int configure_flow_steering(struct sockaddr_in6 *server_sin)
+{
+	const char *type = "tcp6";
+	const char *server_addr;
+	char buf[40];
+	int flow_id;
+
+	inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf));
+	server_addr = buf;
+
+	if (IN6_IS_ADDR_V4MAPPED(&server_sin->sin6_addr)) {
+		type = "tcp4";
+		server_addr = strrchr(server_addr, ':') + 1;
+	}
+
+	/* Try configure 5-tuple */
+	flow_id = ethtool_add_flow("flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d",
+				   type,
+				   client_ip ? "src-ip" : "",
+				   client_ip ?: "",
+				   server_addr,
+				   client_ip ? "src-port" : "",
+				   client_ip ? port : "",
+				   port, start_queue);
+	if (flow_id < 0) {
+		/* If that fails, try configure 3-tuple */
+		flow_id = ethtool_add_flow("flow-type %s dst-ip %s dst-port %s queue %d",
+					   type, server_addr, port, start_queue);
+		if (flow_id < 0)
+			/* If that fails, return error */
+			return -1;
+	}
+
+	return 0;
+}
+
+static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
+			 struct netdev_queue_id *queues,
+			 unsigned int n_queue_index, struct ynl_sock **ys)
+{
+	struct netdev_bind_rx_req *req = NULL;
+	struct netdev_bind_rx_rsp *rsp = NULL;
+	struct ynl_error yerr;
+
+	*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+	if (!*ys) {
+		netdev_queue_id_free(queues);
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return -1;
+	}
+
+	req = netdev_bind_rx_req_alloc();
+	netdev_bind_rx_req_set_ifindex(req, ifindex);
+	netdev_bind_rx_req_set_fd(req, dmabuf_fd);
+	__netdev_bind_rx_req_set_queues(req, queues, n_queue_index);
+
+	rsp = netdev_bind_rx(*ys, req);
+	if (!rsp) {
+		perror("netdev_bind_rx");
+		goto err_close;
+	}
+
+	if (!rsp->_present.id) {
+		perror("id not present");
+		goto err_close;
+	}
+
+	fprintf(stderr, "got dmabuf id=%d\n", rsp->id);
+	dmabuf_id = rsp->id;
+
+	netdev_bind_rx_req_free(req);
+	netdev_bind_rx_rsp_free(rsp);
+
+	return 0;
+
+err_close:
+	fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
+	netdev_bind_rx_req_free(req);
+	ynl_sock_destroy(*ys);
+	return -1;
+}
+
+static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
+			 struct ynl_sock **ys)
+{
+	struct netdev_bind_tx_req *req = NULL;
+	struct netdev_bind_tx_rsp *rsp = NULL;
+	struct ynl_error yerr;
+
+	*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+	if (!*ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return -1;
+	}
+
+	req = netdev_bind_tx_req_alloc();
+	netdev_bind_tx_req_set_ifindex(req, ifindex);
+	netdev_bind_tx_req_set_fd(req, dmabuf_fd);
+
+	rsp = netdev_bind_tx(*ys, req);
+	if (!rsp) {
+		perror("netdev_bind_tx");
+		goto err_close;
+	}
+
+	if (!rsp->_present.id) {
+		perror("id not present");
+		goto err_close;
+	}
+
+	fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id);
+	tx_dmabuf_id = rsp->id;
+
+	netdev_bind_tx_req_free(req);
+	netdev_bind_tx_rsp_free(rsp);
+
+	return 0;
+
+err_close:
+	fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
+	netdev_bind_tx_req_free(req);
+	ynl_sock_destroy(*ys);
+	return -1;
+}
+
+static int enable_reuseaddr(int fd)
+{
+	int opt = 1;
+	int ret;
+
+	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
+	if (ret) {
+		pr_err("SO_REUSEPORT failed");
+		return -1;
+	}
+
+	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+	if (ret) {
+		pr_err("SO_REUSEADDR failed");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+	int ret;
+
+	sin6->sin6_family = AF_INET6;
+	sin6->sin6_port = htons(port);
+
+	ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+	if (ret != 1) {
+		/* fallback to plain IPv4 */
+		ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+		if (ret != 1)
+			return -1;
+
+		/* add ::ffff prefix */
+		sin6->sin6_addr.s6_addr32[0] = 0;
+		sin6->sin6_addr.s6_addr32[1] = 0;
+		sin6->sin6_addr.s6_addr16[4] = 0;
+		sin6->sin6_addr.s6_addr16[5] = 0xffff;
+	}
+
+	return 0;
+}
+
+static struct netdev_queue_id *create_queues(void)
+{
+	struct netdev_queue_id *queues;
+	size_t i = 0;
+
+	queues = netdev_queue_id_alloc(num_queues);
+	for (i = 0; i < num_queues; i++) {
+		netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX);
+		netdev_queue_id_set_id(&queues[i], start_queue + i);
+	}
+
+	return queues;
+}
+
+static int do_server(struct memory_buffer *mem)
+{
+	struct ethtool_rings_get_rsp *ring_config;
+	char ctrl_data[sizeof(int) * 20000];
+	size_t non_page_aligned_frags = 0;
+	struct sockaddr_in6 client_addr;
+	struct sockaddr_in6 server_sin;
+	size_t page_aligned_frags = 0;
+	size_t total_received = 0;
+	socklen_t client_addr_len;
+	bool is_devmem = false;
+	char *tmp_mem = NULL;
+	struct ynl_sock *ys;
+	char iobuf[819200];
+	int ret, err = -1;
+	char buffer[256];
+	int socket_fd;
+	int client_fd;
+
+	ret = parse_address(server_ip, atoi(port), &server_sin);
+	if (ret < 0) {
+		pr_err("parse server address");
+		return -1;
+	}
+
+	ring_config = get_ring_config();
+	if (!ring_config) {
+		pr_err("Failed to get current ring configuration");
+		return -1;
+	}
+
+	if (configure_headersplit(ring_config, 1)) {
+		pr_err("Failed to enable TCP header split");
+		goto err_free_ring_config;
+	}
+
+	/* Configure RSS to divert all traffic from our devmem queues */
+	if (configure_rss()) {
+		pr_err("Failed to configure rss");
+		goto err_reset_headersplit;
+	}
+
+	/* Flow steer our devmem flows to start_queue */
+	if (configure_flow_steering(&server_sin)) {
+		pr_err("Failed to configure flow steering");
+		goto err_reset_rss;
+	}
+
+	if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) {
+		pr_err("Failed to bind");
+		goto err_reset_flow_steering;
+	}
+
+	tmp_mem = malloc(mem->size);
+	if (!tmp_mem)
+		goto err_unbind;
+
+	socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (socket_fd < 0) {
+		pr_err("Failed to create socket");
+		goto err_free_tmp;
+	}
+
+	if (enable_reuseaddr(socket_fd))
+		goto err_close_socket;
+
+	fprintf(stderr, "binding to address %s:%d\n", server_ip,
+		ntohs(server_sin.sin6_port));
+
+	ret = bind(socket_fd, &server_sin, sizeof(server_sin));
+	if (ret) {
+		pr_err("Failed to bind");
+		goto err_close_socket;
+	}
+
+	ret = listen(socket_fd, 1);
+	if (ret) {
+		pr_err("Failed to listen");
+		goto err_close_socket;
+	}
+
+	client_addr_len = sizeof(client_addr);
+
+	inet_ntop(AF_INET6, &server_sin.sin6_addr, buffer,
+		  sizeof(buffer));
+	fprintf(stderr, "Waiting or connection on %s:%d\n", buffer,
+		ntohs(server_sin.sin6_port));
+	client_fd = accept(socket_fd, &client_addr, &client_addr_len);
+	if (client_fd < 0) {
+		pr_err("Failed to accept");
+		goto err_close_socket;
+	}
+
+	inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer,
+		  sizeof(buffer));
+	fprintf(stderr, "Got connection from %s:%d\n", buffer,
+		ntohs(client_addr.sin6_port));
+
+	while (1) {
+		struct iovec iov = { .iov_base = iobuf,
+				     .iov_len = sizeof(iobuf) };
+		struct dmabuf_cmsg *dmabuf_cmsg = NULL;
+		struct cmsghdr *cm = NULL;
+		struct msghdr msg = { 0 };
+		struct dmabuf_token token;
+		ssize_t ret;
+
+		is_devmem = false;
+
+		msg.msg_iov = &iov;
+		msg.msg_iovlen = 1;
+		msg.msg_control = ctrl_data;
+		msg.msg_controllen = sizeof(ctrl_data);
+		ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM);
+		fprintf(stderr, "recvmsg ret=%ld\n", ret);
+		if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
+			continue;
+		if (ret < 0) {
+			perror("recvmsg");
+			if (errno == EFAULT) {
+				pr_err("received EFAULT, won't recover");
+				goto err_close_client;
+			}
+			continue;
+		}
+		if (ret == 0) {
+			errno = 0;
+			pr_err("client exited");
+			goto cleanup;
+		}
+
+		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+			if (cm->cmsg_level != SOL_SOCKET ||
+			    (cm->cmsg_type != SCM_DEVMEM_DMABUF &&
+			     cm->cmsg_type != SCM_DEVMEM_LINEAR)) {
+				fprintf(stderr, "skipping non-devmem cmsg\n");
+				continue;
+			}
+
+			dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm);
+			is_devmem = true;
+
+			if (cm->cmsg_type == SCM_DEVMEM_LINEAR) {
+				/* TODO: process data copied from skb's linear
+				 * buffer.
+				 */
+				fprintf(stderr,
+					"SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n",
+					dmabuf_cmsg->frag_size);
+
+				continue;
+			}
+
+			token.token_start = dmabuf_cmsg->frag_token;
+			token.token_count = 1;
+
+			total_received += dmabuf_cmsg->frag_size;
+			fprintf(stderr,
+				"received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n",
+				dmabuf_cmsg->frag_offset >> PAGE_SHIFT,
+				dmabuf_cmsg->frag_offset % getpagesize(),
+				dmabuf_cmsg->frag_offset,
+				dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token,
+				total_received, dmabuf_cmsg->dmabuf_id);
+
+			if (dmabuf_cmsg->dmabuf_id != dmabuf_id) {
+				pr_err("received on wrong dmabuf_id: flow steering error");
+				goto err_close_client;
+			}
+
+			if (dmabuf_cmsg->frag_size % getpagesize())
+				non_page_aligned_frags++;
+			else
+				page_aligned_frags++;
+
+			provider->memcpy_from_device(tmp_mem, mem,
+						     dmabuf_cmsg->frag_offset,
+						     dmabuf_cmsg->frag_size);
+
+			if (do_validation) {
+				if (validate_buffer(tmp_mem,
+						    dmabuf_cmsg->frag_size))
+					goto err_close_client;
+			} else {
+				print_nonzero_bytes(tmp_mem,
+						    dmabuf_cmsg->frag_size);
+			}
+
+			ret = setsockopt(client_fd, SOL_SOCKET,
+					 SO_DEVMEM_DONTNEED, &token,
+					 sizeof(token));
+			if (ret != 1) {
+				pr_err("SO_DEVMEM_DONTNEED not enough tokens");
+				goto err_close_client;
+			}
+		}
+		if (!is_devmem) {
+			pr_err("flow steering error");
+			goto err_close_client;
+		}
+
+		fprintf(stderr, "total_received=%lu\n", total_received);
+	}
+
+	fprintf(stderr, "%s: ok\n", TEST_PREFIX);
+
+	fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
+		page_aligned_frags, non_page_aligned_frags);
+
+cleanup:
+	err = 0;
+
+err_close_client:
+	close(client_fd);
+err_close_socket:
+	close(socket_fd);
+err_free_tmp:
+	free(tmp_mem);
+err_unbind:
+	ynl_sock_destroy(ys);
+err_reset_flow_steering:
+	reset_flow_steering();
+err_reset_rss:
+	reset_rss();
+err_reset_headersplit:
+	restore_ring_config(ring_config);
+err_free_ring_config:
+	ethtool_rings_get_rsp_free(ring_config);
+	return err;
+}
+
+int run_devmem_tests(void)
+{
+	struct ethtool_rings_get_rsp *ring_config;
+	struct netdev_queue_id *queues;
+	struct memory_buffer *mem;
+	struct ynl_sock *ys;
+	int err = -1;
+
+	mem = provider->alloc(getpagesize() * NUM_PAGES);
+	if (!mem) {
+		pr_err("Failed to allocate memory buffer");
+		return -1;
+	}
+
+	ring_config = get_ring_config();
+	if (!ring_config) {
+		pr_err("Failed to get current ring configuration");
+		goto err_free_mem;
+	}
+
+	/* Configure RSS to divert all traffic from our devmem queues */
+	if (configure_rss()) {
+		pr_err("rss error");
+		goto err_free_ring_config;
+	}
+
+	if (configure_headersplit(ring_config, 1)) {
+		pr_err("Failed to configure header split");
+		goto err_reset_rss;
+	}
+
+	queues = netdev_queue_id_alloc(num_queues);
+	if (!queues) {
+		pr_err("Failed to allocate empty queues array");
+		goto err_reset_headersplit;
+	}
+
+	if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+		pr_err("Binding empty queues array should have failed");
+		goto err_unbind;
+	}
+
+	if (configure_headersplit(ring_config, 0)) {
+		pr_err("Failed to configure header split");
+		goto err_reset_headersplit;
+	}
+
+	queues = create_queues();
+	if (!queues) {
+		pr_err("Failed to create queues");
+		goto err_reset_headersplit;
+	}
+
+	if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+		pr_err("Configure dmabuf with header split off should have failed");
+		goto err_unbind;
+	}
+
+	if (configure_headersplit(ring_config, 1)) {
+		pr_err("Failed to configure header split");
+		goto err_reset_headersplit;
+	}
+
+	queues = create_queues();
+	if (!queues) {
+		pr_err("Failed to create queues");
+		goto err_reset_headersplit;
+	}
+
+	if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+		pr_err("Failed to bind");
+		goto err_reset_headersplit;
+	}
+
+	/* Deactivating a bound queue should not be legal */
+	if (!check_changing_channels(num_queues, num_queues)) {
+		pr_err("Deactivating a bound queue should be illegal");
+		goto err_unbind;
+	}
+
+	err = 0;
+	goto err_unbind;
+
+err_unbind:
+	ynl_sock_destroy(ys);
+err_reset_headersplit:
+	restore_ring_config(ring_config);
+err_reset_rss:
+	reset_rss();
+err_free_ring_config:
+	ethtool_rings_get_rsp_free(ring_config);
+err_free_mem:
+	provider->free(mem);
+	return err;
+}
+
+static uint64_t gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL);
+}
+
+static int do_poll(int fd)
+{
+	struct pollfd pfd;
+	int ret;
+
+	pfd.revents = 0;
+	pfd.fd = fd;
+
+	ret = poll(&pfd, 1, waittime_ms);
+	if (ret == -1) {
+		pr_err("poll");
+		return -1;
+	}
+
+	return ret && (pfd.revents & POLLERR);
+}
+
+static int wait_compl(int fd)
+{
+	int64_t tstop = gettimeofday_ms() + waittime_ms;
+	char control[CMSG_SPACE(100)] = {};
+	struct sock_extended_err *serr;
+	struct msghdr msg = {};
+	struct cmsghdr *cm;
+	__u32 hi, lo;
+	int ret;
+
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+
+	while (gettimeofday_ms() < tstop) {
+		ret = do_poll(fd);
+		if (ret < 0)
+			return ret;
+		if (!ret)
+			continue;
+
+		ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+		if (ret < 0) {
+			if (errno == EAGAIN)
+				continue;
+			pr_err("recvmsg(MSG_ERRQUEUE)");
+			return -1;
+		}
+		if (msg.msg_flags & MSG_CTRUNC) {
+			pr_err("MSG_CTRUNC");
+			return -1;
+		}
+
+		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+			if (cm->cmsg_level != SOL_IP &&
+			    cm->cmsg_level != SOL_IPV6)
+				continue;
+			if (cm->cmsg_level == SOL_IP &&
+			    cm->cmsg_type != IP_RECVERR)
+				continue;
+			if (cm->cmsg_level == SOL_IPV6 &&
+			    cm->cmsg_type != IPV6_RECVERR)
+				continue;
+
+			serr = (void *)CMSG_DATA(cm);
+			if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
+				pr_err("wrong origin %u", serr->ee_origin);
+				return -1;
+			}
+			if (serr->ee_errno != 0) {
+				pr_err("wrong errno %d", serr->ee_errno);
+				return -1;
+			}
+
+			hi = serr->ee_data;
+			lo = serr->ee_info;
+
+			fprintf(stderr, "tx complete [%d,%d]\n", lo, hi);
+			return 0;
+		}
+	}
+
+	pr_err("did not receive tx completion");
+	return -1;
+}
+
+static int do_client(struct memory_buffer *mem)
+{
+	char ctrl_data[CMSG_SPACE(sizeof(__u32))];
+	struct sockaddr_in6 server_sin;
+	struct sockaddr_in6 client_sin;
+	struct ynl_sock *ys = NULL;
+	struct iovec iov[MAX_IOV];
+	struct msghdr msg = {};
+	ssize_t line_size = 0;
+	struct cmsghdr *cmsg;
+	char *line = NULL;
+	int ret, err = -1;
+	size_t len = 0;
+	int socket_fd;
+	__u32 ddmabuf;
+	int opt = 1;
+
+	ret = parse_address(server_ip, atoi(port), &server_sin);
+	if (ret < 0) {
+		pr_err("parse server address");
+		return -1;
+	}
+
+	if (client_ip) {
+		ret = parse_address(client_ip, atoi(port), &client_sin);
+		if (ret < 0) {
+			pr_err("parse client address");
+			return ret;
+		}
+	}
+
+	socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (socket_fd < 0) {
+		pr_err("create socket");
+		return -1;
+	}
+
+	if (enable_reuseaddr(socket_fd))
+		goto err_close_socket;
+
+	ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname,
+			 strlen(ifname) + 1);
+	if (ret) {
+		pr_err("bindtodevice");
+		goto err_close_socket;
+	}
+
+	if (bind_tx_queue(ifindex, mem->fd, &ys)) {
+		pr_err("Failed to bind");
+		goto err_close_socket;
+	}
+
+	if (client_ip) {
+		ret = bind(socket_fd, &client_sin, sizeof(client_sin));
+		if (ret) {
+			pr_err("bind");
+			goto err_unbind;
+		}
+	}
+
+	ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt));
+	if (ret) {
+		pr_err("set sock opt");
+		goto err_unbind;
+	}
+
+	fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip,
+		ntohs(server_sin.sin6_port), ifname);
+
+	ret = connect(socket_fd, &server_sin, sizeof(server_sin));
+	if (ret) {
+		pr_err("connect");
+		goto err_unbind;
+	}
+
+	while (1) {
+		free(line);
+		line = NULL;
+		line_size = getline(&line, &len, stdin);
+
+		if (line_size < 0)
+			break;
+
+		if (max_chunk) {
+			msg.msg_iovlen =
+				(line_size + max_chunk - 1) / max_chunk;
+			if (msg.msg_iovlen > MAX_IOV) {
+				pr_err("can't partition %zd bytes into maximum of %d chunks",
+				       line_size, MAX_IOV);
+				goto err_free_line;
+			}
+
+			for (int i = 0; i < msg.msg_iovlen; i++) {
+				iov[i].iov_base = (void *)(i * max_chunk);
+				iov[i].iov_len = max_chunk;
+			}
+
+			iov[msg.msg_iovlen - 1].iov_len =
+				line_size - (msg.msg_iovlen - 1) * max_chunk;
+		} else {
+			iov[0].iov_base = 0;
+			iov[0].iov_len = line_size;
+			msg.msg_iovlen = 1;
+		}
+
+		msg.msg_iov = iov;
+		provider->memcpy_to_device(mem, 0, line, line_size);
+
+		msg.msg_control = ctrl_data;
+		msg.msg_controllen = sizeof(ctrl_data);
+
+		cmsg = CMSG_FIRSTHDR(&msg);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_DEVMEM_DMABUF;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+
+		ddmabuf = tx_dmabuf_id;
+
+		*((__u32 *)CMSG_DATA(cmsg)) = ddmabuf;
+
+		ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY);
+		if (ret < 0) {
+			pr_err("Failed sendmsg");
+			goto err_free_line;
+		}
+
+		fprintf(stderr, "sendmsg_ret=%d\n", ret);
+
+		if (ret != line_size) {
+			pr_err("Did not send all bytes %d vs %zd", ret, line_size);
+			goto err_free_line;
+		}
+
+		if (wait_compl(socket_fd))
+			goto err_free_line;
+	}
+
+	fprintf(stderr, "%s: tx ok\n", TEST_PREFIX);
+
+	err = 0;
+
+err_free_line:
+	free(line);
+err_unbind:
+	ynl_sock_destroy(ys);
+err_close_socket:
+	close(socket_fd);
+	return err;
+}
+
+int main(int argc, char *argv[])
+{
+	struct memory_buffer *mem;
+	int is_server = 0, opt;
+	int ret, err = 1;
+
+	while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) {
+		switch (opt) {
+		case 'l':
+			is_server = 1;
+			break;
+		case 's':
+			server_ip = optarg;
+			break;
+		case 'c':
+			client_ip = optarg;
+			break;
+		case 'p':
+			port = optarg;
+			break;
+		case 'v':
+			do_validation = atoll(optarg);
+			break;
+		case 'q':
+			num_queues = atoi(optarg);
+			break;
+		case 't':
+			start_queue = atoi(optarg);
+			break;
+		case 'f':
+			ifname = optarg;
+			break;
+		case 'z':
+			max_chunk = atoi(optarg);
+			break;
+		case '?':
+			fprintf(stderr, "unknown option: %c\n", optopt);
+			break;
+		}
+	}
+
+	if (!ifname) {
+		pr_err("Missing -f argument");
+		return 1;
+	}
+
+	ifindex = if_nametoindex(ifname);
+
+	fprintf(stderr, "using ifindex=%u\n", ifindex);
+
+	if (!server_ip && !client_ip) {
+		if (start_queue < 0 && num_queues < 0) {
+			num_queues = rxq_num(ifindex);
+			if (num_queues < 0) {
+				pr_err("couldn't detect number of queues");
+				return 1;
+			}
+			if (num_queues < 2) {
+				pr_err("number of device queues is too low");
+				return 1;
+			}
+			/* make sure can bind to multiple queues */
+			start_queue = num_queues / 2;
+			num_queues /= 2;
+		}
+
+		if (start_queue < 0 || num_queues < 0) {
+			pr_err("Both -t and -q are required");
+			return 1;
+		}
+
+		return run_devmem_tests();
+	}
+
+	if (start_queue < 0 && num_queues < 0) {
+		num_queues = rxq_num(ifindex);
+		if (num_queues < 2) {
+			pr_err("number of device queues is too low");
+			return 1;
+		}
+
+		num_queues = 1;
+		start_queue = rxq_num(ifindex) - num_queues;
+
+		if (start_queue < 0) {
+			pr_err("couldn't detect number of queues");
+			return 1;
+		}
+
+		fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues);
+	}
+
+	for (; optind < argc; optind++)
+		fprintf(stderr, "extra arguments: %s\n", argv[optind]);
+
+	if (start_queue < 0) {
+		pr_err("Missing -t argument");
+		return 1;
+	}
+
+	if (num_queues < 0) {
+		pr_err("Missing -q argument");
+		return 1;
+	}
+
+	if (!server_ip) {
+		pr_err("Missing -s argument");
+		return 1;
+	}
+
+	if (!port) {
+		pr_err("Missing -p argument");
+		return 1;
+	}
+
+	mem = provider->alloc(getpagesize() * NUM_PAGES);
+	if (!mem) {
+		pr_err("Failed to allocate memory buffer");
+		return 1;
+	}
+
+	ret = is_server ? do_server(mem) : do_client(mem);
+	if (ret)
+		goto err_free_mem;
+
+	err = 0;
+
+err_free_mem:
+	provider->free(mem);
+	return err;
+}
diff --git a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
new file mode 100755
index 000000000000..c1e943d53f19
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Tests related to configuration of HW timestamping
+"""
+
+import errno
+from lib.py import ksft_run, ksft_exit, ksft_ge, ksft_eq, KsftSkipEx
+from lib.py import NetDrvEnv, EthtoolFamily, NlError
+
+
+def __get_hwtimestamp_support(cfg):
+    """ Retrieve supported configuration information """
+
+    try:
+        tsinfo = cfg.ethnl.tsinfo_get({'header': {'dev-name': cfg.ifname}})
+    except NlError as e:
+        if e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("timestamping configuration is not supported") from e
+        raise
+
+    ctx = {}
+    tx = tsinfo.get('tx-types', {})
+    rx = tsinfo.get('rx-filters', {})
+
+    bits = tx.get('bits', {})
+    ctx['tx'] = bits.get('bit', [])
+    bits = rx.get('bits', {})
+    ctx['rx'] = bits.get('bit', [])
+    return ctx
+
+
+def __get_hwtimestamp_config(cfg):
+    """ Retrieve current TS configuration information """
+
+    try:
+        tscfg = cfg.ethnl.tsconfig_get({'header': {'dev-name': cfg.ifname}})
+    except NlError as e:
+        if e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("timestamping configuration is not supported via netlink") from e
+        raise
+    return tscfg
+
+
+def __set_hwtimestamp_config(cfg, ts):
+    """ Setup new TS configuration information """
+
+    ts['header'] = {'dev-name': cfg.ifname}
+    try:
+        res = cfg.ethnl.tsconfig_set(ts)
+    except NlError as e:
+        if e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("timestamping configuration is not supported via netlink") from e
+        raise
+    return res
+
+
+def test_hwtstamp_tx(cfg):
+    """
+    Test TX timestamp configuration.
+    The driver should apply provided config and report back proper state.
+    """
+
+    orig_tscfg = __get_hwtimestamp_config(cfg)
+    ts = __get_hwtimestamp_support(cfg)
+    tx = ts['tx']
+    for t in tx:
+        tscfg = orig_tscfg
+        tscfg['tx-types']['bits']['bit'] = [t]
+        res = __set_hwtimestamp_config(cfg, tscfg)
+        if res is None:
+            res = __get_hwtimestamp_config(cfg)
+        ksft_eq(res['tx-types']['bits']['bit'], [t])
+    __set_hwtimestamp_config(cfg, orig_tscfg)
+
+
+def test_hwtstamp_rx(cfg):
+    """
+    Test RX timestamp configuration.
+    The filter configuration is taken from the list of supported filters.
+    The driver should apply the config without error and report back proper state.
+    Some extension of the timestamping scope is allowed for PTP filters.
+    """
+
+    orig_tscfg = __get_hwtimestamp_config(cfg)
+    ts = __get_hwtimestamp_support(cfg)
+    rx = ts['rx']
+    for r in rx:
+        tscfg = orig_tscfg
+        tscfg['rx-filters']['bits']['bit'] = [r]
+        res = __set_hwtimestamp_config(cfg, tscfg)
+        if res is None:
+            res = __get_hwtimestamp_config(cfg)
+        if r['index'] == 0 or r['index'] == 1:
+            ksft_eq(res['rx-filters']['bits']['bit'][0]['index'], r['index'])
+        else:
+            # the driver can fallback to some value which has higher coverage for timestamping
+            ksft_ge(res['rx-filters']['bits']['bit'][0]['index'], r['index'])
+    __set_hwtimestamp_config(cfg, orig_tscfg)
+
+
+def main() -> None:
+    """ Ksft boiler plate main """
+
+    with NetDrvEnv(__file__, nsim_test=False) as cfg:
+        cfg.ethnl = EthtoolFamily()
+        ksft_run([test_hwtstamp_tx, test_hwtstamp_rx], args=(cfg,))
+        ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
new file mode 100755
index 000000000000..2a51b60df8a1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Test driver resilience vs page pool allocation failures.
+"""
+
+import errno
+import time
+import math
+import os
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetdevFamily, NlError
+from lib.py import NetDrvEpEnv
+from lib.py import cmd, tool, GenerateTraffic
+
+
+def _write_fail_config(config):
+    for key, value in config.items():
+        path = "/sys/kernel/debug/fail_function/"
+        with open(path + key, "w", encoding='ascii') as fp:
+            fp.write(str(value) + "\n")
+
+
+def _enable_pp_allocation_fail():
+    if not os.path.exists("/sys/kernel/debug/fail_function"):
+        raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")
+
+    if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
+        _write_fail_config({"inject": "page_pool_alloc_netmems"})
+
+    _write_fail_config({
+        "verbose": 0,
+        "interval": 511,
+        "probability": 100,
+        "times": -1,
+    })
+
+
+def _disable_pp_allocation_fail():
+    if not os.path.exists("/sys/kernel/debug/fail_function"):
+        return
+
+    if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
+        _write_fail_config({"inject": ""})
+
+    _write_fail_config({
+        "probability": 0,
+        "times": 0,
+    })
+
+
+def test_pp_alloc(cfg, netdevnl):
+    """
+    Configure page pool allocation fail injection while traffic is running.
+    """
+
+    def get_stats():
+        return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+    def check_traffic_flowing():
+        stat1 = get_stats()
+        time.sleep(1)
+        stat2 = get_stats()
+        if stat2['rx-packets'] - stat1['rx-packets'] < 4000:
+            raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets'])
+
+
+    try:
+        stats = get_stats()
+    except NlError as e:
+        if e.nl_msg.error == -errno.EOPNOTSUPP:
+            stats = {}
+        else:
+            raise
+    if 'rx-alloc-fail' not in stats:
+        raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats")
+
+    set_g = False
+    traffic = None
+    try:
+        traffic = GenerateTraffic(cfg)
+
+        check_traffic_flowing()
+
+        _enable_pp_allocation_fail()
+
+        s1 = get_stats()
+        time.sleep(3)
+        s2 = get_stats()
+
+        seen_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail']
+        if seen_fails < 1:
+            raise KsftSkipEx("Allocation failures not increasing")
+        pkts = s2['rx-packets'] - s1['rx-packets']
+        # Expecting one failure per 512 buffers, 3.1x safety margin
+        want_fails = math.floor(pkts / 512 / 3.1)
+        if seen_fails < want_fails:
+            raise KsftSkipEx("Allocation increasing too slowly", seen_fails,
+                             "packets:", pkts)
+        ksft_pr(f"Seen: pkts:{pkts} fails:{seen_fails} (pass thrs:{want_fails})")
+
+        # Basic failures are fine, try to wobble some settings to catch extra failures
+        check_traffic_flowing()
+        g = tool("ethtool", "-g " + cfg.ifname, json=True)[0]
+        if 'rx' in g and g["rx"] * 2 <= g["rx-max"]:
+            new_g = g['rx'] * 2
+        elif 'rx' in g:
+            new_g = g['rx'] // 2
+        else:
+            new_g = None
+
+        if new_g:
+            set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0
+            if set_g:
+                ksft_pr("ethtool -G change retval: success")
+            else:
+                ksft_pr("ethtool -G change retval: did not succeed", new_g)
+        else:
+            ksft_pr("ethtool -G change retval: did not try")
+
+        time.sleep(0.1)
+        check_traffic_flowing()
+    finally:
+        _disable_pp_allocation_fail()
+        if traffic:
+            traffic.stop()
+        time.sleep(0.1)
+        if set_g:
+            cmd(f"ethtool -G {cfg.ifname} rx {g['rx']}")
+
+
+def main() -> None:
+    """ Ksft boiler plate main """
+    netdevnl = NetdevFamily()
+    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+
+        ksft_run([test_pp_alloc], args=(cfg, netdevnl, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py
new file mode 100755
index 000000000000..19847f3d4a00
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_api.py
@@ -0,0 +1,476 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+API level tests for RSS (mostly Netlink vs IOCTL).
+"""
+
+import errno
+import glob
+import random
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import defer, ethtool, CmdExitFailure
+from lib.py import EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+
+
+def _require_2qs(cfg):
+    qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*"))
+    if qcnt < 2:
+        raise KsftSkipEx(f"Local has only {qcnt} queues")
+    return qcnt
+
+
+def _ethtool_create(cfg, act, opts):
+    output = ethtool(f"{act} {cfg.ifname} {opts}").stdout
+    # Output will be something like: "New RSS context is 1" or
+    # "Added rule with ID 7", we want the integer from the end
+    return int(output.split()[-1])
+
+
+def _ethtool_get_cfg(cfg, fl_type, to_nl=False):
+    descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+
+    if to_nl:
+        converter = {
+            "IP SA": "ip-src",
+            "IP DA": "ip-dst",
+            "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1",
+            "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3",
+        }
+
+        ret = set()
+    else:
+        converter = {
+            "IP SA": "s",
+            "IP DA": "d",
+            "L3 proto": "t",
+            "L4 bytes 0 & 1 [TCP/UDP src port]": "f",
+            "L4 bytes 2 & 3 [TCP/UDP dst port]": "n",
+        }
+
+        ret = ""
+
+    for line in descr.split("\n")[1:-2]:
+        # if this raises we probably need to add more keys to converter above
+        if to_nl:
+            ret.add(converter[line])
+        else:
+            ret += converter[line]
+    return ret
+
+
+def test_rxfh_nl_set_fail(cfg):
+    """
+    Test error path of Netlink SET.
+    """
+    _require_2qs(cfg)
+
+    ethnl = EthtoolFamily()
+    ethnl.ntf_subscribe("monitor")
+
+    with ksft_raises(NlError):
+        ethnl.rss_set({"header": {"dev-name": "lo"},
+                       "indir": None})
+
+    with ksft_raises(NlError):
+        ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+                       "indir": [100000]})
+    ntf = next(ethnl.poll_ntf(duration=0.2), None)
+    ksft_is(ntf, None)
+
+
+def test_rxfh_nl_set_indir(cfg):
+    """
+    Test setting indirection table via Netlink.
+    """
+    qcnt = _require_2qs(cfg)
+
+    # Test some SETs with a value
+    reset = defer(cfg.ethnl.rss_set,
+                  {"header": {"dev-index": cfg.ifindex}, "indir": None})
+    cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+                       "indir": [1]})
+    rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    ksft_eq(set(rss.get("indir", [-1])), {1})
+
+    cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+                       "indir": [0, 1]})
+    rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+    # Make sure we can't set the queue count below max queue used
+    with ksft_raises(CmdExitFailure):
+        ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+    with ksft_raises(CmdExitFailure):
+        ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+    # Test reset back to default
+    reset.exec()
+    rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    ksft_eq(set(rss.get("indir", [-1])), set(range(qcnt)))
+
+
+def test_rxfh_nl_set_indir_ctx(cfg):
+    """
+    Test setting indirection table for a custom context via Netlink.
+    """
+    _require_2qs(cfg)
+
+    # Get setting for ctx 0, we'll make sure they don't get clobbered
+    dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+
+    # Create context
+    ctx_id = _ethtool_create(cfg, "-X", "context new")
+    defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+    cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+                       "context": ctx_id, "indir": [1]})
+    rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+                             "context": ctx_id})
+    ksft_eq(set(rss.get("indir", [-1])), {1})
+
+    ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    ksft_eq(ctx0, dflt)
+
+    cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+                       "context": ctx_id, "indir": [0, 1]})
+    rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+                             "context": ctx_id})
+    ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+    ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    ksft_eq(ctx0, dflt)
+
+    # Make sure we can't set the queue count below max queue used
+    with ksft_raises(CmdExitFailure):
+        ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+    with ksft_raises(CmdExitFailure):
+        ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+
+def test_rxfh_indir_ntf(cfg):
+    """
+    Check that Netlink notifications are generated when RSS indirection
+    table was modified.
+    """
+    _require_2qs(cfg)
+
+    ethnl = EthtoolFamily()
+    ethnl.ntf_subscribe("monitor")
+
+    ethtool(f"--disable-netlink -X {cfg.ifname} weight 0 1")
+    reset = defer(ethtool, f"-X {cfg.ifname} default")
+
+    ntf = next(ethnl.poll_ntf(duration=0.2), None)
+    if ntf is None:
+        raise KsftFailEx("No notification received")
+    ksft_eq(ntf["name"], "rss-ntf")
+    ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+    reset.exec()
+    ntf = next(ethnl.poll_ntf(duration=0.2), None)
+    if ntf is None:
+        raise KsftFailEx("No notification received after reset")
+    ksft_eq(ntf["name"], "rss-ntf")
+    ksft_is(ntf["msg"].get("context"), None)
+    ksft_ne(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_indir_ctx_ntf(cfg):
+    """
+    Check that Netlink notifications are generated when RSS indirection
+    table was modified on an additional RSS context.
+    """
+    _require_2qs(cfg)
+
+    ctx_id = _ethtool_create(cfg, "-X", "context new")
+    defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+    ethnl = EthtoolFamily()
+    ethnl.ntf_subscribe("monitor")
+
+    ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} weight 0 1")
+
+    ntf = next(ethnl.poll_ntf(duration=0.2), None)
+    if ntf is None:
+        raise KsftFailEx("No notification received")
+    ksft_eq(ntf["name"], "rss-ntf")
+    ksft_eq(ntf["msg"].get("context"), ctx_id)
+    ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_nl_set_key(cfg):
+    """
+    Test setting hashing key via Netlink.
+    """
+
+    dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    defer(cfg.ethnl.rss_set,
+          {"header": {"dev-index": cfg.ifindex},
+           "hkey": dflt["hkey"], "indir": None})
+
+    # Empty key should error out
+    with ksft_raises(NlError) as cm:
+        cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+                           "hkey": None})
+    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.hkey')
+
+    # Set key to random
+    mod = random.randbytes(len(dflt["hkey"]))
+    cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+                       "hkey": mod})
+    rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    ksft_eq(rss.get("hkey", [-1]), mod)
+
+    # Set key to random and indir tbl to something at once
+    mod = random.randbytes(len(dflt["hkey"]))
+    cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+                       "indir": [0, 1], "hkey": mod})
+    rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    ksft_eq(rss.get("hkey", [-1]), mod)
+    ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+
+def test_rxfh_fields(cfg):
+    """
+    Test reading Rx Flow Hash over Netlink.
+    """
+
+    flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+    ethnl = EthtoolFamily()
+
+    cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    for fl_type in flow_types:
+        one = _ethtool_get_cfg(cfg, fl_type, to_nl=True)
+        ksft_eq(one, cfg_nl["flow-hash"][fl_type],
+                comment="Config for " + fl_type)
+
+
+def test_rxfh_fields_set(cfg):
+    """ Test configuring Rx Flow Hash over Netlink. """
+
+    flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+
+    # Collect current settings
+    cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    # symmetric hashing is config-order-sensitive make sure we leave
+    # symmetric mode, or make the flow-hash sym-compatible first
+    changes = [{"flow-hash": cfg_old["flow-hash"],},
+               {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+    if cfg_old.get("input-xfrm"):
+        changes = list(reversed(changes))
+    for old in changes:
+        defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+    # symmetric hashing prevents some of the configs below
+    if cfg_old.get("input-xfrm"):
+        cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+                           "input-xfrm": {}})
+
+    for fl_type in flow_types:
+        cur = _ethtool_get_cfg(cfg, fl_type)
+        if cur == "sdfn":
+            change_nl = {"ip-src", "ip-dst"}
+            change_ic = "sd"
+        else:
+            change_nl = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+            change_ic = "sdfn"
+
+        cfg.ethnl.rss_set({
+            "header": {"dev-index": cfg.ifindex},
+            "flow-hash": {fl_type: change_nl}
+        })
+        reset = defer(ethtool, f"--disable-netlink -N {cfg.ifname} "
+                      f"rx-flow-hash {fl_type} {cur}")
+
+        cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+        ksft_eq(change_nl, cfg_nl["flow-hash"][fl_type],
+                comment=f"Config for {fl_type} over Netlink")
+        cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+        ksft_eq(change_ic, cfg_ic,
+                comment=f"Config for {fl_type} over IOCTL")
+
+        reset.exec()
+        cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+        ksft_eq(cfg_old["flow-hash"][fl_type], cfg_nl["flow-hash"][fl_type],
+                comment=f"Un-config for {fl_type} over Netlink")
+        cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+        ksft_eq(cur, cfg_ic, comment=f"Un-config for {fl_type} over IOCTL")
+
+    # Try to set multiple at once, the defer was already installed at the start
+    change = {"ip-src"}
+    if change == cfg_old["flow-hash"]["tcp4"]:
+        change = {"ip-dst"}
+    cfg.ethnl.rss_set({
+        "header": {"dev-index": cfg.ifindex},
+        "flow-hash": {x: change for x in flow_types}
+    })
+
+    cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    for fl_type in flow_types:
+        ksft_eq(change, cfg_nl["flow-hash"][fl_type],
+                comment=f"multi-config for {fl_type} over Netlink")
+
+
+def test_rxfh_fields_set_xfrm(cfg):
+    """ Test changing Rx Flow Hash vs xfrm_input at once.  """
+
+    def set_rss(cfg, xfrm, fh):
+        cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+                           "input-xfrm": xfrm, "flow-hash": fh})
+
+    # Install the reset handler
+    cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    # symmetric hashing is config-order-sensitive make sure we leave
+    # symmetric mode, or make the flow-hash sym-compatible first
+    changes = [{"flow-hash": cfg_old["flow-hash"],},
+               {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+    if cfg_old.get("input-xfrm"):
+        changes = list(reversed(changes))
+    for old in changes:
+        defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+    # Make sure we start with input-xfrm off, and tcp4 config non-sym
+    set_rss(cfg, {}, {})
+    set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+    # Setting sym and fixing tcp4 config not expected to pass right now
+    with ksft_raises(NlError):
+        set_rss(cfg, {"sym-xor"}, {"tcp4": {"ip-src", "ip-dst"}})
+    # One at a time should work, hopefully
+    set_rss(cfg, 0, {"tcp4": {"ip-src", "ip-dst"}})
+    no_support = False
+    try:
+        set_rss(cfg, {"sym-xor"}, {})
+    except NlError:
+        try:
+            set_rss(cfg, {"sym-or-xor"}, {})
+        except NlError:
+            no_support = True
+    if no_support:
+        raise KsftSkipEx("no input-xfrm supported")
+    # Disabling two at once should not work either without kernel changes
+    with ksft_raises(NlError):
+        set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+
+def test_rxfh_fields_ntf(cfg):
+    """ Test Rx Flow Hash notifications. """
+
+    cur = _ethtool_get_cfg(cfg, "tcp4")
+    if cur == "sdfn":
+        change = {"ip-src", "ip-dst"}
+    else:
+        change = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+
+    ethnl = EthtoolFamily()
+    ethnl.ntf_subscribe("monitor")
+
+    ethnl.rss_set({
+        "header": {"dev-index": cfg.ifindex},
+        "flow-hash": {"tcp4": change}
+    })
+    reset = defer(ethtool,
+                  f"--disable-netlink -N {cfg.ifname} rx-flow-hash tcp4 {cur}")
+
+    ntf = next(ethnl.poll_ntf(duration=0.2), None)
+    if ntf is None:
+        raise KsftFailEx("No notification received after IOCTL change")
+    ksft_eq(ntf["name"], "rss-ntf")
+    ksft_eq(ntf["msg"]["flow-hash"]["tcp4"], change)
+    ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+    reset.exec()
+    ntf = next(ethnl.poll_ntf(duration=0.2), None)
+    if ntf is None:
+        raise KsftFailEx("No notification received after Netlink change")
+    ksft_eq(ntf["name"], "rss-ntf")
+    ksft_ne(ntf["msg"]["flow-hash"]["tcp4"], change)
+    ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+
+def test_rss_ctx_add(cfg):
+    """ Test creating an additional RSS context via Netlink """
+
+    _require_2qs(cfg)
+
+    # Test basic creation
+    ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+    d = defer(ethtool, f"-X {cfg.ifname} context {ctx.get('context')} delete")
+    ksft_ne(ctx.get("context", 0), 0)
+    ksft_ne(set(ctx.get("indir", [0])), {0},
+            comment="Driver should init the indirection table")
+
+    # Try requesting the ID we just got allocated
+    with ksft_raises(NlError) as cm:
+        ctx = cfg.ethnl.rss_create_act({
+            "header": {"dev-index": cfg.ifindex},
+            "context": ctx.get("context"),
+        })
+        ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+    d.exec()
+    ksft_eq(cm.exception.nl_msg.error, -errno.EBUSY)
+
+    # Test creating with a specified RSS table, and context ID
+    ctx_id = ctx.get("context")
+    ctx = cfg.ethnl.rss_create_act({
+        "header": {"dev-index": cfg.ifindex},
+        "context": ctx_id,
+        "indir": [1],
+    })
+    ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+    ksft_eq(ctx.get("context"), ctx_id)
+    ksft_eq(set(ctx.get("indir", [0])), {1})
+
+
+def test_rss_ctx_ntf(cfg):
+    """ Test notifications for creating additional RSS contexts """
+
+    ethnl = EthtoolFamily()
+    ethnl.ntf_subscribe("monitor")
+
+    # Create / delete via Netlink
+    ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+    cfg.ethnl.rss_delete_act({
+        "header": {"dev-index": cfg.ifindex},
+        "context": ctx["context"],
+    })
+
+    ntf = next(ethnl.poll_ntf(duration=0.2), None)
+    if ntf is None:
+        raise KsftFailEx("[NL] No notification after context creation")
+    ksft_eq(ntf["name"], "rss-create-ntf")
+    ksft_eq(ctx, ntf["msg"])
+
+    ntf = next(ethnl.poll_ntf(duration=0.2), None)
+    if ntf is None:
+        raise KsftFailEx("[NL] No notification after context deletion")
+    ksft_eq(ntf["name"], "rss-delete-ntf")
+
+    # Create / deleve via IOCTL
+    ctx_id = _ethtool_create(cfg, "--disable-netlink -X", "context new")
+    ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} delete")
+    ntf = next(ethnl.poll_ntf(duration=0.2), None)
+    if ntf is None:
+        raise KsftFailEx("[IOCTL] No notification after context creation")
+    ksft_eq(ntf["name"], "rss-create-ntf")
+
+    ntf = next(ethnl.poll_ntf(duration=0.2), None)
+    if ntf is None:
+        raise KsftFailEx("[IOCTL] No notification after context deletion")
+    ksft_eq(ntf["name"], "rss-delete-ntf")
+
+
+def main() -> None:
+    """ Ksft boiler plate main """
+
+    with NetDrvEnv(__file__, nsim_test=False) as cfg:
+        cfg.ethnl = EthtoolFamily()
+        ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
new file mode 100755
index 000000000000..ed7e405682f0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -0,0 +1,832 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import datetime
+import random
+import re
+from lib.py import ksft_run, ksft_pr, ksft_exit
+from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises
+from lib.py import NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import rand_port
+from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure
+
+
+def _rss_key_str(key):
+    return ":".join(["{:02x}".format(x) for x in key])
+
+
+def _rss_key_rand(length):
+    return [random.randint(0, 255) for _ in range(length)]
+
+
+def _rss_key_check(cfg, data=None, context=0):
+    if data is None:
+        data = get_rss(cfg, context=context)
+    if 'rss-hash-key' not in data:
+        return
+    non_zero = [x for x in data['rss-hash-key'] if x != 0]
+    ksft_eq(bool(non_zero), True, comment=f"RSS key is all zero {data['rss-hash-key']}")
+
+
+def get_rss(cfg, context=0):
+    return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0]
+
+
+def get_drop_err_sum(cfg):
+    stats = ip("-s -s link show dev " + cfg.ifname, json=True)[0]
+    cnt = 0
+    for key in ['errors', 'dropped', 'over_errors', 'fifo_errors',
+                'length_errors', 'crc_errors', 'missed_errors',
+                'frame_errors']:
+        cnt += stats["stats64"]["rx"][key]
+    return cnt, stats["stats64"]["tx"]["carrier_changes"]
+
+
+def ethtool_create(cfg, act, opts):
+    output = ethtool(f"{act} {cfg.ifname} {opts}").stdout
+    # Output will be something like: "New RSS context is 1" or
+    # "Added rule with ID 7", we want the integer from the end
+    return int(output.split()[-1])
+
+
+def require_ntuple(cfg):
+    features = ethtool(f"-k {cfg.ifname}", json=True)[0]
+    if not features["ntuple-filters"]["active"]:
+        # ntuple is more of a capability than a config knob, don't bother
+        # trying to enable it (until some driver actually needs it).
+        raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"]))
+
+
+def require_context_cnt(cfg, need_cnt):
+    # There's no good API to get the context count, so the tests
+    # which try to add a lot opportunisitically set the count they
+    # discovered. Careful with test ordering!
+    if need_cnt and cfg.context_cnt and cfg.context_cnt < need_cnt:
+        raise KsftSkipEx(f"Test requires at least {need_cnt} contexts, but device only has {cfg.context_cnt}")
+
+
+# Get Rx packet counts for all queues, as a simple list of integers
+# if @prev is specified the prev counts will be subtracted
+def _get_rx_cnts(cfg, prev=None):
+    cfg.wait_hw_stats_settle()
+    data = cfg.netdevnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, dump=True)
+    data = [x for x in data if x['queue-type'] == "rx"]
+    max_q = max([x["queue-id"] for x in data])
+    queue_stats = [0] * (max_q + 1)
+    for q in data:
+        queue_stats[q["queue-id"]] = q["rx-packets"]
+        if prev and q["queue-id"] < len(prev):
+            queue_stats[q["queue-id"]] -= prev[q["queue-id"]]
+    return queue_stats
+
+
+def _send_traffic_check(cfg, port, name, params):
+    # params is a dict with 3 possible keys:
+    #  - "target": required, which queues we expect to get iperf traffic
+    #  - "empty": optional, which queues should see no traffic at all
+    #  - "noise": optional, which queues we expect to see low traffic;
+    #             used for queues of the main context, since some background
+    #             OS activity may use those queues while we're testing
+    # the value for each is a list, or some other iterable containing queue ids.
+
+    cnts = _get_rx_cnts(cfg)
+    GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+    cnts = _get_rx_cnts(cfg, prev=cnts)
+
+    directed = sum(cnts[i] for i in params['target'])
+
+    ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts))
+    if params.get('noise'):
+        ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2,
+                f"traffic on other queues ({name})':" + str(cnts))
+    if params.get('empty'):
+        ksft_eq(sum(cnts[i] for i in params['empty']), 0,
+                f"traffic on inactive queues ({name}): " + str(cnts))
+
+
+def _ntuple_rule_check(cfg, rule_id, ctx_id):
+    """Check that ntuple rule references RSS context ID"""
+    text = ethtool(f"-n {cfg.ifname} rule {rule_id}").stdout
+    pattern = f"RSS Context (ID: )?{ctx_id}"
+    ksft_true(re.search(pattern, text), "RSS context not referenced in ntuple rule")
+
+
+def test_rss_key_indir(cfg):
+    """Test basics like updating the main RSS key and indirection table."""
+
+    qcnt = len(_get_rx_cnts(cfg))
+    if qcnt < 3:
+        raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)")
+
+    data = get_rss(cfg)
+    want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table']
+    for k in want_keys:
+        if k not in data:
+            raise KsftFailEx("ethtool results missing key: " + k)
+        if not data[k]:
+            raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}")
+
+    _rss_key_check(cfg, data=data)
+    key_len = len(data['rss-hash-key'])
+
+    # Set the key
+    key = _rss_key_rand(key_len)
+    ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key))
+
+    data = get_rss(cfg)
+    ksft_eq(key, data['rss-hash-key'])
+
+    # Set the indirection table and the key together
+    key = _rss_key_rand(key_len)
+    ethtool(f"-X {cfg.ifname} equal 3 hkey " + _rss_key_str(key))
+    reset_indir = defer(ethtool, f"-X {cfg.ifname} default")
+
+    data = get_rss(cfg)
+    _rss_key_check(cfg, data=data)
+    ksft_eq(0, min(data['rss-indirection-table']))
+    ksft_eq(2, max(data['rss-indirection-table']))
+
+    # Reset indirection table and set the key
+    key = _rss_key_rand(key_len)
+    ethtool(f"-X {cfg.ifname} default hkey " + _rss_key_str(key))
+    data = get_rss(cfg)
+    _rss_key_check(cfg, data=data)
+    ksft_eq(0, min(data['rss-indirection-table']))
+    ksft_eq(qcnt - 1, max(data['rss-indirection-table']))
+
+    # Set the indirection table
+    ethtool(f"-X {cfg.ifname} equal 2")
+    data = get_rss(cfg)
+    ksft_eq(0, min(data['rss-indirection-table']))
+    ksft_eq(1, max(data['rss-indirection-table']))
+
+    # Check we only get traffic on the first 2 queues
+    cnts = _get_rx_cnts(cfg)
+    GenerateTraffic(cfg).wait_pkts_and_stop(20000)
+    cnts = _get_rx_cnts(cfg, prev=cnts)
+    # 2 queues, 20k packets, must be at least 5k per queue
+    ksft_ge(cnts[0], 5000, "traffic on main context (1/2): " + str(cnts))
+    ksft_ge(cnts[1], 5000, "traffic on main context (2/2): " + str(cnts))
+    # The other queues should be unused
+    ksft_eq(sum(cnts[2:]), 0, "traffic on unused queues: " + str(cnts))
+
+    # Restore, and check traffic gets spread again
+    reset_indir.exec()
+
+    cnts = _get_rx_cnts(cfg)
+    GenerateTraffic(cfg).wait_pkts_and_stop(20000)
+    cnts = _get_rx_cnts(cfg, prev=cnts)
+    if qcnt > 4:
+        # First two queues get less traffic than all the rest
+        ksft_lt(sum(cnts[:2]), sum(cnts[2:]),
+                "traffic distributed: " + str(cnts))
+    else:
+        # When queue count is low make sure third queue got significant pkts
+        ksft_ge(cnts[2], 3500, "traffic distributed: " + str(cnts))
+
+
+def test_rss_queue_reconfigure(cfg, main_ctx=True):
+    """Make sure queue changes can't override requested RSS config.
+
+    By default main RSS table should change to include all queues.
+    When user sets a specific RSS config the driver should preserve it,
+    even when queue count changes. Driver should refuse to deactivate
+    queues used in the user-set RSS config.
+    """
+
+    if not main_ctx:
+        require_ntuple(cfg)
+
+    # Start with 4 queues, an arbitrary known number.
+    try:
+        qcnt = len(_get_rx_cnts(cfg))
+        ethtool(f"-L {cfg.ifname} combined 4")
+        defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+    except:
+        raise KsftSkipEx("Not enough queues for the test or qstat not supported")
+
+    if main_ctx:
+        ctx_id = 0
+        ctx_ref = ""
+    else:
+        ctx_id = ethtool_create(cfg, "-X", "context new")
+        ctx_ref = f"context {ctx_id}"
+        defer(ethtool, f"-X {cfg.ifname} {ctx_ref} delete")
+
+    # Indirection table should be distributing to all queues.
+    data = get_rss(cfg, context=ctx_id)
+    ksft_eq(0, min(data['rss-indirection-table']))
+    ksft_eq(3, max(data['rss-indirection-table']))
+
+    # Increase queues, indirection table should be distributing to all queues.
+    # It's unclear whether tables of additional contexts should be reset, too.
+    if main_ctx:
+        ethtool(f"-L {cfg.ifname} combined 5")
+        data = get_rss(cfg)
+        ksft_eq(0, min(data['rss-indirection-table']))
+        ksft_eq(4, max(data['rss-indirection-table']))
+        ethtool(f"-L {cfg.ifname} combined 4")
+
+    # Configure the table explicitly
+    port = rand_port()
+    ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 0 1")
+    if main_ctx:
+        other_key = 'empty'
+        defer(ethtool, f"-X {cfg.ifname} default")
+    else:
+        other_key = 'noise'
+        flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}"
+        ntuple = ethtool_create(cfg, "-N", flow)
+        defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")
+
+    _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
+                                              other_key: (1, 2) })
+
+    # We should be able to increase queues, but table should be left untouched
+    ethtool(f"-L {cfg.ifname} combined 5")
+    data = get_rss(cfg, context=ctx_id)
+    ksft_eq({0, 3}, set(data['rss-indirection-table']))
+
+    _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
+                                              other_key: (1, 2, 4) })
+
+    # Setting queue count to 3 should fail, queue 3 is used
+    try:
+        ethtool(f"-L {cfg.ifname} combined 3")
+    except CmdExitFailure:
+        pass
+    else:
+        raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})")
+
+    if not main_ctx:
+        ethtool(f"-L {cfg.ifname} combined 4")
+        flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 1"
+        try:
+            # this targets queue 4, which doesn't exist
+            ntuple2 = ethtool_create(cfg, "-N", flow)
+            defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}")
+        except CmdExitFailure:
+            pass
+        else:
+            raise Exception(f"Driver didn't prevent us from targeting a nonexistent queue (context {ctx_id})")
+        # change the table to target queues 0 and 2
+        ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0")
+        # ntuple rule therefore targets queues 1 and 3
+        try:
+            ntuple2 = ethtool_create(cfg, "-N", flow)
+        except CmdExitFailure:
+            ksft_pr("Driver does not support rss + queue offset")
+            return
+
+        defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}")
+        # should replace existing filter
+        ksft_eq(ntuple, ntuple2)
+        _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3),
+                                                  'noise' : (0, 2) })
+        # Setting queue count to 3 should fail, queue 3 is used
+        try:
+            ethtool(f"-L {cfg.ifname} combined 3")
+        except CmdExitFailure:
+            pass
+        else:
+            raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})")
+
+
+def test_rss_resize(cfg):
+    """Test resizing of the RSS table.
+
+    Some devices dynamically increase and decrease the size of the RSS
+    indirection table based on the number of enabled queues.
+    When that happens driver must maintain the balance of entries
+    (preferably duplicating the smaller table).
+    """
+
+    channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+    ch_max = channels['combined-max']
+    qcnt = channels['combined-count']
+
+    if ch_max < 2:
+        raise KsftSkipEx(f"Not enough queues for the test: {ch_max}")
+
+    ethtool(f"-L {cfg.ifname} combined 2")
+    defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+
+    ethtool(f"-X {cfg.ifname} weight 1 7")
+    defer(ethtool, f"-X {cfg.ifname} default")
+
+    ethtool(f"-L {cfg.ifname} combined {ch_max}")
+    data = get_rss(cfg)
+    ksft_eq(0, min(data['rss-indirection-table']))
+    ksft_eq(1, max(data['rss-indirection-table']))
+
+    ksft_eq(7,
+            data['rss-indirection-table'].count(1) /
+            data['rss-indirection-table'].count(0),
+            f"Table imbalance after resize: {data['rss-indirection-table']}")
+
+
+def test_hitless_key_update(cfg):
+    """Test that flows may be rehashed without impacting traffic.
+
+    Some workloads may want to rehash the flows in response to an imbalance.
+    Most effective way to do that is changing the RSS key. Check that changing
+    the key does not cause link flaps or traffic disruption.
+
+    Disrupting traffic for key update is not a bug, but makes the key
+    update unusable for rehashing under load.
+    """
+    data = get_rss(cfg)
+    key_len = len(data['rss-hash-key'])
+
+    ethnl = EthtoolFamily()
+    key = random.randbytes(key_len)
+
+    tgen = GenerateTraffic(cfg)
+    try:
+        errors0, carrier0 = get_drop_err_sum(cfg)
+        t0 = datetime.datetime.now()
+        ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, "hkey": key})
+        t1 = datetime.datetime.now()
+        errors1, carrier1 = get_drop_err_sum(cfg)
+    finally:
+        tgen.wait_pkts_and_stop(5000)
+
+    ksft_lt((t1 - t0).total_seconds(), 0.15)
+    ksft_eq(errors1 - errors1, 0)
+    ksft_eq(carrier1 - carrier0, 0)
+
+
+def test_rss_context_dump(cfg):
+    """
+    Test dumping RSS contexts. This tests mostly exercises the kernel APIs.
+    """
+
+    # Get a random key of the right size
+    data = get_rss(cfg)
+    if 'rss-hash-key' in data:
+        key_data = _rss_key_rand(len(data['rss-hash-key']))
+        key = _rss_key_str(key_data)
+    else:
+        key_data = []
+        key = "ba:ad"
+
+    ids = []
+    try:
+        ids.append(ethtool_create(cfg, "-X", f"context new"))
+        defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+
+        ids.append(ethtool_create(cfg, "-X", f"context new weight 1 1"))
+        defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+
+        ids.append(ethtool_create(cfg, "-X", f"context new hkey {key}"))
+        defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+    except CmdExitFailure:
+        if not ids:
+            raise KsftSkipEx("Unable to add any contexts")
+        ksft_pr(f"Added only {len(ids)} out of 3 contexts")
+
+    expect_tuples = set([(cfg.ifname, -1)] + [(cfg.ifname, ctx_id) for ctx_id in ids])
+
+    # Dump all
+    ctxs = cfg.ethnl.rss_get({}, dump=True)
+    tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+    ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump")
+    ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname])
+    ksft_eq(expect_tuples, ctx_tuples)
+
+    # Sanity-check the results
+    for data in ctxs:
+        ksft_ne(set(data.get('indir', [1])), {0}, "indir table is all zero")
+        ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero")
+
+        # More specific checks
+        if len(ids) > 1 and data.get('context') == ids[1]:
+            ksft_eq(set(data['indir']), {0, 1},
+                    "ctx1 - indir table mismatch")
+        if len(ids) > 2 and data.get('context') == ids[2]:
+            ksft_eq(data['hkey'], bytes(key_data), "ctx2 - key mismatch")
+
+    # Ifindex filter
+    ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}, dump=True)
+    tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+    ctx_tuples = set(tuples)
+    ksft_eq(len(tuples), len(ctx_tuples), "duplicates in context dump")
+    ksft_eq(expect_tuples, ctx_tuples)
+
+    # Skip ctx 0
+    expect_tuples.remove((cfg.ifname, -1))
+
+    ctxs = cfg.ethnl.rss_get({'start-context': 1}, dump=True)
+    tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+    ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump")
+    ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname])
+    ksft_eq(expect_tuples, ctx_tuples)
+
+    # And finally both with ifindex and skip main
+    ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}, 'start-context': 1}, dump=True)
+    ctx_tuples = set([(c['header']['dev-name'], c.get('context', -1)) for c in ctxs])
+    ksft_eq(expect_tuples, ctx_tuples)
+
+
+def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
+    """
+    Test separating traffic into RSS contexts.
+    The queues will be allocated 2 for each context:
+     ctx0  ctx1  ctx2  ctx3
+    [0 1] [2 3] [4 5] [6 7] ...
+    """
+
+    require_ntuple(cfg)
+
+    requested_ctx_cnt = ctx_cnt
+
+    # Try to allocate more queues when necessary
+    qcnt = len(_get_rx_cnts(cfg))
+    if qcnt < 2 + 2 * ctx_cnt:
+        try:
+            ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
+            ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
+            defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+        except:
+            raise KsftSkipEx("Not enough queues for the test")
+
+    ports = []
+
+    # Use queues 0 and 1 for normal traffic
+    ethtool(f"-X {cfg.ifname} equal 2")
+    defer(ethtool, f"-X {cfg.ifname} default")
+
+    for i in range(ctx_cnt):
+        want_cfg = f"start {2 + i * 2} equal 2"
+        create_cfg = want_cfg if create_with_cfg else ""
+
+        try:
+            ctx_id = ethtool_create(cfg, "-X", f"context new {create_cfg}")
+            defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+        except CmdExitFailure:
+            # try to carry on and skip at the end
+            if i == 0:
+                raise
+            ksft_pr(f"Failed to create context {i + 1}, trying to test what we got")
+            ctx_cnt = i
+            if cfg.context_cnt is None:
+                cfg.context_cnt = ctx_cnt
+            break
+
+        _rss_key_check(cfg, context=ctx_id)
+
+        if not create_with_cfg:
+            ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}")
+            _rss_key_check(cfg, context=ctx_id)
+
+        # Sanity check the context we just created
+        data = get_rss(cfg, ctx_id)
+        ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data))
+        ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data))
+
+        ports.append(rand_port())
+        flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}"
+        ntuple = ethtool_create(cfg, "-N", flow)
+        defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")
+
+        _ntuple_rule_check(cfg, ntuple, ctx_id)
+
+    for i in range(ctx_cnt):
+        _send_traffic_check(cfg, ports[i], f"context {i}",
+                            { 'target': (2+i*2, 3+i*2),
+                              'noise': (0, 1),
+                              'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) })
+
+    if requested_ctx_cnt != ctx_cnt:
+        raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
+
+
+def test_rss_context4(cfg):
+    test_rss_context(cfg, 4)
+
+
+def test_rss_context32(cfg):
+    test_rss_context(cfg, 32)
+
+
+def test_rss_context4_create_with_cfg(cfg):
+    test_rss_context(cfg, 4, create_with_cfg=True)
+
+
+def test_rss_context_queue_reconfigure(cfg):
+    test_rss_queue_reconfigure(cfg, main_ctx=False)
+
+
+def test_rss_context_out_of_order(cfg, ctx_cnt=4):
+    """
+    Test separating traffic into RSS contexts.
+    Contexts are removed in semi-random order, and steering re-tested
+    to make sure removal doesn't break steering to surviving contexts.
+    Test requires 3 contexts to work.
+    """
+
+    require_ntuple(cfg)
+    require_context_cnt(cfg, 4)
+
+    # Try to allocate more queues when necessary
+    qcnt = len(_get_rx_cnts(cfg))
+    if qcnt < 2 + 2 * ctx_cnt:
+        try:
+            ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
+            ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
+            defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+        except:
+            raise KsftSkipEx("Not enough queues for the test")
+
+    ntuple = []
+    ctx = []
+    ports = []
+
+    def remove_ctx(idx):
+        ntuple[idx].exec()
+        ntuple[idx] = None
+        ctx[idx].exec()
+        ctx[idx] = None
+
+    def check_traffic():
+        for i in range(ctx_cnt):
+            if ctx[i]:
+                expected = {
+                    'target': (2+i*2, 3+i*2),
+                    'noise': (0, 1),
+                    'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt))
+                }
+            else:
+                expected = {
+                    'target': (0, 1),
+                    'empty':  range(2, 2+2*ctx_cnt)
+                }
+
+            _send_traffic_check(cfg, ports[i], f"context {i}", expected)
+
+    # Use queues 0 and 1 for normal traffic
+    ethtool(f"-X {cfg.ifname} equal 2")
+    defer(ethtool, f"-X {cfg.ifname} default")
+
+    for i in range(ctx_cnt):
+        ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2")
+        ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete"))
+
+        ports.append(rand_port())
+        flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}"
+        ntuple_id = ethtool_create(cfg, "-N", flow)
+        ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}"))
+
+    check_traffic()
+
+    # Remove middle context
+    remove_ctx(ctx_cnt // 2)
+    check_traffic()
+
+    # Remove first context
+    remove_ctx(0)
+    check_traffic()
+
+    # Remove last context
+    remove_ctx(-1)
+    check_traffic()
+
+
+def test_rss_context_overlap(cfg, other_ctx=0):
+    """
+    Test contexts overlapping with each other.
+    Use 4 queues for the main context, but only queues 2 and 3 for context 1.
+    """
+
+    require_ntuple(cfg)
+    if other_ctx:
+        require_context_cnt(cfg, 2)
+
+    queue_cnt = len(_get_rx_cnts(cfg))
+    if queue_cnt < 4:
+        try:
+            ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
+            ethtool(f"-L {cfg.ifname} combined 4")
+            defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
+        except:
+            raise KsftSkipEx("Not enough queues for the test")
+
+    if other_ctx == 0:
+        ethtool(f"-X {cfg.ifname} equal 4")
+        defer(ethtool, f"-X {cfg.ifname} default")
+    else:
+        other_ctx = ethtool_create(cfg, "-X", "context new")
+        ethtool(f"-X {cfg.ifname} context {other_ctx} equal 4")
+        defer(ethtool, f"-X {cfg.ifname} context {other_ctx} delete")
+
+    ctx_id = ethtool_create(cfg, "-X", "context new")
+    ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2")
+    defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+    port = rand_port()
+    if other_ctx:
+        flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {other_ctx}"
+        ntuple_id = ethtool_create(cfg, "-N", flow)
+        ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+    # Test the main context
+    cnts = _get_rx_cnts(cfg)
+    GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+    cnts = _get_rx_cnts(cfg, prev=cnts)
+
+    ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts))
+    ksft_ge(sum(cnts[ :2]),  7000, "traffic on main context (1/2): " + str(cnts))
+    ksft_ge(sum(cnts[2:4]),  7000, "traffic on main context (2/2): " + str(cnts))
+    if other_ctx == 0:
+        ksft_eq(sum(cnts[4: ]),     0, "traffic on other queues: " + str(cnts))
+
+    # Now create a rule for context 1 and make sure traffic goes to a subset
+    if other_ctx:
+        ntuple.exec()
+    flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}"
+    ntuple_id = ethtool_create(cfg, "-N", flow)
+    defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+    cnts = _get_rx_cnts(cfg)
+    GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+    cnts = _get_rx_cnts(cfg, prev=cnts)
+
+    directed = sum(cnts[2:4])
+    ksft_lt(sum(cnts[ :2]), directed / 2, "traffic on main context: " + str(cnts))
+    ksft_ge(directed, 20000, "traffic on extra context: " + str(cnts))
+    if other_ctx == 0:
+        ksft_eq(sum(cnts[4: ]),     0, "traffic on other queues: " + str(cnts))
+
+
+def test_rss_context_overlap2(cfg):
+    test_rss_context_overlap(cfg, True)
+
+
+def test_flow_add_context_missing(cfg):
+    """
+    Test that we are not allowed to add a rule pointing to an RSS context
+    which was never created.
+    """
+
+    require_ntuple(cfg)
+
+    # Find a context which doesn't exist
+    for ctx_id in range(1, 100):
+        try:
+            get_rss(cfg, context=ctx_id)
+        except CmdExitFailure:
+            break
+
+    with ksft_raises(CmdExitFailure) as cm:
+        flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port 1234 context {ctx_id}"
+        ntuple_id = ethtool_create(cfg, "-N", flow)
+        ethtool(f"-N {cfg.ifname} delete {ntuple_id}")
+    if cm.exception:
+        ksft_in('Invalid argument', cm.exception.cmd.stderr)
+
+
+def test_delete_rss_context_busy(cfg):
+    """
+    Test that deletion returns -EBUSY when an rss context is being used
+    by an ntuple filter.
+    """
+
+    require_ntuple(cfg)
+
+    # create additional rss context
+    ctx_id = ethtool_create(cfg, "-X", "context new")
+    ctx_deleter = defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+    # utilize context from ntuple filter
+    port = rand_port()
+    flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}"
+    ntuple_id = ethtool_create(cfg, "-N", flow)
+    defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+    # attempt to delete in-use context
+    try:
+        ctx_deleter.exec_only()
+        ctx_deleter.cancel()
+        raise KsftFailEx(f"deleted context {ctx_id} used by rule {ntuple_id}")
+    except CmdExitFailure:
+        pass
+
+
+def test_rss_ntuple_addition(cfg):
+    """
+    Test that the queue offset (ring_cookie) of an ntuple rule is added
+    to the queue number read from the indirection table.
+    """
+
+    require_ntuple(cfg)
+
+    queue_cnt = len(_get_rx_cnts(cfg))
+    if queue_cnt < 4:
+        try:
+            ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
+            ethtool(f"-L {cfg.ifname} combined 4")
+            defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
+        except:
+            raise KsftSkipEx("Not enough queues for the test")
+
+    # Use queue 0 for normal traffic
+    ethtool(f"-X {cfg.ifname} equal 1")
+    defer(ethtool, f"-X {cfg.ifname} default")
+
+    # create additional rss context
+    ctx_id = ethtool_create(cfg, "-X", "context new equal 2")
+    defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+    # utilize context from ntuple filter
+    port = rand_port()
+    flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 2"
+    try:
+        ntuple_id = ethtool_create(cfg, "-N", flow)
+    except CmdExitFailure:
+        raise KsftSkipEx("Ntuple filter with RSS and nonzero action not supported")
+    defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+    _send_traffic_check(cfg, port, f"context {ctx_id}", { 'target': (2, 3),
+                                                          'empty' : (1,),
+                                                          'noise' : (0,) })
+
+
+def test_rss_default_context_rule(cfg):
+    """
+    Allocate a port, direct this port to context 0, then create a new RSS
+    context and steer all TCP traffic to it (context 1).  Verify that:
+      * Traffic to the specific port continues to use queues of the main
+        context (0/1).
+      * Traffic to any other TCP port is redirected to the new context
+        (queues 2/3).
+    """
+
+    require_ntuple(cfg)
+
+    queue_cnt = len(_get_rx_cnts(cfg))
+    if queue_cnt < 4:
+        try:
+            ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
+            ethtool(f"-L {cfg.ifname} combined 4")
+            defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
+        except Exception as exc:
+            raise KsftSkipEx("Not enough queues for the test") from exc
+
+    # Use queues 0 and 1 for the main context
+    ethtool(f"-X {cfg.ifname} equal 2")
+    defer(ethtool, f"-X {cfg.ifname} default")
+
+    # Create a new RSS context that uses queues 2 and 3
+    ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 2")
+    defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+    # Generic low-priority rule: redirect all TCP traffic to the new context.
+    # Give it an explicit higher location number (lower priority).
+    flow_generic = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} context {ctx_id} loc 1"
+    ethtool(f"-N {cfg.ifname} {flow_generic}")
+    defer(ethtool, f"-N {cfg.ifname} delete 1")
+
+    # Specific high-priority rule for a random port that should stay on context 0.
+    # Assign loc 0 so it is evaluated before the generic rule.
+    port_main = rand_port()
+    flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0"
+    ethtool(f"-N {cfg.ifname} {flow_main}")
+    defer(ethtool, f"-N {cfg.ifname} delete 0")
+
+    _ntuple_rule_check(cfg, 1, ctx_id)
+
+    # Verify that traffic matching the specific rule still goes to queues 0/1
+    _send_traffic_check(cfg, port_main, "context 0",
+                        { 'target': (0, 1),
+                          'empty' : (2, 3) })
+
+    # And that traffic for any other port is steered to the new context
+    port_other = rand_port()
+    _send_traffic_check(cfg, port_other, f"context {ctx_id}",
+                        { 'target': (2, 3),
+                          'noise' : (0, 1) })
+
+
+def main() -> None:
+    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+        cfg.context_cnt = None
+        cfg.ethnl = EthtoolFamily()
+        cfg.netdevnl = NetdevFamily()
+
+        ksft_run([test_rss_key_indir, test_rss_queue_reconfigure,
+                  test_rss_resize, test_hitless_key_update,
+                  test_rss_context, test_rss_context4, test_rss_context32,
+                  test_rss_context_dump, test_rss_context_queue_reconfigure,
+                  test_rss_context_overlap, test_rss_context_overlap2,
+                  test_rss_context_out_of_order, test_rss_context4_create_with_cfg,
+                  test_flow_add_context_missing,
+                  test_delete_rss_context_busy, test_rss_ntuple_addition,
+                  test_rss_default_context_rule],
+                 args=(cfg, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
new file mode 100755
index 000000000000..6fa95fe27c47
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Tests for RSS hashing on IPv6 Flow Label.
+"""
+
+import glob
+import os
+import socket
+from lib.py import CmdExitFailure
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \
+    ksft_not_in, ksft_raises, KsftSkipEx
+from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port
+from lib.py import NetDrvEpEnv
+
+
+def _check_system(cfg):
+    if not hasattr(socket, "SO_INCOMING_CPU"):
+        raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
+    qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*"))
+    if qcnt < 2:
+        raise KsftSkipEx(f"Local has only {qcnt} queues")
+
+    for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt",
+              f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]:
+        try:
+            with open(f, 'r') as fp:
+                setting = fp.read().strip()
+                # CPU mask will be zeros and commas
+                if setting.replace("0", "").replace(",", ""):
+                    raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}")
+        except FileNotFoundError:
+            pass
+
+    # 1 is the default, if someone changed it we probably shouldn"t mess with it
+    af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout
+    if af.strip() != "1":
+        raise KsftSkipEx("Remote does not have auto_flowlabels enabled")
+
+
+def _ethtool_get_cfg(cfg, fl_type):
+    descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+
+    converter = {
+        "IP SA": "s",
+        "IP DA": "d",
+        "L3 proto": "t",
+        "L4 bytes 0 & 1 [TCP/UDP src port]": "f",
+        "L4 bytes 2 & 3 [TCP/UDP dst port]": "n",
+        "IPv6 Flow Label": "l",
+    }
+
+    ret = ""
+    for line in descr.split("\n")[1:-2]:
+        # if this raises we probably need to add more keys to converter above
+        ret += converter[line]
+    return ret
+
+
+def _traffic(cfg, one_sock, one_cpu):
+    local_port  = rand_port(socket.SOCK_DGRAM)
+    remote_port = rand_port(socket.SOCK_DGRAM)
+
+    sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
+    sock.bind(("", local_port))
+    sock.connect((cfg.remote_addr_v["6"], 0))
+    if one_sock:
+        send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \
+                "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-"
+    else:
+        send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \
+              f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done"
+
+    cpus = set()
+    with bkg(send, shell=True, host=cfg.remote, exit_wait=True):
+        for _ in range(20):
+            fd_read_timeout(sock.fileno(), 1)
+            cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
+            cpus.add(cpu)
+
+    if one_cpu:
+        ksft_eq(len(cpus), 1,
+                f"{one_sock=} - expected one CPU, got traffic on: {cpus=}")
+    else:
+        ksft_ge(len(cpus), 2,
+                f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}")
+
+
+def test_rss_flow_label(cfg):
+    """
+    Test hashing on IPv6 flow label. Send traffic over a single socket
+    and over multiple sockets. Depend on the remote having auto-label
+    enabled so that it randomizes the label per socket.
+    """
+
+    cfg.require_ipver("6")
+    cfg.require_cmd("socat", remote=True)
+    _check_system(cfg)
+
+    # Enable flow label hashing for UDP6
+    initial = _ethtool_get_cfg(cfg, "udp6")
+    no_lbl = initial.replace("l", "")
+    if "l" not in initial:
+        try:
+            cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}")
+        except CmdExitFailure as exc:
+            raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc
+
+        defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}")
+
+    _traffic(cfg, one_sock=True, one_cpu=True)
+    _traffic(cfg, one_sock=False, one_cpu=False)
+
+    # Disable it, we should see no hashing (reset was already defer()ed)
+    cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}")
+
+    _traffic(cfg, one_sock=False, one_cpu=True)
+
+
+def _check_v4_flow_types(cfg):
+    for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]:
+        try:
+            cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+            ksft_not_in("Flow Label", cur,
+                        comment=f"{fl_type=} has Flow Label:" + cur)
+        except CmdExitFailure:
+            # Probably does not support this flow type
+            pass
+
+
+def test_rss_flow_label_6only(cfg):
+    """
+    Test interactions with IPv4 flow types. It should not be possible to set
+    IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also
+    not appear in the IPv4 "current config".
+    """
+
+    with ksft_raises(CmdExitFailure) as cm:
+        cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl")
+    ksft_in("Invalid argument", cm.exception.cmd.stderr)
+
+    _check_v4_flow_types(cfg)
+
+    # Try to enable Flow Labels and check again, in case it leaks thru
+    initial = _ethtool_get_cfg(cfg, "udp6")
+    changed = initial.replace("l", "") if "l" in initial else initial + "l"
+
+    cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}")
+    restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}")
+
+    _check_v4_flow_types(cfg)
+    restore.exec()
+    _check_v4_flow_types(cfg)
+
+
+def main() -> None:
+    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+        ksft_run([test_rss_flow_label,
+                  test_rss_flow_label_6only],
+                 args=(cfg, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
new file mode 100755
index 000000000000..72880e388478
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import multiprocessing
+import socket
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout
+from lib.py import NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import rand_port
+
+
+def traffic(cfg, local_port, remote_port, ipver):
+    af_inet = socket.AF_INET if ipver == "4" else socket.AF_INET6
+    sock = socket.socket(af_inet, socket.SOCK_DGRAM)
+    sock.bind(("", local_port))
+    sock.connect((cfg.remote_addr_v[ipver], remote_port))
+    tgt = f"{ipver}:[{cfg.addr_v[ipver]}]:{local_port},sourceport={remote_port}"
+    cmd("echo a | socat - UDP" + tgt, host=cfg.remote)
+    fd_read_timeout(sock.fileno(), 5)
+    return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
+
+
+def test_rss_input_xfrm(cfg, ipver):
+    """
+    Test symmetric input_xfrm.
+    If symmetric RSS hash is configured, send traffic twice, swapping the
+    src/dst UDP ports, and verify that the same queue is receiving the traffic
+    in both cases (IPs are constant).
+    """
+
+    if multiprocessing.cpu_count() < 2:
+        raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")
+
+    cfg.require_cmd("socat", local=False, remote=True)
+
+    if not hasattr(socket, "SO_INCOMING_CPU"):
+        raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
+    rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}})
+    input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {})))
+
+    # Check for symmetric xor/or-xor
+    if not input_xfrm:
+        raise KsftSkipEx("Symmetric RSS hash not requested")
+
+    cpus = set()
+    successful = 0
+    for _ in range(100):
+        try:
+            port1 = rand_port(socket.SOCK_DGRAM)
+            port2 = rand_port(socket.SOCK_DGRAM)
+            cpu1 = traffic(cfg, port1, port2, ipver)
+            cpu2 = traffic(cfg, port2, port1, ipver)
+            cpus.update([cpu1, cpu2])
+            ksft_eq(
+                cpu1, cpu2, comment=f"Received traffic on different cpus with ports ({port1 = }, {port2 = }) while symmetric hash is configured")
+
+            successful += 1
+            if successful == 10:
+                break
+        except:
+            continue
+    else:
+        raise KsftFailEx("Failed to run traffic")
+
+    ksft_ge(len(cpus), 2,
+            comment=f"Received traffic on less than two cpus {cpus = }")
+
+
+def test_rss_input_xfrm_ipv4(cfg):
+    cfg.require_ipver("4")
+    test_rss_input_xfrm(cfg, "4")
+
+
+def test_rss_input_xfrm_ipv6(cfg):
+    cfg.require_ipver("6")
+    test_rss_input_xfrm(cfg, "6")
+
+
+def main() -> None:
+    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+        cfg.ethnl = EthtoolFamily()
+        cfg.netdevnl = NetdevFamily()
+
+        ksft_run([test_rss_input_xfrm_ipv4, test_rss_input_xfrm_ipv6],
+                 args=(cfg, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/settings b/tools/testing/selftests/drivers/net/hw/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c
new file mode 100644
index 000000000000..d23b3b0c20a3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c
@@ -0,0 +1,655 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <ynl.h>
+#include "ethtool-user.h"
+
+#include "kselftest.h"
+#include "../../../net/lib/ksft.h"
+
+#define TOEPLITZ_KEY_MIN_LEN	40
+#define TOEPLITZ_KEY_MAX_LEN	60
+
+#define TOEPLITZ_STR_LEN(K)	(((K) * 3) - 1)	/* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN	TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN	TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN	((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16)	/* real constraint is PACKET_FANOUT_MAX */
+#define RSS_MAX_INDIR	(1 << 16)
+
+#define RPS_MAX_CPUS 16UL	/* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport =	8000;
+static int cfg_family =		AF_INET6;
+static char *cfg_ifname =	"eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type =		SOCK_STREAM;
+static int cfg_timeout_msec =	1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...)	do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+	int fd;
+	char *mmap;
+	int idx;
+	int cpu;
+};
+
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS];	/* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static unsigned int rss_indir_tbl[RSS_MAX_INDIR];
+static unsigned int rss_indir_tbl_size;
+static struct ring_state rings[RSS_MAX_CPUS];
+
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+				const unsigned char *key)
+{
+	int i, bit, ret = 0;
+	uint32_t key32;
+
+	key32 = ntohl(*((uint32_t *)key));
+	key += 4;
+
+	for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+		for (bit = 7; bit >= 0; bit--) {
+			if (four_tuple[i] & (1 << bit))
+				ret ^= key32;
+
+			key32 <<= 1;
+			key32 |= !!(key[0] & (1 << bit));
+		}
+		key++;
+	}
+
+	return ret;
+}
+
+/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+	int queue;
+
+	if (rss_indir_tbl_size)
+		queue = rss_indir_tbl[rx_hash % rss_indir_tbl_size];
+	else
+		queue = rx_hash % cfg_num_queues;
+
+	log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+	if (rx_irq_cpus[queue] != cpu) {
+		log_verbose(". error: rss cpu mismatch (%d)", cpu);
+		frames_error++;
+	}
+}
+
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
+	int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
+
+	log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+	if (rps_silo_to_cpu[silo] != cpu) {
+		log_verbose(". error: rps cpu mismatch (%d)", cpu);
+		frames_error++;
+	}
+}
+
+static void log_rxhash(int cpu, uint32_t rx_hash,
+		       const char *addrs, int addr_len)
+{
+	char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+	uint16_t *ports;
+
+	if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+	    !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
+		error(1, 0, "address parse error");
+
+	ports = (void *)addrs + (addr_len * 2);
+	log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+		    cpu, rx_hash, saddr, daddr,
+		    ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Compare computed rxhash with rxhash received from tpacket_v3 */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+	unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+	uint32_t rx_hash_sw;
+	const char *addrs;
+	int addr_len;
+
+	if (cfg_family == AF_INET) {
+		addr_len = sizeof(struct in_addr);
+		addrs = pkt + offsetof(struct iphdr, saddr);
+	} else {
+		addr_len = sizeof(struct in6_addr);
+		addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
+	}
+
+	memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+	rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+	if (cfg_verbose)
+		log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+	if (rx_hash != rx_hash_sw) {
+		log_verbose(" != expected 0x%x\n", rx_hash_sw);
+		frames_error++;
+		return;
+	}
+
+	log_verbose(" OK");
+	if (cfg_num_queues)
+		verify_rss(rx_hash, cpu);
+	else if (cfg_num_rps_cpus)
+		verify_rps(rx_hash, cpu);
+	log_verbose("\n");
+}
+
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+	struct tpacket3_hdr *hdr = (void *)frame;
+
+	if (hdr->hv1.tp_rxhash)
+		verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+			      ring->cpu);
+	else
+		frames_nohash++;
+
+	return frame + hdr->tp_next_offset;
+}
+
+/* A single TPACKET_V3 block can hold multiple frames */
+static bool recv_block(struct ring_state *ring)
+{
+	struct tpacket_block_desc *block;
+	char *frame;
+	int i;
+
+	block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+	if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+		return false;
+
+	frame = (char *)block;
+	frame += block->hdr.bh1.offset_to_first_pkt;
+
+	for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
+		frame = recv_frame(ring, frame);
+		frames_received++;
+	}
+
+	block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+	ring->idx = (ring->idx + 1) % ring_block_nr;
+
+	return true;
+}
+
+/* simple test: sleep once unconditionally and then process all rings */
+static void process_rings(void)
+{
+	int i;
+
+	usleep(1000 * cfg_timeout_msec);
+
+	for (i = 0; i < num_cpus; i++)
+		do {} while (recv_block(&rings[i]));
+
+	fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+		frames_received - frames_nohash - frames_error,
+		frames_nohash, frames_error);
+}
+
+static char *setup_ring(int fd)
+{
+	struct tpacket_req3 req3 = {0};
+	void *ring;
+
+	req3.tp_retire_blk_tov = cfg_timeout_msec / 8;
+	req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+	req3.tp_frame_size = 2048;
+	req3.tp_frame_nr = 1 << 10;
+	req3.tp_block_nr = 16;
+
+	req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+	req3.tp_block_size /= req3.tp_block_nr;
+
+	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+		error(1, errno, "setsockopt PACKET_RX_RING");
+
+	ring_block_sz = req3.tp_block_size;
+	ring_block_nr = req3.tp_block_nr;
+
+	ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+		    PROT_READ | PROT_WRITE,
+		    MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+	if (ring == MAP_FAILED)
+		error(1, 0, "mmap failed");
+
+	return ring;
+}
+
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+		BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, off_proto),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
+		BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, off_dport),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
+		BPF_STMT(BPF_RET + BPF_K, 0),
+		BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+	};
+	struct sock_fprog prog = {};
+
+	prog.filter = filter;
+	prog.len = ARRAY_SIZE(filter);
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+		error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol and destination port */
+static void set_filter(int fd)
+{
+	const int off_dport = offsetof(struct tcphdr, dest);	/* same for udp */
+	uint8_t proto;
+
+	proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+	if (cfg_family == AF_INET)
+		__set_filter(fd, offsetof(struct iphdr, protocol), proto,
+			     sizeof(struct iphdr) + off_dport);
+	else
+		__set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
+			     sizeof(struct ip6_hdr) + off_dport);
+}
+
+/* drop everything: used temporarily during setup */
+static void set_filter_null(int fd)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET + BPF_K, 0),
+	};
+	struct sock_fprog prog = {};
+
+	prog.filter = filter;
+	prog.len = ARRAY_SIZE(filter);
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+		error(1, errno, "setsockopt filter");
+}
+
+static int create_ring(char **ring)
+{
+	struct fanout_args args = {
+		.id = 1,
+		.type_flags = PACKET_FANOUT_CPU,
+		.max_num_members = RSS_MAX_CPUS
+	};
+	struct sockaddr_ll ll = { 0 };
+	int fd, val;
+
+	fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket creation failed");
+
+	val = TPACKET_V3;
+	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+		error(1, errno, "setsockopt PACKET_VERSION");
+	*ring = setup_ring(fd);
+
+	/* block packets until all rings are added to the fanout group:
+	 * else packets can arrive during setup and get misclassified
+	 */
+	set_filter_null(fd);
+
+	ll.sll_family = AF_PACKET;
+	ll.sll_ifindex = if_nametoindex(cfg_ifname);
+	ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+						  htons(ETH_P_IPV6);
+	if (bind(fd, (void *)&ll, sizeof(ll)))
+		error(1, errno, "bind");
+
+	/* must come after bind: verifies all programs in group match */
+	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+		/* on failure, retry using old API if that is sufficient:
+		 * it has a hard limit of 256 sockets, so only try if
+		 * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
+		 * in this API, the third argument is left implicit.
+		 */
+		if (cfg_num_queues || num_cpus > 256 ||
+		    setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+			       &args, sizeof(uint32_t)))
+			error(1, errno, "setsockopt PACKET_FANOUT cpu");
+	}
+
+	return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
+static int setup_sink(void)
+{
+	int fd, val;
+
+	fd = socket(cfg_family, cfg_type, 0);
+	if (fd == -1)
+		error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+	val = 1 << 20;
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+		error(1, errno, "setsockopt rcvbuf");
+
+	return fd;
+}
+
+static void setup_rings(void)
+{
+	int i;
+
+	for (i = 0; i < num_cpus; i++) {
+		rings[i].cpu = i;
+		rings[i].fd = create_ring(&rings[i].mmap);
+	}
+
+	/* accept packets once all rings in the fanout group are up */
+	for (i = 0; i < num_cpus; i++)
+		set_filter(rings[i].fd);
+}
+
+static void cleanup_rings(void)
+{
+	int i;
+
+	for (i = 0; i < num_cpus; i++) {
+		if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
+			error(1, errno, "munmap");
+		if (close(rings[i].fd))
+			error(1, errno, "close");
+	}
+}
+
+static void parse_cpulist(const char *arg)
+{
+	do {
+		rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+		arg = strchr(arg, ',');
+		if (!arg)
+			break;
+		arg++;			// skip ','
+	} while (1);
+}
+
+static void show_cpulist(void)
+{
+	int i;
+
+	for (i = 0; i < cfg_num_queues; i++)
+		fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
+}
+
+static void show_silos(void)
+{
+	int i;
+
+	for (i = 0; i < cfg_num_rps_cpus; i++)
+		fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
+}
+
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+	int i, ret, off;
+
+	if (slen < TOEPLITZ_STR_MIN_LEN ||
+	    slen > TOEPLITZ_STR_MAX_LEN + 1)
+		error(1, 0, "invalid toeplitz key");
+
+	for (i = 0, off = 0; off < slen; i++, off += 3) {
+		ret = sscanf(str + off, "%hhx", &key[i]);
+		if (ret != 1)
+			error(1, 0, "key parse error at %d off %d len %d",
+			      i, off, slen);
+	}
+}
+
+static void parse_rps_bitmap(const char *arg)
+{
+	unsigned long bitmap;
+	int i;
+
+	bitmap = strtoul(arg, NULL, 0);
+
+	if (bitmap & ~(RPS_MAX_CPUS - 1))
+		error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+		      bitmap, RPS_MAX_CPUS - 1);
+
+	for (i = 0; i < RPS_MAX_CPUS; i++)
+		if (bitmap & 1UL << i)
+			rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+static void read_rss_dev_info_ynl(void)
+{
+	struct ethtool_rss_get_req *req;
+	struct ethtool_rss_get_rsp *rsp;
+	struct ynl_sock *ys;
+
+	ys = ynl_sock_create(&ynl_ethtool_family, NULL);
+	if (!ys)
+		error(1, errno, "ynl_sock_create failed");
+
+	req = ethtool_rss_get_req_alloc();
+	if (!req)
+		error(1, errno, "ethtool_rss_get_req_alloc failed");
+
+	ethtool_rss_get_req_set_header_dev_name(req, cfg_ifname);
+
+	rsp = ethtool_rss_get(ys, req);
+	if (!rsp)
+		error(1, ys->err.code, "YNL: %s", ys->err.msg);
+
+	if (!rsp->_len.hkey)
+		error(1, 0, "RSS key not available for %s", cfg_ifname);
+
+	if (rsp->_len.hkey < TOEPLITZ_KEY_MIN_LEN ||
+	    rsp->_len.hkey > TOEPLITZ_KEY_MAX_LEN)
+		error(1, 0, "RSS key length %u out of bounds [%u, %u]",
+		      rsp->_len.hkey, TOEPLITZ_KEY_MIN_LEN,
+		      TOEPLITZ_KEY_MAX_LEN);
+
+	memcpy(toeplitz_key, rsp->hkey, rsp->_len.hkey);
+
+	if (rsp->_count.indir > RSS_MAX_INDIR)
+		error(1, 0, "RSS indirection table too large (%u > %u)",
+		      rsp->_count.indir, RSS_MAX_INDIR);
+
+	/* If indir table not available we'll fallback to simple modulo math */
+	if (rsp->_count.indir) {
+		memcpy(rss_indir_tbl, rsp->indir,
+		       rsp->_count.indir * sizeof(rss_indir_tbl[0]));
+		rss_indir_tbl_size = rsp->_count.indir;
+
+		log_verbose("RSS indirection table size: %u\n",
+			    rss_indir_tbl_size);
+	}
+
+	ethtool_rss_get_rsp_free(rsp);
+	ethtool_rss_get_req_free(req);
+	ynl_sock_destroy(ys);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	static struct option long_options[] = {
+	    {"dport",	required_argument, 0, 'd'},
+	    {"cpus",	required_argument, 0, 'C'},
+	    {"key",	required_argument, 0, 'k'},
+	    {"iface",	required_argument, 0, 'i'},
+	    {"ipv4",	no_argument, 0, '4'},
+	    {"ipv6",	no_argument, 0, '6'},
+	    {"sink",	no_argument, 0, 's'},
+	    {"tcp",	no_argument, 0, 't'},
+	    {"timeout",	required_argument, 0, 'T'},
+	    {"udp",	no_argument, 0, 'u'},
+	    {"verbose",	no_argument, 0, 'v'},
+	    {"rps",	required_argument, 0, 'r'},
+	    {0, 0, 0, 0}
+	};
+	bool have_toeplitz = false;
+	int index, c;
+
+	while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) {
+		switch (c) {
+		case '4':
+			cfg_family = AF_INET;
+			break;
+		case '6':
+			cfg_family = AF_INET6;
+			break;
+		case 'C':
+			parse_cpulist(optarg);
+			break;
+		case 'd':
+			cfg_dport = strtol(optarg, NULL, 0);
+			break;
+		case 'i':
+			cfg_ifname = optarg;
+			break;
+		case 'k':
+			parse_toeplitz_key(optarg, strlen(optarg),
+					   toeplitz_key);
+			have_toeplitz = true;
+			break;
+		case 'r':
+			parse_rps_bitmap(optarg);
+			break;
+		case 's':
+			cfg_sink = true;
+			break;
+		case 't':
+			cfg_type = SOCK_STREAM;
+			break;
+		case 'T':
+			cfg_timeout_msec = strtol(optarg, NULL, 0);
+			break;
+		case 'u':
+			cfg_type = SOCK_DGRAM;
+			break;
+		case 'v':
+			cfg_verbose = true;
+			break;
+
+		default:
+			error(1, 0, "unknown option %c", optopt);
+			break;
+		}
+	}
+
+	if (!have_toeplitz)
+		read_rss_dev_info_ynl();
+
+	num_cpus = get_nprocs();
+	if (num_cpus > RSS_MAX_CPUS)
+		error(1, 0, "increase RSS_MAX_CPUS");
+
+	if (cfg_num_queues && cfg_num_rps_cpus)
+		error(1, 0,
+		      "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+	if (cfg_verbose) {
+		show_cpulist();
+		show_silos();
+	}
+}
+
+int main(int argc, char **argv)
+{
+	const int min_tests = 10;
+	int fd_sink = -1;
+
+	parse_opts(argc, argv);
+
+	if (cfg_sink)
+		fd_sink = setup_sink();
+
+	setup_rings();
+
+	/* Signal to test framework that we're ready to receive */
+	ksft_ready();
+
+	process_rings();
+	cleanup_rings();
+
+	if (cfg_sink && close(fd_sink))
+		error(1, errno, "close sink");
+
+	if (frames_received - frames_nohash < min_tests)
+		error(1, 0, "too few frames for verification");
+
+	return frames_error;
+}
diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.py b/tools/testing/selftests/drivers/net/hw/toeplitz.py
new file mode 100755
index 000000000000..d2db5ee9e358
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Toeplitz Rx hashing test:
+ - rxhash (the hash value calculation itself);
+ - RSS mapping from rxhash to rx queue;
+ - RPS mapping from rxhash to cpu.
+"""
+
+import glob
+import os
+import socket
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import NetDrvEpEnv, EthtoolFamily, NetdevFamily
+from lib.py import cmd, bkg, rand_port, defer
+from lib.py import ksft_in
+from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, KsftFailEx
+
+# "define" for the ID of the Toeplitz hash function
+ETH_RSS_HASH_TOP = 1
+
+
+def _check_rps_and_rfs_not_configured(cfg):
+    """Verify that RPS is not already configured."""
+
+    for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"):
+        with open(rps_file, "r", encoding="utf-8") as fp:
+            val = fp.read().strip()
+            if set(val) - {"0", ","}:
+                raise KsftSkipEx(f"RPS already configured on {rps_file}: {val}")
+
+    rfs_file = "/proc/sys/net/core/rps_sock_flow_entries"
+    with open(rfs_file, "r", encoding="utf-8") as fp:
+        val = fp.read().strip()
+        if val != "0":
+            raise KsftSkipEx(f"RFS already configured {rfs_file}: {val}")
+
+
+def _get_cpu_for_irq(irq):
+    with open(f"/proc/irq/{irq}/smp_affinity_list", "r",
+              encoding="utf-8") as fp:
+        data = fp.read().strip()
+        if "," in data or "-" in data:
+            raise KsftFailEx(f"IRQ{irq} is not mapped to a single core: {data}")
+        return int(data)
+
+
+def _get_irq_cpus(cfg):
+    """
+    Read the list of IRQs for the device Rx queues.
+    """
+    queues = cfg.netnl.queue_get({"ifindex": cfg.ifindex}, dump=True)
+    napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+
+    # Remap into ID-based dicts
+    napis = {n["id"]: n for n in napis}
+    queues = {f"{q['type']}{q['id']}": q for q in queues}
+
+    cpus = []
+    for rx in range(9999):
+        name = f"rx{rx}"
+        if name not in queues:
+            break
+        cpus.append(_get_cpu_for_irq(napis[queues[name]["napi-id"]]["irq"]))
+
+    return cpus
+
+
+def _get_unused_cpus(cfg, count=2):
+    """
+    Get CPUs that are not used by Rx queues.
+    Returns a list of at least 'count' CPU numbers.
+    """
+
+    # Get CPUs used by Rx queues
+    rx_cpus = set(_get_irq_cpus(cfg))
+
+    # Get total number of CPUs
+    num_cpus = os.cpu_count()
+
+    # Find unused CPUs
+    unused_cpus = [cpu for cpu in range(num_cpus) if cpu not in rx_cpus]
+
+    if len(unused_cpus) < count:
+        raise KsftSkipEx(f"Need at {count} CPUs not used by Rx queues, found {len(unused_cpus)}")
+
+    return unused_cpus[:count]
+
+
+def _configure_rps(cfg, rps_cpus):
+    """Configure RPS for all Rx queues."""
+
+    mask = 0
+    for cpu in rps_cpus:
+        mask |= (1 << cpu)
+    mask = hex(mask)[2:]
+
+    # Set RPS bitmap for all rx queues
+    for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"):
+        with open(rps_file, "w", encoding="utf-8") as fp:
+            fp.write(mask)
+
+    return mask
+
+
+def _send_traffic(cfg, proto_flag, ipver, port):
+    """Send 20 packets of requested type."""
+
+    # Determine protocol and IP version for socat
+    if proto_flag == "-u":
+        proto = "UDP"
+    else:
+        proto = "TCP"
+
+    baddr = f"[{cfg.addr_v['6']}]" if ipver == "6" else cfg.addr_v["4"]
+
+    # Run socat in a loop to send traffic periodically
+    # Use sh -c with a loop similar to toeplitz_client.sh
+    socat_cmd = f"""
+    for i in `seq 20`; do
+        echo "msg $i" | socat -{ipver} -t 0.1 - {proto}:{baddr}:{port};
+        sleep 0.001;
+    done
+    """
+
+    cmd(socat_cmd, shell=True, host=cfg.remote)
+
+
+def _test_variants():
+    for grp in ["", "rss", "rps"]:
+        for l4 in ["tcp", "udp"]:
+            for l3 in ["4", "6"]:
+                name = f"{l4}_ipv{l3}"
+                if grp:
+                    name = f"{grp}_{name}"
+                yield KsftNamedVariant(name, "-" + l4[0], l3, grp)
+
+
+@ksft_variants(_test_variants())
+def test(cfg, proto_flag, ipver, grp):
+    """Run a single toeplitz test."""
+
+    cfg.require_ipver(ipver)
+
+    # Check that rxhash is enabled
+    ksft_in("receive-hashing: on", cmd(f"ethtool -k {cfg.ifname}").stdout)
+
+    rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    # Make sure NIC is configured to use Toeplitz hash, and no key xfrm.
+    if rss.get('hfunc') != ETH_RSS_HASH_TOP or rss.get('input-xfrm'):
+        cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+                           "hfunc": ETH_RSS_HASH_TOP,
+                           "input-xfrm": {}})
+        defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},
+                                  "hfunc": rss.get('hfunc'),
+                                  "input-xfrm": rss.get('input-xfrm', {})
+                                  })
+
+    port = rand_port(socket.SOCK_DGRAM)
+
+    toeplitz_path = cfg.test_dir / "toeplitz"
+    rx_cmd = [
+        str(toeplitz_path),
+        "-" + ipver,
+        proto_flag,
+        "-d", str(port),
+        "-i", cfg.ifname,
+        "-T", "4000",
+        "-s",
+        "-v"
+    ]
+
+    if grp:
+        _check_rps_and_rfs_not_configured(cfg)
+    if grp == "rss":
+        irq_cpus = ",".join([str(x) for x in _get_irq_cpus(cfg)])
+        rx_cmd += ["-C", irq_cpus]
+        ksft_pr(f"RSS using CPUs: {irq_cpus}")
+    elif grp == "rps":
+        # Get CPUs not used by Rx queues and configure them for RPS
+        rps_cpus = _get_unused_cpus(cfg, count=2)
+        rps_mask = _configure_rps(cfg, rps_cpus)
+        defer(_configure_rps, cfg, [])
+        rx_cmd += ["-r", rps_mask]
+        ksft_pr(f"RPS using CPUs: {rps_cpus}, mask: {rps_mask}")
+
+    # Run rx in background, it will exit once it has seen enough packets
+    with bkg(" ".join(rx_cmd), ksft_ready=True, exit_wait=True) as rx_proc:
+        while rx_proc.proc.poll() is None:
+            _send_traffic(cfg, proto_flag, ipver, port)
+
+    # Check rx result
+    ksft_pr("Receiver output:")
+    ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# '))
+    if rx_proc.stderr:
+        ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# '))
+
+
+def main() -> None:
+    """Ksft boilerplate main."""
+
+    with NetDrvEpEnv(__file__) as cfg:
+        cfg.ethnl = EthtoolFamily()
+        cfg.netnl = NetdevFamily()
+        ksft_run(cases=[test], args=(cfg,))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
new file mode 100755
index 000000000000..0998e68ebaf0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Run the tools/testing/selftests/net/csum testsuite."""
+
+import fcntl
+import socket
+import struct
+import termios
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit, KsftSkipEx, KsftXfailEx
+from lib.py import ksft_eq, ksft_ge, ksft_lt
+from lib.py import EthtoolFamily, NetdevFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen
+
+
+def sock_wait_drain(sock, max_wait=1000):
+    """Wait for all pending write data on the socket to get ACKed."""
+    for _ in range(max_wait):
+        one = b'\0' * 4
+        outq = fcntl.ioctl(sock.fileno(), termios.TIOCOUTQ, one)
+        outq = struct.unpack("I", outq)[0]
+        if outq == 0:
+            break
+        time.sleep(0.01)
+    ksft_eq(outq, 0)
+
+
+def tcp_sock_get_retrans(sock):
+    """Get the number of retransmissions for the TCP socket."""
+    info = sock.getsockopt(socket.SOL_TCP, socket.TCP_INFO, 512)
+    return struct.unpack("I", info[100:104])[0]
+
+
+def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso):
+    cfg.require_cmd("socat", local=False, remote=True)
+
+    port = rand_port()
+    listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof"
+
+    with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
+        wait_port_listen(port, host=cfg.remote)
+
+        if ipver == "4":
+            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            sock.connect((remote_v4, port))
+        else:
+            sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+            sock.connect((remote_v6, port))
+
+        # Small send to make sure the connection is working.
+        sock.send("ping".encode())
+        sock_wait_drain(sock)
+
+        # Send 4MB of data, record the LSO packet count.
+        qstat_old = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+        buf = b"0" * 1024 * 1024 * 4
+        sock.send(buf)
+        sock_wait_drain(sock)
+        qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+        # Check that at least 90% of the data was sent as LSO packets.
+        # System noise may cause false negatives. Also header overheads
+        # will add up to 5% of extra packes... The check is best effort.
+        total_lso_wire  = len(buf) * 0.90 // cfg.dev["mtu"]
+        total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"]
+
+        # Make sure we have order of magnitude more LSO packets than
+        # retransmits, in case TCP retransmitted all the LSO packets.
+        ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4)
+        sock.close()
+
+        if should_lso:
+            if cfg.have_stat_super_count:
+                ksft_ge(qstat_new['tx-hw-gso-packets'] -
+                        qstat_old['tx-hw-gso-packets'],
+                        total_lso_super,
+                        comment="Number of LSO super-packets with LSO enabled")
+            if cfg.have_stat_wire_count:
+                ksft_ge(qstat_new['tx-hw-gso-wire-packets'] -
+                        qstat_old['tx-hw-gso-wire-packets'],
+                        total_lso_wire,
+                        comment="Number of LSO wire-packets with LSO enabled")
+        else:
+            if cfg.have_stat_super_count:
+                ksft_lt(qstat_new['tx-hw-gso-packets'] -
+                        qstat_old['tx-hw-gso-packets'],
+                        15, comment="Number of LSO super-packets with LSO disabled")
+            if cfg.have_stat_wire_count:
+                ksft_lt(qstat_new['tx-hw-gso-wire-packets'] -
+                        qstat_old['tx-hw-gso-wire-packets'],
+                        500, comment="Number of LSO wire-packets with LSO disabled")
+
+
+def build_tunnel(cfg, outer_ipver, tun_info):
+    local_v4  = NetDrvEpEnv.nsim_v4_pfx + "1"
+    local_v6  = NetDrvEpEnv.nsim_v6_pfx + "1"
+    remote_v4 = NetDrvEpEnv.nsim_v4_pfx + "2"
+    remote_v6 = NetDrvEpEnv.nsim_v6_pfx + "2"
+
+    local_addr  = cfg.addr_v[outer_ipver]
+    remote_addr = cfg.remote_addr_v[outer_ipver]
+
+    tun_type = tun_info[0]
+    tun_arg  = tun_info[1]
+    ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}")
+    defer(ip, f"link del {tun_type}-ksft")
+    ip(f"link set dev {tun_type}-ksft up")
+    ip(f"addr add {local_v4}/24 dev {tun_type}-ksft")
+    ip(f"addr add {local_v6}/64 dev {tun_type}-ksft")
+
+    ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}",
+        host=cfg.remote)
+    defer(ip, f"link del {tun_type}-ksft", host=cfg.remote)
+    ip(f"link set dev {tun_type}-ksft up", host=cfg.remote)
+    ip(f"addr add {remote_v4}/24 dev {tun_type}-ksft", host=cfg.remote)
+    ip(f"addr add {remote_v6}/64 dev {tun_type}-ksft", host=cfg.remote)
+
+    return remote_v4, remote_v6
+
+
+def restore_wanted_features(cfg):
+    features_cmd = ""
+    for feature in cfg.hw_features:
+        setting = "on" if feature in cfg.wanted_features else "off"
+        features_cmd += f" {feature} {setting}"
+    try:
+        ethtool(f"-K {cfg.ifname} {features_cmd}")
+    except Exception as e:
+        ksft_pr(f"WARNING: failure restoring wanted features: {e}")
+
+
+def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
+    """Construct specific tests from the common template."""
+    def f(cfg):
+        cfg.require_ipver(outer_ipver)
+        defer(restore_wanted_features, cfg)
+
+        if not cfg.have_stat_super_count and \
+           not cfg.have_stat_wire_count:
+            raise KsftSkipEx(f"Device does not support LSO queue stats")
+
+        if feature not in cfg.hw_features:
+            raise KsftSkipEx(f"Device does not support {feature}")
+
+        ipver = outer_ipver
+        if tun:
+            remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun)
+            ipver = inner_ipver
+        else:
+            remote_v4 = cfg.remote_addr_v["4"]
+            remote_v6 = cfg.remote_addr_v["6"]
+
+        # First test without the feature enabled.
+        ethtool(f"-K {cfg.ifname} {feature} off")
+        run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False)
+
+        ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+        ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
+        if feature in cfg.partial_features:
+            ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+            if ipver == "4":
+                ksft_pr("Testing with mangleid enabled")
+                ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
+
+        # Full feature enabled.
+        ethtool(f"-K {cfg.ifname} {feature} on")
+        run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
+
+    f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver
+    return f
+
+
+def query_nic_features(cfg) -> None:
+    """Query and cache the NIC features."""
+    cfg.have_stat_super_count = False
+    cfg.have_stat_wire_count = False
+
+    features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+
+    cfg.wanted_features = set()
+    for f in features["wanted"]["bits"]["bit"]:
+        cfg.wanted_features.add(f["name"])
+
+    cfg.hw_features = set()
+    hw_all_features_cmd = ""
+    for f in features["hw"]["bits"]["bit"]:
+        if f.get("value", False):
+            feature = f["name"]
+            cfg.hw_features.add(feature)
+            hw_all_features_cmd += f" {feature} on"
+    try:
+        ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}")
+    except Exception as e:
+        ksft_pr(f"WARNING: failure enabling all hw features: {e}")
+        ksft_pr("partial gso feature detection may be impacted")
+
+    # Check which features are supported via GSO partial
+    cfg.partial_features = set()
+    if 'tx-gso-partial' in cfg.hw_features:
+        ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+
+        no_partial = set()
+        features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+        for f in features["active"]["bits"]["bit"]:
+            no_partial.add(f["name"])
+        cfg.partial_features = cfg.hw_features - no_partial
+        ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+
+    restore_wanted_features(cfg)
+
+    stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)
+    if stats:
+        if 'tx-hw-gso-packets' in stats[0]:
+            ksft_pr("Detected qstat for LSO super-packets")
+            cfg.have_stat_super_count = True
+        if 'tx-hw-gso-wire-packets' in stats[0]:
+            ksft_pr("Detected qstat for LSO wire-packets")
+            cfg.have_stat_wire_count = True
+
+
+def main() -> None:
+    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+        cfg.ethnl = EthtoolFamily()
+        cfg.netnl = NetdevFamily()
+
+        query_nic_features(cfg)
+
+        test_info = (
+            # name,       v4/v6  ethtool_feature               tun:(type, args, inner ip versions)
+            ("",           "4", "tx-tcp-segmentation",         None),
+            ("",           "6", "tx-tcp6-segmentation",        None),
+            ("vxlan",      "4", "tx-udp_tnl-segmentation",     ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))),
+            ("vxlan",      "6", "tx-udp_tnl-segmentation",     ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))),
+            ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))),
+            ("gre",        "4", "tx-gre-segmentation",         ("gre",   "", ("4", "6"))),
+            ("gre",        "6", "tx-gre-segmentation",         ("ip6gre","", ("4", "6"))),
+        )
+
+        cases = []
+        for outer_ipver in ["4", "6"]:
+            for info in test_info:
+                # Skip if test which only works for a specific IP version
+                if info[1] and outer_ipver != info[1]:
+                    continue
+
+                if info[3]:
+                    cases += [
+                        test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver)
+                        for inner_ipver in info[3][2]
+                    ]
+                else:
+                    cases.append(test_builder(info[0], cfg, outer_ipver, info[2], None, outer_ipver))
+
+        ksft_run(cases=cases, args=(cfg, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
new file mode 100755
index 000000000000..d19d1d518208
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+# This is intended to be run on a virtio-net guest interface.
+# The test binds the XDP socket to the interface without setting
+# the fill ring to trigger delayed refill_work. This helps to
+# make it easier to reproduce the deadlock when XDP program,
+# XDP socket bind/unbind, rx ring resize race with refill_work on
+# the buggy kernel.
+#
+# The Qemu command to setup virtio-net
+# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no
+# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on
+
+from lib.py import ksft_exit, ksft_run
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetDrvEnv
+from lib.py import bkg, ip, cmd, ethtool
+import time
+
+def _get_rx_ring_entries(cfg):
+    output = ethtool(f"-g {cfg.ifname}", json=True)
+    return output[0]["rx"]
+
+def setup_xsk(cfg, xdp_queue_id = 0) -> bkg:
+    # Probe for support
+    xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
+    if xdp.ret == 255:
+        raise KsftSkipEx('AF_XDP unsupported')
+    elif xdp.ret > 0:
+        raise KsftFailEx('unable to create AF_XDP socket')
+
+    try:
+        return bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} ' \
+                   '{xdp_queue_id} -z', ksft_wait=3)
+    except:
+        raise KsftSkipEx('Failed to bind XDP socket in zerocopy.\n' \
+                         'Please consider adding iommu_platform=on ' \
+                         'when setting up virtio-net-pci')
+
+def check_xdp_bind(cfg):
+    with setup_xsk(cfg):
+        ip(f"link set dev %s xdp obj %s sec xdp" %
+           (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
+        ip(f"link set dev %s xdp off" % cfg.ifname)
+
+def check_rx_resize(cfg):
+    with setup_xsk(cfg):
+        rx_ring = _get_rx_ring_entries(cfg)
+        ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring // 2))
+        ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring))
+
+def main():
+    with NetDrvEnv(__file__, nsim_test=False) as cfg:
+        ksft_run([check_xdp_bind, check_rx_resize],
+                 args=(cfg, ))
+    ksft_exit()
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
new file mode 100644
index 000000000000..8b75faa9af6d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Driver test environment.
+NetDrvEnv and NetDrvEpEnv are the main environment classes.
+Former is for local host only tests, latter creates / connects
+to a remote endpoint. See NIPA wiki for more information about
+running and writing driver tests.
+"""
+
+import sys
+from pathlib import Path
+
+KSFT_DIR = (Path(__file__).parent / "../../../..").resolve()
+
+try:
+    sys.path.append(KSFT_DIR.as_posix())
+
+    # Import one by one to avoid pylint false positives
+    from net.lib.py import NetNS, NetNSEnter, NetdevSimDev
+    from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
+        NlError, RtnlFamily, DevlinkFamily, PSPFamily
+    from net.lib.py import CmdExitFailure
+    from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
+        fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
+    from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+    from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
+        ksft_setup, ksft_variants, KsftNamedVariant
+    from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
+        ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
+
+    __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev",
+               "EthtoolFamily", "NetdevFamily", "NetshaperFamily",
+               "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily",
+               "CmdExitFailure",
+               "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
+               "fd_read_timeout", "ip", "rand_port",
+               "wait_port_listen", "wait_file",
+               "KsftSkipEx", "KsftFailEx", "KsftXfailEx",
+               "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
+               "ksft_setup", "ksft_variants", "KsftNamedVariant",
+               "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
+               "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
+               "ksft_not_none", "ksft_not_none"]
+
+    from .env import NetDrvEnv, NetDrvEpEnv
+    from .load import GenerateTraffic, Iperf3Runner
+    from .remote import Remote
+
+    __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote",
+                "Iperf3Runner"]
+except ModuleNotFoundError as e:
+    print("Failed importing `net` library from kernel sources")
+    print(str(e))
+    sys.exit(4)
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
new file mode 100644
index 000000000000..8b644fd84ff2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -0,0 +1,287 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import time
+from pathlib import Path
+from lib.py import KsftSkipEx, KsftXfailEx
+from lib.py import ksft_setup, wait_file
+from lib.py import cmd, ethtool, ip, CmdExitFailure
+from lib.py import NetNS, NetdevSimDev
+from .remote import Remote
+
+
+class NetDrvEnvBase:
+    """
+    Base class for a NIC / host environments
+
+    Attributes:
+      test_dir: Path to the source directory of the test
+      net_lib_dir: Path to the net/lib directory
+    """
+    def __init__(self, src_path):
+        self.src_path = Path(src_path)
+        self.test_dir = self.src_path.parent.resolve()
+        self.net_lib_dir = (Path(__file__).parent / "../../../../net/lib").resolve()
+
+        self.env = self._load_env_file()
+
+        # Following attrs must be set be inheriting classes
+        self.dev = None
+
+    def _load_env_file(self):
+        env = os.environ.copy()
+
+        src_dir = Path(self.src_path).parent.resolve()
+        if not (src_dir / "net.config").exists():
+            return ksft_setup(env)
+
+        with open((src_dir / "net.config").as_posix(), 'r') as fp:
+            for line in fp.readlines():
+                full_file = line
+                # Strip comments
+                pos = line.find("#")
+                if pos >= 0:
+                    line = line[:pos]
+                line = line.strip()
+                if not line:
+                    continue
+                pair = line.split('=', maxsplit=1)
+                if len(pair) != 2:
+                    raise Exception("Can't parse configuration line:", full_file)
+                env[pair[0]] = pair[1]
+        return ksft_setup(env)
+
+    def __del__(self):
+        pass
+
+    def __enter__(self):
+        ip(f"link set dev {self.dev['ifname']} up")
+        wait_file(f"/sys/class/net/{self.dev['ifname']}/carrier",
+                  lambda x: x.strip() == "1")
+
+        return self
+
+    def __exit__(self, ex_type, ex_value, ex_tb):
+        """
+        __exit__ gets called at the end of a "with" block.
+        """
+        self.__del__()
+
+
+class NetDrvEnv(NetDrvEnvBase):
+    """
+    Class for a single NIC / host env, with no remote end
+    """
+    def __init__(self, src_path, nsim_test=None, **kwargs):
+        super().__init__(src_path)
+
+        self._ns = None
+
+        if 'NETIF' in self.env:
+            if nsim_test is True:
+                raise KsftXfailEx("Test only works on netdevsim")
+
+            self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0]
+        else:
+            if nsim_test is False:
+                raise KsftXfailEx("Test does not work on netdevsim")
+
+            self._ns = NetdevSimDev(**kwargs)
+            self.dev = self._ns.nsims[0].dev
+        self.ifname = self.dev['ifname']
+        self.ifindex = self.dev['ifindex']
+
+    def __del__(self):
+        if self._ns:
+            self._ns.remove()
+            self._ns = None
+
+
+class NetDrvEpEnv(NetDrvEnvBase):
+    """
+    Class for an environment with a local device and "remote endpoint"
+    which can be used to send traffic in.
+
+    For local testing it creates two network namespaces and a pair
+    of netdevsim devices.
+    """
+
+    # Network prefixes used for local tests
+    nsim_v4_pfx = "192.0.2."
+    nsim_v6_pfx = "2001:db8::"
+
+    def __init__(self, src_path, nsim_test=None):
+        super().__init__(src_path)
+
+        self._stats_settle_time = None
+
+        # Things we try to destroy
+        self.remote = None
+        # These are for local testing state
+        self._netns = None
+        self._ns = None
+        self._ns_peer = None
+
+        self.addr_v        = { "4": None, "6": None }
+        self.remote_addr_v = { "4": None, "6": None }
+
+        if "NETIF" in self.env:
+            if nsim_test is True:
+                raise KsftXfailEx("Test only works on netdevsim")
+            self._check_env()
+
+            self.dev = ip("-d link show dev " + self.env['NETIF'], json=True)[0]
+
+            self.addr_v["4"] = self.env.get("LOCAL_V4")
+            self.addr_v["6"] = self.env.get("LOCAL_V6")
+            self.remote_addr_v["4"] = self.env.get("REMOTE_V4")
+            self.remote_addr_v["6"] = self.env.get("REMOTE_V6")
+            kind = self.env["REMOTE_TYPE"]
+            args = self.env["REMOTE_ARGS"]
+        else:
+            if nsim_test is False:
+                raise KsftXfailEx("Test does not work on netdevsim")
+
+            self.create_local()
+
+            self.dev = self._ns.nsims[0].dev
+
+            self.addr_v["4"] = self.nsim_v4_pfx + "1"
+            self.addr_v["6"] = self.nsim_v6_pfx + "1"
+            self.remote_addr_v["4"] = self.nsim_v4_pfx + "2"
+            self.remote_addr_v["6"] = self.nsim_v6_pfx + "2"
+            kind = "netns"
+            args = self._netns.name
+
+        self.remote = Remote(kind, args, src_path)
+
+        self.addr_ipver = "6" if self.addr_v["6"] else "4"
+        self.addr = self.addr_v[self.addr_ipver]
+        self.remote_addr = self.remote_addr_v[self.addr_ipver]
+
+        # Bracketed addresses, some commands need IPv6 to be inside []
+        self.baddr = f"[{self.addr_v['6']}]" if self.addr_v["6"] else self.addr_v["4"]
+        self.remote_baddr = f"[{self.remote_addr_v['6']}]" if self.remote_addr_v["6"] else self.remote_addr_v["4"]
+
+        self.ifname = self.dev['ifname']
+        self.ifindex = self.dev['ifindex']
+
+        # resolve remote interface name
+        self.remote_ifname = self.resolve_remote_ifc()
+        self.remote_dev = ip("-d link show dev " + self.remote_ifname,
+                             host=self.remote, json=True)[0]
+
+        self._required_cmd = {}
+
+    def create_local(self):
+        self._netns = NetNS()
+        self._ns = NetdevSimDev()
+        self._ns_peer = NetdevSimDev(ns=self._netns)
+
+        with open("/proc/self/ns/net") as nsfd0, \
+             open("/var/run/netns/" + self._netns.name) as nsfd1:
+            ifi0 = self._ns.nsims[0].ifindex
+            ifi1 = self._ns_peer.nsims[0].ifindex
+            NetdevSimDev.ctrl_write('link_device',
+                                    f'{nsfd0.fileno()}:{ifi0} {nsfd1.fileno()}:{ifi1}')
+
+        ip(f"   addr add dev {self._ns.nsims[0].ifname} {self.nsim_v4_pfx}1/24")
+        ip(f"-6 addr add dev {self._ns.nsims[0].ifname} {self.nsim_v6_pfx}1/64 nodad")
+        ip(f"   link set dev {self._ns.nsims[0].ifname} up")
+
+        ip(f"   addr add dev {self._ns_peer.nsims[0].ifname} {self.nsim_v4_pfx}2/24", ns=self._netns)
+        ip(f"-6 addr add dev {self._ns_peer.nsims[0].ifname} {self.nsim_v6_pfx}2/64 nodad", ns=self._netns)
+        ip(f"   link set dev {self._ns_peer.nsims[0].ifname} up", ns=self._netns)
+
+    def _check_env(self):
+        vars_needed = [
+            ["LOCAL_V4", "LOCAL_V6"],
+            ["REMOTE_V4", "REMOTE_V6"],
+            ["REMOTE_TYPE"],
+            ["REMOTE_ARGS"]
+        ]
+        missing = []
+
+        for choice in vars_needed:
+            for entry in choice:
+                if entry in self.env:
+                    break
+            else:
+                missing.append(choice)
+        # Make sure v4 / v6 configs are symmetric
+        if ("LOCAL_V6" in self.env) != ("REMOTE_V6" in self.env):
+            missing.append(["LOCAL_V6", "REMOTE_V6"])
+        if ("LOCAL_V4" in self.env) != ("REMOTE_V4" in self.env):
+            missing.append(["LOCAL_V4", "REMOTE_V4"])
+        if missing:
+            raise Exception("Invalid environment, missing configuration:", missing,
+                            "Please see tools/testing/selftests/drivers/net/README.rst")
+
+    def resolve_remote_ifc(self):
+        v4 = v6 = None
+        if self.remote_addr_v["4"]:
+            v4 = ip("addr show to " + self.remote_addr_v["4"], json=True, host=self.remote)
+        if self.remote_addr_v["6"]:
+            v6 = ip("addr show to " + self.remote_addr_v["6"], json=True, host=self.remote)
+        if v4 and v6 and v4[0]["ifname"] != v6[0]["ifname"]:
+            raise Exception("Can't resolve remote interface name, v4 and v6 don't match")
+        if (v4 and len(v4) > 1) or (v6 and len(v6) > 1):
+            raise Exception("Can't resolve remote interface name, multiple interfaces match")
+        return v6[0]["ifname"] if v6 else v4[0]["ifname"]
+
+    def __del__(self):
+        if self._ns:
+            self._ns.remove()
+            self._ns = None
+        if self._ns_peer:
+            self._ns_peer.remove()
+            self._ns_peer = None
+        if self._netns:
+            del self._netns
+            self._netns = None
+        if self.remote:
+            del self.remote
+            self.remote = None
+
+    def require_ipver(self, ipver):
+        if not self.addr_v[ipver] or not self.remote_addr_v[ipver]:
+            raise KsftSkipEx(f"Test requires IPv{ipver} connectivity")
+
+    def require_nsim(self):
+        if self._ns is None:
+            raise KsftXfailEx("Test only works on netdevsim")
+
+    def _require_cmd(self, comm, key, host=None):
+        cached = self._required_cmd.get(comm, {})
+        if cached.get(key) is None:
+            cached[key] = cmd("command -v -- " + comm, fail=False,
+                              shell=True, host=host).ret == 0
+        self._required_cmd[comm] = cached
+        return cached[key]
+
+    def require_cmd(self, comm, local=True, remote=False):
+        if local:
+            if not self._require_cmd(comm, "local"):
+                raise KsftSkipEx("Test requires command: " + comm)
+        if remote:
+            if not self._require_cmd(comm, "remote", host=self.remote):
+                raise KsftSkipEx("Test requires (remote) command: " + comm)
+
+    def wait_hw_stats_settle(self):
+        """
+        Wait for HW stats to become consistent, some devices DMA HW stats
+        periodically so events won't be reflected until next sync.
+        Good drivers will tell us via ethtool what their sync period is.
+        """
+        if self._stats_settle_time is None:
+            data = {}
+            try:
+                data = ethtool("-c " + self.ifname, json=True)[0]
+            except CmdExitFailure as e:
+                if "Operation not supported" not in e.cmd.stderr:
+                    raise
+
+            self._stats_settle_time = 0.025 + \
+                data.get('stats-block-usecs', 0) / 1000 / 1000
+
+        time.sleep(self._stats_settle_time)
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
new file mode 100644
index 000000000000..f181fa2d38fc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -0,0 +1,139 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import re
+import time
+import json
+
+from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
+
+
+class Iperf3Runner:
+    """
+    Sets up and runs iperf3 traffic.
+    """
+    def __init__(self, env, port=None, server_ip=None, client_ip=None):
+        env.require_cmd("iperf3", local=True, remote=True)
+        self.env = env
+        self.port = rand_port() if port is None else port
+        self.server_ip = server_ip
+        self.client_ip = client_ip
+
+    def _build_server(self):
+        cmdline = f"iperf3 -s -1 -p {self.port}"
+        if self.server_ip:
+            cmdline += f" -B {self.server_ip}"
+        return cmdline
+
+    def _build_client(self, streams, duration, reverse):
+        host = self.env.addr if self.server_ip is None else self.server_ip
+        cmdline = f"iperf3 -c {host} -p {self.port} -P {streams} -t {duration} -J"
+        if self.client_ip:
+            cmdline += f" -B {self.client_ip}"
+        if reverse:
+            cmdline += " --reverse"
+        return cmdline
+
+    def start_server(self):
+        """
+        Starts an iperf3 server with optional bind IP.
+        """
+        cmdline = self._build_server()
+        proc = cmd(cmdline, background=True)
+        wait_port_listen(self.port)
+        time.sleep(0.1)
+        return proc
+
+    def start_client(self, background=False, streams=1, duration=10, reverse=False):
+        """
+        Starts the iperf3 client with the configured options.
+        """
+        cmdline = self._build_client(streams, duration, reverse)
+        return cmd(cmdline, background=background, host=self.env.remote)
+
+    def measure_bandwidth(self, reverse=False):
+        """
+        Runs an iperf3 measurement and returns the average bandwidth (Gbps).
+        Discards the first and last few reporting intervals and uses only the
+        middle part of the run where throughput is typically stable.
+        """
+        self.start_server()
+        result = self.start_client(duration=10, reverse=reverse)
+
+        if result.ret != 0:
+            raise RuntimeError("iperf3 failed to run successfully")
+        try:
+            out = json.loads(result.stdout)
+        except json.JSONDecodeError as exc:
+            raise ValueError("Failed to parse iperf3 JSON output") from exc
+
+        intervals = out.get("intervals", [])
+        samples = [i["sum"]["bits_per_second"] / 1e9 for i in intervals]
+        if len(samples) < 10:
+            raise ValueError(f"iperf3 returned too few intervals: {len(samples)}")
+        # Discard potentially unstable first and last 3 seconds.
+        stable = samples[3:-3]
+
+        avg = sum(stable) / len(stable)
+
+        return avg
+
+
+class GenerateTraffic:
+    def __init__(self, env, port=None):
+        self.env = env
+        self.runner = Iperf3Runner(env, port)
+
+        self._iperf_server = self.runner.start_server()
+        self._iperf_client = self.runner.start_client(background=True, streams=16, duration=86400)
+
+        # Wait for traffic to ramp up
+        if not self._wait_pkts(pps=1000):
+            self.stop(verbose=True)
+            raise Exception("iperf3 traffic did not ramp up")
+
+    def _wait_pkts(self, pkt_cnt=None, pps=None):
+        """
+        Wait until we've seen pkt_cnt or until traffic ramps up to pps.
+        Only one of pkt_cnt or pss can be specified.
+        """
+        pkt_start = ip("-s link show dev " + self.env.ifname, json=True)[0]["stats64"]["rx"]["packets"]
+        for _ in range(50):
+            time.sleep(0.1)
+            pkt_now = ip("-s link show dev " + self.env.ifname, json=True)[0]["stats64"]["rx"]["packets"]
+            if pps:
+                if pkt_now - pkt_start > pps / 10:
+                    return True
+                pkt_start = pkt_now
+            elif pkt_cnt:
+                if pkt_now - pkt_start > pkt_cnt:
+                    return True
+        return False
+
+    def wait_pkts_and_stop(self, pkt_cnt):
+        failed = not self._wait_pkts(pkt_cnt=pkt_cnt)
+        self.stop(verbose=failed)
+
+    def stop(self, verbose=None):
+        self._iperf_client.process(terminate=True)
+        if verbose:
+            ksft_pr(">> Client:")
+            ksft_pr(self._iperf_client.stdout)
+            ksft_pr(self._iperf_client.stderr)
+        self._iperf_server.process(terminate=True)
+        if verbose:
+            ksft_pr(">> Server:")
+            ksft_pr(self._iperf_server.stdout)
+            ksft_pr(self._iperf_server.stderr)
+        self._wait_client_stopped()
+
+    def _wait_client_stopped(self, sleep=0.005, timeout=5):
+        end = time.monotonic() + timeout
+
+        live_port_pattern = re.compile(fr":{self.runner.port:04X} 0[^6] ")
+
+        while time.monotonic() < end:
+            data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout
+            if not live_port_pattern.search(data):
+                return
+            time.sleep(sleep)
+        raise Exception(f"Waiting for client to stop timed out after {timeout}s")
diff --git a/tools/testing/selftests/drivers/net/lib/py/remote.py b/tools/testing/selftests/drivers/net/lib/py/remote.py
new file mode 100644
index 000000000000..b1780b987722
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/remote.py
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import importlib
+
+_modules = {}
+
+def Remote(kind, args, src_path):
+    global _modules
+
+    if kind not in _modules:
+        _modules[kind] = importlib.import_module("..remote_" + kind, __name__)
+
+    dir_path = os.path.abspath(src_path + "/../")
+    return getattr(_modules[kind], "Remote")(args, dir_path)
diff --git a/tools/testing/selftests/drivers/net/lib/py/remote_netns.py b/tools/testing/selftests/drivers/net/lib/py/remote_netns.py
new file mode 100644
index 000000000000..7d5eeb0271bc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/remote_netns.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import subprocess
+
+from lib.py import cmd
+
+
+class Remote:
+    def __init__(self, name, dir_path):
+        self.name = name
+        self.dir_path = dir_path
+
+    def cmd(self, comm):
+        return subprocess.Popen(["ip", "netns", "exec", self.name, "bash", "-c", comm],
+                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+    def deploy(self, what):
+        if os.path.isabs(what):
+            return what
+        return os.path.abspath(self.dir_path + "/" + what)
diff --git a/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py b/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py
new file mode 100644
index 000000000000..924addde19a3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import string
+import subprocess
+import random
+
+from lib.py import cmd
+
+
+class Remote:
+    def __init__(self, name, dir_path):
+        self.name = name
+        self.dir_path = dir_path
+        self._tmpdir = None
+
+    def __del__(self):
+        if self._tmpdir:
+            cmd("rm -rf " + self._tmpdir, host=self)
+            self._tmpdir = None
+
+    def cmd(self, comm):
+        return subprocess.Popen(["ssh", "-q", self.name, comm],
+                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+    def _mktmp(self):
+        return ''.join(random.choice(string.ascii_lowercase) for _ in range(8))
+
+    def deploy(self, what):
+        if not self._tmpdir:
+            self._tmpdir = "/tmp/" + self._mktmp()
+            cmd("mkdir " + self._tmpdir, host=self)
+        file_name = self._tmpdir + "/" + self._mktmp() + os.path.basename(what)
+
+        if not os.path.isabs(what):
+            what = os.path.abspath(self.dir_path + "/" + what)
+
+        cmd(f"scp {what} {self.name}:{file_name}")
+        return file_name
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
new file mode 100644
index 000000000000..ae8abff4be40
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -0,0 +1,419 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This file contains functions and helpers to support the netconsole
+# selftests
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+SRCIF="" # to be populated later
+SRCIP="" # to be populated later
+SRCIP4="192.0.2.1"
+SRCIP6="fc00::1"
+DSTIF="" # to be populated later
+DSTIP="" # to be populated later
+DSTIP4="192.0.2.2"
+DSTIP6="fc00::2"
+
+PORT="6666"
+MSG="netconsole selftest"
+USERDATA_KEY="key"
+USERDATA_VALUE="value"
+TARGET=$(mktemp -u netcons_XXXXX)
+DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk)
+NETCONS_CONFIGFS="/sys/kernel/config/netconsole"
+NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
+# NAMESPACE will be populated by setup_ns with a random value
+NAMESPACE=""
+
+# IDs for netdevsim. We either use NSIM_DEV_{1,2}_ID for standard test
+# or NSIM_BOND_{T,R}X_{1,2} for the bonding tests. Not both at the
+# same time.
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_BOND_TX_1=$((768 + RANDOM % 256))
+NSIM_BOND_TX_2=$((1024 + RANDOM % 256))
+NSIM_BOND_RX_1=$((1280 + RANDOM % 256))
+NSIM_BOND_RX_2=$((1536 + RANDOM % 256))
+NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
+NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
+
+# Used to create and delete namespaces
+source "${LIBDIR}"/../../../../net/lib.sh
+
+# Create netdevsim interfaces
+create_ifaces() {
+	echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
+	echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
+	udevadm settle 2> /dev/null || true
+
+	local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID"
+	local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID"
+
+	# These are global variables
+	SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \
+		-path "$NSIM1"/net -exec basename {} \;)
+	DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \
+		-path "$NSIM2"/net -exec basename {} \;)
+}
+
+link_ifaces() {
+	local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
+	local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex)
+	local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex)
+
+	exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}"
+	exec {INITNS_FD}</proc/self/ns/net
+
+	# Bind the dst interface to namespace
+	ip link set "${DSTIF}" netns "${NAMESPACE}"
+
+	# Linking one device to the other one (on the other namespace}
+	if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX"  > $NSIM_DEV_SYS_LINK
+	then
+		echo "linking netdevsim1 with netdevsim2 should succeed"
+		cleanup
+		exit "${ksft_skip}"
+	fi
+}
+
+function configure_ip() {
+	# Configure the IPs for both interfaces
+	ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}"
+	ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up
+
+	ip addr add "${SRCIP}"/24 dev "${SRCIF}"
+	ip link set "${SRCIF}" up
+}
+
+function select_ipv4_or_ipv6()
+{
+	local VERSION=${1}
+
+	if [[ "$VERSION" == "ipv6" ]]
+	then
+		DSTIP="${DSTIP6}"
+		SRCIP="${SRCIP6}"
+	else
+		DSTIP="${DSTIP4}"
+		SRCIP="${SRCIP4}"
+	fi
+}
+
+function set_network() {
+	local IP_VERSION=${1:-"ipv4"}
+
+	# setup_ns function is coming from lib.sh
+	setup_ns NAMESPACE
+
+	# Create both interfaces, and assign the destination to a different
+	# namespace
+	create_ifaces
+
+	# Link both interfaces back to back
+	link_ifaces
+
+	select_ipv4_or_ipv6 "${IP_VERSION}"
+	configure_ip
+}
+
+function _create_dynamic_target() {
+	local FORMAT="${1:?FORMAT parameter required}"
+	local NCPATH="${2:?NCPATH parameter required}"
+
+	DSTMAC=$(ip netns exec "${NAMESPACE}" \
+		 ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+
+	# Create a dynamic target
+	mkdir "${NCPATH}"
+
+	echo "${DSTIP}" > "${NCPATH}"/remote_ip
+	echo "${SRCIP}" > "${NCPATH}"/local_ip
+	echo "${DSTMAC}" > "${NCPATH}"/remote_mac
+	echo "${SRCIF}" > "${NCPATH}"/dev_name
+
+	if [ "${FORMAT}" == "basic" ]
+	then
+		# Basic target does not support release
+		echo 0 > "${NCPATH}"/release
+		echo 0 > "${NCPATH}"/extended
+	elif [ "${FORMAT}" == "extended" ]
+	then
+		echo 1 > "${NCPATH}"/extended
+	fi
+}
+
+function create_dynamic_target() {
+	local FORMAT=${1:-"extended"}
+	local NCPATH=${2:-"$NETCONS_PATH"}
+	_create_dynamic_target "${FORMAT}" "${NCPATH}"
+
+	echo 1 > "${NCPATH}"/enabled
+
+	# This will make sure that the kernel was able to
+	# load the netconsole driver configuration. The console message
+	# gets more organized/sequential as well.
+	sleep 1
+}
+
+# Generate the command line argument for netconsole following:
+#  netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
+function create_cmdline_str() {
+	local BINDMODE=${1:-"ifname"}
+	if [ "${BINDMODE}" == "ifname" ]
+	then
+		SRCDEV=${SRCIF}
+	else
+		SRCDEV=$(mac_get "${SRCIF}")
+	fi
+
+	DSTMAC=$(ip netns exec "${NAMESPACE}" \
+		 ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+	SRCPORT="1514"
+	TGTPORT="6666"
+
+	echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCDEV},${TGTPORT}@${DSTIP}/${DSTMAC}\""
+}
+
+# Do not append the release to the header of the message
+function disable_release_append() {
+	echo 0 > "${NETCONS_PATH}"/enabled
+	echo 0 > "${NETCONS_PATH}"/release
+	echo 1 > "${NETCONS_PATH}"/enabled
+}
+
+function do_cleanup() {
+	local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
+
+	# Delete netdevsim devices
+	echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
+	echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL"
+
+	# this is coming from lib.sh
+	cleanup_all_ns
+
+	# Restoring printk configurations
+	echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
+}
+
+function cleanup_netcons() {
+	# delete netconsole dynamic reconfiguration
+	# do not fail if the target is already disabled
+	if [[ ! -d "${NETCONS_PATH}" ]]
+	then
+		# in some cases this is called before netcons path is created
+		return
+	fi
+	if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]]
+	then
+		echo 0 > "${NETCONS_PATH}"/enabled || true
+	fi
+	# Remove all the keys that got created during the selftest
+	find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
+	# Remove the configfs entry
+	rmdir "${NETCONS_PATH}"
+}
+
+function cleanup() {
+	cleanup_netcons
+	do_cleanup
+}
+
+function set_user_data() {
+	if [[ ! -d "${NETCONS_PATH}""/userdata" ]]
+	then
+		echo "Userdata path not available in ${NETCONS_PATH}/userdata"
+		exit "${ksft_skip}"
+	fi
+
+	KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}"
+	mkdir -p "${KEY_PATH}"
+	VALUE_PATH="${KEY_PATH}""/value"
+	echo "${USERDATA_VALUE}" > "${VALUE_PATH}"
+}
+
+function listen_port_and_save_to() {
+	local OUTPUT=${1}
+	local IPVERSION=${2:-"ipv4"}
+
+	if [ "${IPVERSION}" == "ipv4" ]
+	then
+		SOCAT_MODE="UDP-LISTEN"
+	else
+		SOCAT_MODE="UDP6-LISTEN"
+	fi
+
+	# Just wait for 2 seconds
+	timeout 2 ip netns exec "${NAMESPACE}" \
+		socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" 2> /dev/null
+}
+
+# Only validate that the message arrived properly
+function validate_msg() {
+	local TMPFILENAME="$1"
+
+	# Check if the file exists
+	if [ ! -f "$TMPFILENAME" ]; then
+		echo "FAIL: File was not generated." >&2
+		exit "${ksft_fail}"
+	fi
+
+	if ! grep -q "${MSG}" "${TMPFILENAME}"; then
+		echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2
+		cat "${TMPFILENAME}" >&2
+		exit "${ksft_fail}"
+	fi
+}
+
+# Validate the message and userdata
+function validate_result() {
+	local TMPFILENAME="$1"
+
+	# TMPFILENAME will contain something like:
+	# 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+	#  key=value
+
+	validate_msg "${TMPFILENAME}"
+
+	# userdata is not supported on basic format target,
+	# thus, do not validate it.
+	if [ "${FORMAT}" != "basic" ];
+	then
+		if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
+			echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
+			cat "${TMPFILENAME}" >&2
+			exit "${ksft_fail}"
+		fi
+	fi
+
+	# Delete the file once it is validated, otherwise keep it
+	# for debugging purposes
+	rm "${TMPFILENAME}"
+}
+
+function check_for_dependencies() {
+	if [ "$(id -u)" -ne 0 ]; then
+		echo "This test must be run as root" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ! which socat > /dev/null ; then
+		echo "SKIP: socat(1) is not available" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ! which ip > /dev/null ; then
+		echo "SKIP: ip(1) is not available" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ! which udevadm > /dev/null ; then
+		echo "SKIP: udevadm(1) is not available" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if [ ! -f /proc/net/if_inet6 ]; then
+		echo "SKIP: IPv6 not configured. Check if CONFIG_IPV6 is enabled" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then
+		echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if [ ! -d "${NETCONS_CONFIGFS}" ]; then
+		echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ip link show "${DSTIF}" 2> /dev/null; then
+		echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2
+		exit "${ksft_skip}"
+	fi
+
+	REGEXP4="inet.*(${SRCIP4}|${DSTIP4})"
+	REGEXP6="inet.*(${SRCIP6}|${DSTIP6})"
+	if ip addr list | grep -E "${REGEXP4}" 2> /dev/null; then
+		echo "SKIP: IPv4s already in use. Skipping it" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ip addr list | grep -E "${REGEXP6}" 2> /dev/null; then
+		echo "SKIP: IPv6s already in use. Skipping it" >&2
+		exit "${ksft_skip}"
+	fi
+}
+
+function check_for_taskset() {
+	if ! which taskset > /dev/null ; then
+		echo "SKIP: taskset(1) is not available" >&2
+		exit "${ksft_skip}"
+	fi
+}
+
+# This is necessary if running multiple tests in a row
+function pkill_socat() {
+	PROCESS_NAME4="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}"
+	PROCESS_NAME6="socat UDP6-LISTEN:6666,fork ${OUTPUT_FILE}"
+	# socat runs under timeout(1), kill it if it is still alive
+	# do not fail if socat doesn't exist anymore
+	set +e
+	pkill -f "${PROCESS_NAME4}"
+	pkill -f "${PROCESS_NAME6}"
+	set -e
+}
+
+# Check if netconsole was compiled as a module, otherwise exit
+function check_netconsole_module() {
+	if modinfo netconsole | grep filename: | grep -q builtin
+	then
+		echo "SKIP: netconsole should be compiled as a module" >&2
+		exit "${ksft_skip}"
+	fi
+}
+
+# A wrapper to translate protocol version to udp version
+function wait_for_port() {
+	local NAMESPACE=${1}
+	local PORT=${2}
+	IP_VERSION=${3}
+
+	if [ "${IP_VERSION}" == "ipv6" ]
+	then
+		PROTOCOL="udp6"
+	else
+		PROTOCOL="udp"
+	fi
+
+	wait_local_port_listen "${NAMESPACE}" "${PORT}" "${PROTOCOL}"
+	# even after the port is open, let's wait 1 second before writing
+	# otherwise the packet could be missed, and the test will fail. Happens
+	# more frequently on IPv6
+	sleep 1
+}
+
+# Clean up netdevsim ifaces created for bonding test
+function cleanup_bond_nsim() {
+	ip -n "${TXNS}" \
+		link delete "${BOND_TX_MAIN_IF}" type bond || true
+	ip -n "${RXNS}" \
+		link delete "${BOND_RX_MAIN_IF}" type bond || true
+
+	cleanup_netdevsim "$NSIM_BOND_TX_1"
+	cleanup_netdevsim "$NSIM_BOND_TX_2"
+	cleanup_netdevsim "$NSIM_BOND_RX_1"
+	cleanup_netdevsim "$NSIM_BOND_RX_2"
+}
+
+# cleanup tests that use bonding interfaces
+function cleanup_bond() {
+	cleanup_netcons
+	cleanup_bond_nsim
+	cleanup_all_ns
+	ip link delete "${VETH0}" || true
+}
diff --git a/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh b/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh
new file mode 100755
index 000000000000..82be5d013330
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh
@@ -0,0 +1,668 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+
+# The script is adopted to work with the Microchip KSZ switch driver.
+
+ETH_FCS_LEN=4
+
+WAIT_TIME=1
+NUM_NETIFS=4
+REQUIRE_JQ="yes"
+REQUIRE_MZ="yes"
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command dcb
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# On h1_ and h2_create do not set IP addresses to avoid interaction with the
+# system, to keep packet counters clean.
+h1_create()
+{
+	simple_if_init $h1
+	sysctl_set net.ipv6.conf.${h1}.disable_ipv6 1
+	# Get the MAC address of the interface to use it with mausezahn
+	h1_mac=$(ip -j link show dev ${h1} | jq -e '.[].address')
+}
+
+h1_destroy()
+{
+	sysctl_restore net.ipv6.conf.${h1}.disable_ipv6
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	sysctl_set net.ipv6.conf.${h2}.disable_ipv6 1
+	h2_mac=$(ip -j link show dev ${h2} | jq -e '.[].address')
+}
+
+h2_destroy()
+{
+	sysctl_restore net.ipv6.conf.${h2}.disable_ipv6
+	simple_if_fini $h2
+}
+
+switch_create()
+{
+	ip link set ${swp1} up
+	ip link set ${swp2} up
+	sysctl_set net.ipv6.conf.${swp1}.disable_ipv6 1
+	sysctl_set net.ipv6.conf.${swp2}.disable_ipv6 1
+
+	# Ports should trust VLAN PCP even with vlan_filtering=0
+	ip link add br0 type bridge
+	ip link set ${swp1} master br0
+	ip link set ${swp2} master br0
+	ip link set br0 up
+	sysctl_set net.ipv6.conf.br0.disable_ipv6 1
+}
+
+switch_destroy()
+{
+	sysctl_restore net.ipv6.conf.${swp2}.disable_ipv6
+	sysctl_restore net.ipv6.conf.${swp1}.disable_ipv6
+
+	ip link del br0
+}
+
+setup_prepare()
+{
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+	switch_destroy
+
+	vrf_cleanup
+}
+
+set_apptrust_order()
+{
+	local if_name=$1
+	local order=$2
+
+	dcb apptrust set dev ${if_name} order ${order}
+}
+
+# Function to extract a specified field from a given JSON stats string
+extract_network_stat() {
+	local stats_json=$1
+	local field_name=$2
+
+	echo $(echo "$stats_json" | jq -r "$field_name")
+}
+
+run_test()
+{
+	local test_name=$1;
+	local apptrust_order=$2;
+	local port_prio=$3;
+	local dscp_ipv=$4;
+	local dscp=$5;
+	local have_vlan=$6;
+	local pcp_ipv=$7;
+	local vlan_pcp=$8;
+	local ip_v6=$9
+
+	local rx_ipv
+	local tx_ipv
+
+	RET=0
+
+	# Send some packet to populate the switch MAC table
+	$MZ ${h2} -a ${h2_mac} -b ${h1_mac} -p 64 -t icmp echores -c 1
+
+	# Based on the apptrust order, set the expected Internal Priority values
+	# for the RX and TX paths.
+	if [ "${apptrust_order}" == "" ]; then
+		echo "Apptrust order not set."
+		rx_ipv=${port_prio}
+		tx_ipv=${port_prio}
+	elif [ "${apptrust_order}" == "dscp" ]; then
+		echo "Apptrust order is DSCP."
+		rx_ipv=${dscp_ipv}
+		tx_ipv=${dscp_ipv}
+	elif [ "${apptrust_order}" == "pcp" ]; then
+		echo "Apptrust order is PCP."
+		rx_ipv=${pcp_ipv}
+		tx_ipv=${pcp_ipv}
+	elif [ "${apptrust_order}" == "pcp dscp" ]; then
+		echo "Apptrust order is PCP DSCP."
+		if [ ${have_vlan} -eq 1 ]; then
+			rx_ipv=$((dscp_ipv > pcp_ipv ? dscp_ipv : pcp_ipv))
+			tx_ipv=${pcp_ipv}
+		else
+			rx_ipv=${dscp_ipv}
+			tx_ipv=${dscp_ipv}
+		fi
+	else
+		RET=1
+		echo "Error: Unknown apptrust order ${apptrust_order}"
+		log_test "${test_name}"
+		return
+	fi
+
+	# Most/all? of the KSZ switches do not provide per-TC counters. There
+	# are only tx_hi and rx_hi counters, which are used to count packets
+	# which are considered as high priority and most likely not assigned
+	# to the queue 0.
+	# On the ingress path, packets seem to get high priority status
+	# independently of the DSCP or PCP global mapping. On the egress path,
+	# the high priority status is assigned based on the DSCP or PCP global
+	# map configuration.
+	# The thresholds for the high priority status are not documented, but
+	# it seems that the switch considers packets as high priority on the
+	# ingress path if detected Internal Priority is greater than 0. On the
+	# egress path, the switch considers packets as high priority if
+	# detected Internal Priority is greater than 1.
+	if [ ${rx_ipv} -ge 1 ]; then
+		local expect_rx_high_prio=1
+	else
+		local expect_rx_high_prio=0
+	fi
+
+	if [ ${tx_ipv} -ge 2 ]; then
+		local expect_tx_high_prio=1
+	else
+		local expect_tx_high_prio=0
+	fi
+
+	# Use ip tool to get the current switch packet counters. ethool stats
+	# need to be recalculated to get the correct values.
+	local swp1_stats=$(ip -s -j link show dev ${swp1})
+	local swp2_stats=$(ip -s -j link show dev ${swp2})
+	local swp1_rx_packets_before=$(extract_network_stat "$swp1_stats" \
+				       '.[0].stats64.rx.packets')
+	local swp1_rx_bytes_before=$(extract_network_stat "$swp1_stats" \
+				     '.[0].stats64.rx.bytes')
+	local swp2_tx_packets_before=$(extract_network_stat "$swp2_stats" \
+				       '.[0].stats64.tx.packets')
+	local swp2_tx_bytes_before=$(extract_network_stat "$swp2_stats" \
+				     '.[0].stats64.tx.bytes')
+	local swp1_rx_hi_before=$(ethtool_stats_get ${swp1} "rx_hi")
+	local swp2_tx_hi_before=$(ethtool_stats_get ${swp2} "tx_hi")
+
+	# Assamble the mausezahn command based on the test parameters
+	# For the testis with ipv4 or ipv6, use icmp response packets,
+	# to avoid interaction with the system, to keep packet counters
+	# clean.
+	if [ ${ip_v6} -eq 0 ]; then
+		local ip="-a ${h1_mac} -b ${h2_mac} -A ${H1_IPV4} \
+			  -B ${H2_IPV4} -t icmp unreach,code=1,dscp=${dscp}"
+	else
+		local ip="-6 -a ${h1_mac} -b ${h2_mac} -A ${H1_IPV6} \
+			  -B ${H2_IPV6} -t icmp6 type=1,code=0,dscp=${dscp}"
+	fi
+
+	if [ ${have_vlan} -eq 1 ]; then
+		local vlan_pcp_opt="-Q ${vlan_pcp}:0"
+	else
+		local vlan_pcp_opt=""
+	fi
+	$MZ ${h1} ${ip} -c ${PING_COUNT} -d 10msec ${vlan_pcp_opt}
+
+	# Wait until the switch packet counters are updated
+	sleep 6
+
+	local swp1_stats=$(ip -s -j link show dev ${swp1})
+	local swp2_stats=$(ip -s -j link show dev ${swp2})
+
+	local swp1_rx_packets_after=$(extract_network_stat "$swp1_stats" \
+				      '.[0].stats64.rx.packets')
+	local swp1_rx_bytes_after=$(extract_network_stat "$swp1_stats" \
+				    '.[0].stats64.rx.bytes')
+	local swp2_tx_packets_after=$(extract_network_stat "$swp2_stats" \
+				      '.[0].stats64.tx.packets')
+	local swp2_tx_bytes_after=$(extract_network_stat "$swp2_stats" \
+				    '.[0].stats64.tx.bytes')
+
+	local swp1_rx_packets_diff=$((${swp1_rx_packets_after} - \
+				      ${swp1_rx_packets_before}))
+	local swp2_tx_packets_diff=$((${swp2_tx_packets_after} - \
+				      ${swp2_tx_packets_before}))
+
+	local swp1_rx_hi_after=$(ethtool_stats_get ${swp1} "rx_hi")
+	local swp2_tx_hi_after=$(ethtool_stats_get ${swp2} "tx_hi")
+
+	# Test if any packets were received on swp1, we will rx before and after
+	if [ ${swp1_rx_packets_diff} -lt ${PING_COUNT} ]; then
+		echo "Not expected amount of received packets on ${swp1}"
+		echo "before ${swp1_rx_packets_before} after ${swp1_rx_packets_after}"
+		RET=1
+	fi
+
+	# Test if any packets were transmitted on swp2, we will tx before and after
+	if [ ${swp2_tx_packets_diff} -lt ${PING_COUNT} ]; then
+		echo "Not expected amount of transmitted packets on ${swp2}"
+		echo "before ${swp2_tx_packets_before} after ${swp2_tx_packets_after}"
+		RET=1
+	fi
+
+	# tx/rx_hi counted in bytes. So, we need to compare the difference in bytes
+	local swp1_rx_bytes_diff=$(($swp1_rx_bytes_after - $swp1_rx_bytes_before))
+	local swp2_tx_bytes_diff=$(($swp2_tx_bytes_after - $swp2_tx_bytes_before))
+	local swp1_rx_hi_diff=$(($swp1_rx_hi_after - $swp1_rx_hi_before))
+	local swp2_tx_hi_diff=$(($swp2_tx_hi_after - $swp2_tx_hi_before))
+
+	if [ ${expect_rx_high_prio} -eq 1 ]; then
+		swp1_rx_hi_diff=$((${swp1_rx_hi_diff} - \
+				   ${swp1_rx_packets_diff} * ${ETH_FCS_LEN}))
+		if [ ${swp1_rx_hi_diff} -ne ${swp1_rx_bytes_diff} ]; then
+			echo "Not expected amount of high priority packets received on ${swp1}"
+			echo "RX hi diff: ${swp1_rx_hi_diff}, expected RX bytes diff: ${swp1_rx_bytes_diff}"
+			RET=1
+		fi
+	else
+		if [ ${swp1_rx_hi_diff} -ne 0 ]; then
+			echo "Unexpected amount of high priority packets received on ${swp1}"
+			echo "RX hi diff: ${swp1_rx_hi_diff}, expected 0"
+			RET=1
+		fi
+	fi
+
+	if [ ${expect_tx_high_prio} -eq 1 ]; then
+		swp2_tx_hi_diff=$((${swp2_tx_hi_diff} - \
+				   ${swp2_tx_packets_diff} * ${ETH_FCS_LEN}))
+		if [ ${swp2_tx_hi_diff} -ne ${swp2_tx_bytes_diff} ]; then
+			echo "Not expected amount of high priority packets transmitted on ${swp2}"
+			echo "TX hi diff: ${swp2_tx_hi_diff}, expected TX bytes diff: ${swp2_tx_bytes_diff}"
+			RET=1
+		fi
+	else
+		if [ ${swp2_tx_hi_diff} -ne 0 ]; then
+			echo "Unexpected amount of high priority packets transmitted on ${swp2}"
+			echo "TX hi diff: ${swp2_tx_hi_diff}, expected 0"
+			RET=1
+		fi
+	fi
+
+	log_test "${test_name}"
+}
+
+run_test_dscp()
+{
+	# IPv4 test
+	run_test "$1" "$2" "$3" "$4" "$5" 0 0 0 0
+	# IPv6 test
+	run_test "$1" "$2" "$3" "$4" "$5" 0 0 0 1
+}
+
+run_test_dscp_pcp()
+{
+	# IPv4 test
+	run_test "$1" "$2" "$3" "$4" "$5" 1 "$6" "$7" 0
+	# IPv6 test
+	run_test "$1" "$2" "$3" "$4" "$5" 1 "$6" "$7" 1
+}
+
+port_default_prio_get()
+{
+	local if_name=$1
+	local prio
+
+	prio="$(dcb -j app show dev ${if_name} default-prio | \
+		jq '.default_prio[]')"
+	if [ -z "${prio}" ]; then
+		prio=0
+	fi
+
+	echo ${prio}
+}
+
+test_port_default()
+{
+	local orig_apptrust=$(port_get_default_apptrust ${swp1})
+	local orig_prio=$(port_default_prio_get ${swp1})
+	local apptrust_order=""
+
+	RET=0
+
+	# Make sure no other priority sources will interfere with the test
+	set_apptrust_order ${swp1} "${apptrust_order}"
+
+	for val in $(seq 0 7); do
+		dcb app replace dev ${swp1} default-prio ${val}
+		if [ $val -ne $(port_default_prio_get ${swp1}) ]; then
+			RET=1
+			break
+		fi
+
+		run_test_dscp "Port-default QoS classification, prio: ${val}" \
+			"${apptrust_order}" ${val} 0 0
+	done
+
+	set_apptrust_order ${swp1} "${orig_apptrust}"
+	if [[ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]]; then
+		RET=1
+	fi
+
+	dcb app replace dev ${swp1} default-prio ${orig_prio}
+	if [ $orig_prio -ne $(port_default_prio_get ${swp1}) ]; then
+		RET=1
+	fi
+
+	log_test "Port-default QoS classification"
+}
+
+port_get_default_apptrust()
+{
+	local if_name=$1
+
+	dcb -j apptrust show dev ${if_name} | jq -r '.order[]' | \
+		tr '\n' ' ' | xargs
+}
+
+test_port_apptrust()
+{
+	local original_dscp_prios_swp1=$(get_dscp_prios ${swp1})
+	local orig_apptrust=$(port_get_default_apptrust ${swp1})
+	local orig_port_prio=$(port_default_prio_get ${swp1})
+	local order_variants=("pcp dscp" "dscp" "pcp")
+	local apptrust_order
+	local port_prio
+	local dscp_prio
+	local pcp_prio
+	local dscp
+	local pcp
+
+	RET=0
+
+	# First, test if apptrust configuration as taken by the kernel
+	for order in "${order_variants[@]}"; do
+		set_apptrust_order ${swp1} "${order}"
+		if [[ "$order" != "$(port_get_default_apptrust ${swp1})" ]]; then
+			RET=1
+			break
+		fi
+	done
+
+	log_test "Apptrust, supported variants"
+
+	# To test if the apptrust configuration is working as expected, we need
+	# to set DSCP priorities for the switch port.
+	init_dscp_prios "${swp1}" "${original_dscp_prios_swp1}"
+
+	# Start with a simple test where all apptrust sources are disabled
+	# default port priority is 0, DSCP priority is mapped to 7.
+	# No high priority packets should be received or transmitted.
+	port_prio=0
+	dscp_prio=7
+	dscp=4
+
+	dcb app replace dev ${swp1} default-prio ${port_prio}
+	dcb app replace dev ${swp1} dscp-prio ${dscp}:${dscp_prio}
+
+	apptrust_order=""
+	set_apptrust_order ${swp1} "${apptrust_order}"
+	# Test with apptrust sources disabled, Packets should get port default
+	# priority which is 0
+	run_test_dscp "Apptrust, all disabled. DSCP-prio ${dscp}:${dscp_prio}" \
+		"${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+	apptrust_order="pcp"
+	set_apptrust_order ${swp1} "${apptrust_order}"
+	# If PCP is enabled, packets should get PCP priority, which is not
+	# set in this test (no VLAN tags are present in the packet). No high
+	# priority packets should be received or transmitted.
+	run_test_dscp "Apptrust, PCP enabled. DSCP-prio ${dscp}:${dscp_prio}" \
+		"${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+	apptrust_order="dscp"
+	set_apptrust_order ${swp1} "${apptrust_order}"
+	# If DSCP is enabled, packets should get DSCP priority which is set to 7
+	# in this test. High priority packets should be received and transmitted.
+	run_test_dscp "Apptrust, DSCP enabled. DSCP-prio ${dscp}:${dscp_prio}" \
+		"${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+	apptrust_order="pcp dscp"
+	set_apptrust_order ${swp1} "${apptrust_order}"
+	# If PCP and DSCP are enabled, PCP would have higher apptrust priority
+	# so packets should get PCP priority. But in this test VLAN PCP is not
+	# set, so it should get DSCP priority which is set to 7. High priority
+	# packets should be received and transmitted.
+	run_test_dscp "Apptrust, PCP and DSCP are enabled. DSCP-prio ${dscp}:${dscp_prio}" \
+		"${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+	# If VLAN PCP is set, it should have higher apptrust priority than DSCP
+	# so packets should get VLAN PCP priority. Send packets with VLAN PCP
+	# set to 0, DSCP set to 7. Packets should get VLAN PCP priority.
+	# No high priority packets should be transmitted. Due to nature of the
+	# switch, high priority packets will be received.
+	pcp_prio=0
+	pcp=0
+	run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+		"${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+
+	# If VLAN PCP is set to 7, it should have higher apptrust priority than
+	# DSCP so packets should get VLAN PCP priority. Send packets with VLAN
+	# PCP set to 7, DSCP set to 7. Packets should get VLAN PCP priority.
+	# High priority packets should be received and transmitted.
+	pcp_prio=7
+	pcp=7
+	run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+		"${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+	# Now make sure that the switch is able to handle the case where DSCP
+	# priority is set to 0 and PCP priority is set to 7. Packets should get
+	# PCP priority. High priority packets should be received and transmitted.
+	dscp_prio=0
+	dcb app replace dev ${swp1} dscp-prio ${dscp}:${dscp_prio}
+	run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+		"${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+	# If both VLAN PCP and DSCP are set to 0, packets should get 0 priority.
+	# No high priority packets should be received or transmitted.
+	pcp_prio=0
+	pcp=0
+	run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+		"${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+
+	# Restore original priorities
+	if ! restore_priorities "${swp1}" "${original_dscp_prios_swp1}"; then
+		RET=1
+	fi
+
+	set_apptrust_order ${swp1} "${orig_apptrust}"
+	if [ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]; then
+		RET=1
+	fi
+
+	dcb app replace dev ${swp1} default-prio ${orig_port_prio}
+	if [ $orig_port_prio -ne $(port_default_prio_get ${swp1}) ]; then
+		RET=1
+	fi
+
+	log_test "Apptrust, restore original settings"
+}
+
+# Function to get current DSCP priorities
+get_dscp_prios() {
+	local if_name=$1
+	dcb -j app show dev ${if_name} | jq -c '.dscp_prio'
+}
+
+# Function to set a specific DSCP priority on a device
+replace_dscp_prio() {
+	local if_name=$1
+	local dscp=$2
+	local prio=$3
+	dcb app replace dev ${if_name} dscp-prio ${dscp}:${prio}
+}
+
+# Function to compare DSCP maps
+compare_dscp_maps() {
+	local old_json=$1
+	local new_json=$2
+	local dscp=$3
+	local prio=$4
+
+	# Create a modified old_json with the expected change for comparison
+	local modified_old_json=$(echo "$old_json" |
+		jq --argjson dscp $dscp --argjson prio $prio \
+			'map(if .[0] == $dscp then [$dscp, $prio] else . end)' |
+		tr -d " \n")
+
+	# Compare new_json with the modified_old_json
+	if [[ "$modified_old_json" == "$new_json" ]]; then
+		return 0
+	else
+		return 1
+	fi
+}
+
+# Function to set DSCP priorities
+set_and_verify_dscp() {
+	local port=$1
+	local dscp=$2
+	local new_prio=$3
+
+	local old_prios=$(get_dscp_prios $port)
+
+	replace_dscp_prio "$port" $dscp $new_prio
+
+	# Fetch current settings and compare
+	local current_prios=$(get_dscp_prios $port)
+	if ! compare_dscp_maps "$old_prios" "$current_prios" $dscp $new_prio; then
+		echo "Error: Unintended changes detected in DSCP map for $port after setting DSCP $dscp to $new_prio."
+		return 1
+	fi
+	return 0
+}
+
+# Function to restore original priorities
+restore_priorities() {
+	local port=$1
+	local original_prios=$2
+
+	echo "Removing test artifacts for $port"
+	local current_prios=$(get_dscp_prios $port)
+	local prio_str=$(echo "$current_prios" |
+		jq -r 'map("\(.[0]):\(.[1])") | join(" ")')
+	dcb app del dev $port dscp-prio $prio_str
+
+	echo "Restoring original DSCP priorities for $port"
+	local restore_str=$(echo "$original_prios" |
+		jq -r 'map("\(.[0]):\(.[1])") | join(" ")')
+	dcb app add dev $port dscp-prio $restore_str
+
+	local current_prios=$(get_dscp_prios $port)
+	if [[ "$original_prios" != "$current_prios" ]]; then
+		echo "Error: Failed to restore original DSCP priorities for $port"
+		return 1
+	fi
+	return 0
+}
+
+# Initialize DSCP priorities. Set them to predictable values for testing.
+init_dscp_prios() {
+	local port=$1
+	local original_prios=$2
+
+	echo "Removing any existing DSCP priority mappins for $port"
+	local prio_str=$(echo "$original_prios" |
+		jq -r 'map("\(.[0]):\(.[1])") | join(" ")')
+	dcb app del dev $port dscp-prio $prio_str
+
+	# Initialize DSCP priorities list
+	local dscp_prios=""
+	for dscp in {0..63}; do
+		dscp_prios+=("$dscp:0")
+	done
+
+	echo "Setting initial DSCP priorities map to 0 for $port"
+	dcb app add dev $port dscp-prio ${dscp_prios[@]}
+}
+
+# Main function to test global DSCP map across specified ports
+test_global_dscp_map() {
+	local ports=("$swp1" "$swp2")
+	local original_dscp_prios_port0=$(get_dscp_prios ${ports[0]})
+	local orig_apptrust=$(port_get_default_apptrust ${swp1})
+	local orig_port_prio=$(port_default_prio_get ${swp1})
+	local apptrust_order="dscp"
+	local port_prio=0
+	local dscp_prio
+	local dscp
+
+	RET=0
+
+	set_apptrust_order ${swp1} "${apptrust_order}"
+	dcb app replace dev ${swp1} default-prio ${port_prio}
+
+	# Initialize DSCP priorities
+	init_dscp_prios "${ports[0]}" "$original_dscp_prios_port0"
+
+	# Loop over each DSCP index
+	for dscp in {0..63}; do
+		# and test each Internal Priority value
+		for dscp_prio in {0..7}; do
+			# do it for each port. This is to test if the global DSCP map
+			# is accessible from all ports.
+			for port in "${ports[@]}"; do
+				if ! set_and_verify_dscp "$port" $dscp $dscp_prio; then
+					RET=1
+				fi
+			done
+
+			# Test if the DSCP priority is correctly applied to the packets
+			run_test_dscp "DSCP (${dscp}) QoS classification, prio: ${dscp_prio}" \
+				"${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+			if [ ${RET} -eq 1 ]; then
+				break
+			fi
+		done
+	done
+
+	# Restore original priorities
+	if ! restore_priorities "${ports[0]}" "${original_dscp_prios_port0}"; then
+		RET=1
+	fi
+
+	set_apptrust_order ${swp1} "${orig_apptrust}"
+	if [[ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]]; then
+		RET=1
+	fi
+
+	dcb app replace dev ${swp1} default-prio ${orig_port_prio}
+	if [ $orig_port_prio -ne $(port_default_prio_get ${swp1}) ]; then
+		RET=1
+	fi
+
+	log_test "DSCP global map"
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+	test_port_default
+	test_port_apptrust
+	test_global_dscp_map
+"
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh
new file mode 100755
index 000000000000..bdffe698e1d1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh
@@ -0,0 +1,201 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that blackhole routes are marked as offloaded and that packets hitting
+# them are dropped by the ASIC and not by the kernel.
+#
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    | 2001:db8:1::1/64           |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# |    |  default via 2001:db8:1::2 |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |        2001:db8:1::2/64                                                   |
+# |                                                                           |
+# |        2001:db8:2::2/64                                                   |
+# |        198.51.100.2/24                                                    |
+# |    + $rp2                                                                 |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |  default via 2001:db8:2::2 |
+# |    |                            |
+# |    | 2001:db8:2::1/64           |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	blackhole_ipv4
+	blackhole_ipv6
+"
+NUM_NETIFS=4
+: ${TIMEOUT:=20000} # ms
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+	ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	tc qdisc add dev $rp1 clsact
+
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	__addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+	__addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+	__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+
+	tc qdisc del dev $rp1 clsact
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.1 ": h1->h2"
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::1 ": h1->h2"
+}
+
+blackhole_ipv4()
+{
+	# Transmit packets from H1 to H2 and make sure they are dropped by the
+	# ASIC and not by the kernel
+	RET=0
+
+	ip -4 route add blackhole 198.51.100.0/30
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_hw dst_ip 198.51.100.1 src_ip 192.0.2.1 ip_proto icmp \
+		action pass
+
+	busywait "$TIMEOUT" wait_for_offload ip -4 route show 198.51.100.0/30
+	check_err $? "route not marked as offloaded when should"
+
+	ping_do $h1 198.51.100.1
+	check_fail $? "ping passed when should not"
+
+	tc_check_packets "dev $rp1 ingress" 101 0
+	check_err $? "packets trapped and not dropped by ASIC"
+
+	log_test "IPv4 blackhole route"
+
+	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+	ip -4 route del blackhole 198.51.100.0/30
+}
+
+blackhole_ipv6()
+{
+	RET=0
+
+	ip -6 route add blackhole 2001:db8:2::/120
+	tc filter add dev $rp1 ingress protocol ipv6 pref 1 handle 101 flower \
+		skip_hw dst_ip 2001:db8:2::1 src_ip 2001:db8:1::1 \
+		ip_proto icmpv6 action pass
+
+	busywait "$TIMEOUT" wait_for_offload ip -6 route show 2001:db8:2::/120
+	check_err $? "route not marked as offloaded when should"
+
+	ping6_do $h1 2001:db8:2::1
+	check_fail $? "ping passed when should not"
+
+	tc_check_packets "dev $rp1 ingress" 101 0
+	check_err $? "packets trapped and not dropped by ASIC"
+
+	log_test "IPv6 blackhole route"
+
+	tc filter del dev $rp1 ingress protocol ipv6 pref 1 handle 101 flower
+	ip -6 route del blackhole 2001:db8:2::/120
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	router_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
new file mode 100755
index 000000000000..224ca3695c89
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
@@ -0,0 +1,334 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# In addition to the common variables, user might use:
+# LC_SLOT - If not set, all probed line cards are going to be tested,
+#	    with an exception of the "activation_16x100G_test".
+#	    It set, only the selected line card is going to be used
+#	    for tests, including "activation_16x100G_test".
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	unprovision_test
+	provision_test
+	activation_16x100G_test
+"
+
+NUM_NETIFS=0
+
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+until_lc_state_is()
+{
+	local state=$1; shift
+	local current=$("$@")
+
+	echo "$current"
+	[ "$current" == "$state" ]
+}
+
+until_lc_state_is_not()
+{
+	! until_lc_state_is "$@"
+}
+
+lc_state_get()
+{
+	local lc=$1
+
+	devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].state"
+}
+
+lc_wait_until_state_changes()
+{
+	local lc=$1
+	local state=$2
+	local timeout=$3 # ms
+
+	busywait "$timeout" until_lc_state_is_not "$state" lc_state_get "$lc"
+}
+
+lc_wait_until_state_becomes()
+{
+	local lc=$1
+	local state=$2
+	local timeout=$3 # ms
+
+	busywait "$timeout" until_lc_state_is "$state" lc_state_get "$lc"
+}
+
+until_lc_port_count_is()
+{
+	local port_count=$1; shift
+	local current=$("$@")
+
+	echo "$current"
+	[ $current == $port_count ]
+}
+
+lc_port_count_get()
+{
+	local lc=$1
+
+	devlink port -j | jq -e -r ".[][] | select(.lc==$lc) | .port" | wc -l
+}
+
+lc_wait_until_port_count_is()
+{
+	local lc=$1
+	local port_count=$2
+	local timeout=$3 # ms
+
+	busywait "$timeout" until_lc_port_count_is "$port_count" lc_port_count_get "$lc"
+}
+
+lc_nested_devlink_dev_get()
+{
+	local lc=$1
+
+	devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].nested_devlink"
+}
+
+PROV_UNPROV_TIMEOUT=8000 # ms
+POST_PROV_ACT_TIMEOUT=2000 # ms
+PROV_PORTS_INSTANTIATION_TIMEOUT=15000 # ms
+
+unprovision_one()
+{
+	local lc=$1
+	local state
+
+	state=$(lc_state_get $lc)
+	check_err $? "Failed to get state of linecard $lc"
+	if [[ "$state" == "unprovisioned" ]]; then
+		return
+	fi
+
+	log_info "Unprovisioning linecard $lc"
+
+	devlink lc set $DEVLINK_DEV lc $lc notype
+	check_err $? "Failed to trigger linecard $lc unprovisioning"
+
+	state=$(lc_wait_until_state_changes $lc "unprovisioning" \
+		$PROV_UNPROV_TIMEOUT)
+	check_err $? "Failed to unprovision linecard $lc (timeout)"
+
+	[ "$state" == "unprovisioned" ]
+	check_err $? "Failed to unprovision linecard $lc (state=$state)"
+}
+
+provision_one()
+{
+	local lc=$1
+	local type=$2
+	local state
+
+	log_info "Provisioning linecard $lc"
+
+	devlink lc set $DEVLINK_DEV lc $lc type $type
+	check_err $? "Failed trigger linecard $lc provisioning"
+
+	state=$(lc_wait_until_state_changes $lc "provisioning" \
+		$PROV_UNPROV_TIMEOUT)
+	check_err $? "Failed to provision linecard $lc (timeout)"
+
+	[ "$state" == "provisioned" ] || [ "$state" == "active" ]
+	check_err $? "Failed to provision linecard $lc (state=$state)"
+
+	provisioned_type=$(devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].type")
+	[ "$provisioned_type" == "$type" ]
+	check_err $? "Wrong provision type returned for linecard $lc (got \"$provisioned_type\", expected \"$type\")"
+
+	# Wait for possible activation to make sure the state
+	# won't change after return from this function.
+	state=$(lc_wait_until_state_becomes $lc "active" \
+		$POST_PROV_ACT_TIMEOUT)
+}
+
+unprovision_test()
+{
+	RET=0
+	local lc
+
+	lc=$LC_SLOT
+	unprovision_one $lc
+	log_test "Unprovision"
+}
+
+LC_16X100G_TYPE="16x100G"
+LC_16X100G_PORT_COUNT=16
+
+supported_types_check()
+{
+	local lc=$1
+	local supported_types_count
+	local type_index
+	local lc_16x100_found=false
+
+	supported_types_count=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+				jq -e -r ".[][][].supported_types | length")
+	[ $supported_types_count != 0 ]
+	check_err $? "No supported types found for linecard $lc"
+	for (( type_index=0; type_index<$supported_types_count; type_index++ ))
+	do
+		type=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+		       jq -e -r ".[][][].supported_types[$type_index]")
+		if [[ "$type" == "$LC_16X100G_TYPE" ]]; then
+			lc_16x100_found=true
+			break
+		fi
+	done
+	[ $lc_16x100_found = true ]
+	check_err $? "16X100G not found between supported types of linecard $lc"
+}
+
+ports_check()
+{
+	local lc=$1
+	local expected_port_count=$2
+	local port_count
+
+	port_count=$(lc_wait_until_port_count_is $lc $expected_port_count \
+		$PROV_PORTS_INSTANTIATION_TIMEOUT)
+	[ $port_count != 0 ]
+	check_err $? "No port associated with linecard $lc"
+	[ $port_count == $expected_port_count ]
+	check_err $? "Unexpected port count linecard $lc (got $port_count, expected $expected_port_count)"
+}
+
+lc_dev_info_provisioned_check()
+{
+	local lc=$1
+	local nested_devlink_dev=$2
+	local fixed_hw_revision
+	local running_ini_version
+
+	fixed_hw_revision=$(devlink dev info $nested_devlink_dev -j | \
+			    jq -e -r '.[][].versions.fixed."hw.revision"')
+	check_err $? "Failed to get linecard $lc fixed.hw.revision"
+	log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\""
+	running_ini_version=$(devlink dev info $nested_devlink_dev -j | \
+			      jq -e -r '.[][].versions.running."ini.version"')
+	check_err $? "Failed to get linecard $lc running.ini.version"
+	log_info "Linecard $lc running.ini.version: \"$running_ini_version\""
+}
+
+provision_test()
+{
+	RET=0
+	local lc
+	local type
+	local state
+	local nested_devlink_dev
+
+	lc=$LC_SLOT
+	supported_types_check $lc
+	state=$(lc_state_get $lc)
+	check_err $? "Failed to get state of linecard $lc"
+	if [[ "$state" != "unprovisioned" ]]; then
+		unprovision_one $lc
+	fi
+	provision_one $lc $LC_16X100G_TYPE
+	ports_check $lc $LC_16X100G_PORT_COUNT
+
+	nested_devlink_dev=$(lc_nested_devlink_dev_get $lc)
+	check_err $? "Failed to get nested devlink handle of linecard $lc"
+	lc_dev_info_provisioned_check $lc $nested_devlink_dev
+
+	log_test "Provision"
+}
+
+ACTIVATION_TIMEOUT=20000 # ms
+
+interface_check()
+{
+	ip link set $h1 up
+	ip link set $h2 up
+	ifaces_upped=true
+	setup_wait
+}
+
+lc_dev_info_active_check()
+{
+	local lc=$1
+	local nested_devlink_dev=$2
+	local fixed_device_fw_psid
+	local running_device_fw
+
+	fixed_device_fw_psid=$(devlink dev info $nested_devlink_dev -j | \
+			       jq -e -r ".[][].versions.fixed" | \
+			       jq -e -r '."fw.psid"')
+	check_err $? "Failed to get linecard $lc fixed fw PSID"
+	log_info "Linecard $lc fixed.fw.psid: \"$fixed_device_fw_psid\""
+
+	running_device_fw=$(devlink dev info $nested_devlink_dev -j | \
+			    jq -e -r ".[][].versions.running.fw")
+	check_err $? "Failed to get linecard $lc running.fw.version"
+	log_info "Linecard $lc running.fw: \"$running_device_fw\""
+}
+
+activation_16x100G_test()
+{
+	RET=0
+	local lc
+	local type
+	local state
+	local nested_devlink_dev
+
+	lc=$LC_SLOT
+	type=$LC_16X100G_TYPE
+
+	unprovision_one $lc
+	provision_one $lc $type
+	state=$(lc_wait_until_state_becomes $lc "active" \
+		$ACTIVATION_TIMEOUT)
+	check_err $? "Failed to get linecard $lc activated (timeout)"
+
+	interface_check
+
+	nested_devlink_dev=$(lc_nested_devlink_dev_get $lc)
+	check_err $? "Failed to get nested devlink handle of linecard $lc"
+	lc_dev_info_active_check $lc $nested_devlink_dev
+
+	log_test "Activation 16x100G"
+}
+
+setup_prepare()
+{
+	local lc_num=$(devlink lc show -j | jq -e -r ".[][\"$DEVLINK_DEV\"] |length")
+	if [[ $? -ne 0 ]] || [[ $lc_num -eq 0 ]]; then
+		echo "SKIP: No linecard support found"
+		exit $ksft_skip
+	fi
+
+	if [ -z "$LC_SLOT" ]; then
+		echo "SKIP: \"LC_SLOT\" variable not provided"
+		exit $ksft_skip
+	fi
+
+	# Interfaces are not present during the script start,
+	# that's why we define NUM_NETIFS here so dummy
+	# implicit veth pairs are not created.
+	NUM_NETIFS=2
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+	ifaces_upped=false
+}
+
+cleanup()
+{
+	if [ "$ifaces_upped" = true ] ; then
+		ip link set $h1 down
+		ip link set $h2 down
+	fi
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh
new file mode 100755
index 000000000000..36055279ba92
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test generic devlink-trap functionality over mlxsw. These tests are not
+# specific to a single trap, but do not check the devlink-trap common
+# infrastructure either.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	dev_del_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+dev_del_test()
+{
+	local trap_name="source_mac_is_multicast"
+	local smac=01:02:03:04:05:06
+	local num_iter=5
+	local mz_pid
+	local i
+
+	$MZ $h1 -c 0 -p 100 -a $smac -b bcast -t ip -q &
+	mz_pid=$!
+
+	# The purpose of this test is to make sure we correctly dismantle a
+	# port while packets are trapped from it. This is done by reloading the
+	# the driver while the 'ingress_smac_mc_drop' trap is triggered.
+	RET=0
+
+	for i in $(seq 1 $num_iter); do
+		log_info "Iteration $i / $num_iter"
+
+		devlink_trap_action_set $trap_name "trap"
+		sleep 1
+
+		devlink_reload
+		# Allow netdevices to be re-created following the reload
+		sleep 20
+
+		cleanup
+		setup_prepare
+		setup_wait
+	done
+
+	log_test "Device delete"
+
+	kill_process $mz_pid
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
new file mode 100755
index 000000000000..b32ba5fec59d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap ACL drops functionality over mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	ingress_flow_action_drop_test
+	egress_flow_action_drop_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ingress_flow_action_drop_test()
+{
+	local mz_pid
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower src_mac $h1mac action pass
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+		flower dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -d 1msec -q &
+	mz_pid=$!
+
+	RET=0
+
+	devlink_trap_drop_test ingress_flow_action_drop $swp2 101
+
+	log_test "ingress_flow_action_drop"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+}
+
+egress_flow_action_drop_test()
+{
+	local mz_pid
+
+	tc filter add dev $swp2 egress protocol ip pref 2 handle 102 \
+		flower src_mac $h1mac action pass
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -d 1msec -q &
+	mz_pid=$!
+
+	RET=0
+
+	devlink_trap_drop_test egress_flow_action_drop $swp2 102
+
+	log_test "egress_flow_action_drop"
+
+	tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 2 102
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
new file mode 100755
index 000000000000..64153bbf95df
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
@@ -0,0 +1,709 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap control trap functionality over mlxsw. Each registered
+# control packet trap is tested to make sure it is triggered under the right
+# conditions.
+#
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    | 2001:db8:1::1/64           |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# |    |  default via 2001:db8:1::2 |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |        2001:db8:1::2/64                                                   |
+# |                                                                           |
+# |        2001:db8:2::2/64                                                   |
+# |        198.51.100.2/24                                                    |
+# |    + $rp2                                                                 |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |  default via 2001:db8:2::2 |
+# |    |                            |
+# |    | 2001:db8:2::1/64           |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	stp_test
+	lacp_test
+	lldp_test
+	igmp_query_test
+	igmp_v1_report_test
+	igmp_v2_report_test
+	igmp_v3_report_test
+	igmp_v2_leave_test
+	mld_query_test
+	mld_v1_report_test
+	mld_v2_report_test
+	mld_v1_done_test
+	ipv4_dhcp_test
+	ipv6_dhcp_test
+	arp_request_test
+	arp_response_test
+	ipv6_neigh_solicit_test
+	ipv6_neigh_advert_test
+	ipv4_bfd_test
+	ipv6_bfd_test
+	ipv4_ospf_test
+	ipv6_ospf_test
+	ipv4_bgp_test
+	ipv6_bgp_test
+	ipv4_vrrp_test
+	ipv6_vrrp_test
+	ipv4_pim_test
+	ipv6_pim_test
+	uc_loopback_test
+	local_route_test
+	external_route_test
+	ipv6_uc_dip_link_local_scope_test
+	ipv4_router_alert_test
+	ipv6_router_alert_test
+	ipv6_dip_all_nodes_test
+	ipv6_dip_all_routers_test
+	ipv6_router_solicit_test
+	ipv6_router_advert_test
+	ipv6_redirect_test
+	ptp_event_test
+	ptp_general_test
+	flow_action_sample_test
+	flow_action_trap_test
+	eapol_test
+"
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+	ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	__addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+	__addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+	__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	router_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+stp_test()
+{
+	devlink_trap_stats_test "STP" "stp" $MZ $h1 -c 1 -t bpdu -q
+}
+
+lacp_payload_get()
+{
+	local source_mac=$1; shift
+	local p
+
+	p=$(:
+		)"01:80:C2:00:00:02:"$(       : ETH daddr
+		)"$source_mac:"$(             : ETH saddr
+		)"88:09:"$(                   : ETH type
+		)
+	echo $p
+}
+
+lacp_test()
+{
+	local h1mac=$(mac_get $h1)
+
+	devlink_trap_stats_test "LACP" "lacp" $MZ $h1 -c 1 \
+		$(lacp_payload_get $h1mac) -p 100 -q
+}
+
+lldp_payload_get()
+{
+	local source_mac=$1; shift
+	local p
+
+	p=$(:
+		)"01:80:C2:00:00:0E:"$(       : ETH daddr
+		)"$source_mac:"$(             : ETH saddr
+		)"88:CC:"$(                   : ETH type
+		)
+	echo $p
+}
+
+lldp_test()
+{
+	local h1mac=$(mac_get $h1)
+
+	devlink_trap_stats_test "LLDP" "lldp" $MZ $h1 -c 1 \
+		$(lldp_payload_get $h1mac) -p 100 -q
+}
+
+igmp_query_test()
+{
+	# IGMP (IP Protocol 2) Membership Query (Type 0x11)
+	devlink_trap_stats_test "IGMP Membership Query" "igmp_query" \
+		$MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=11 -p 100 -q
+}
+
+igmp_v1_report_test()
+{
+	# IGMP (IP Protocol 2) Version 1 Membership Report (Type 0x12)
+	devlink_trap_stats_test "IGMP Version 1 Membership Report" \
+		"igmp_v1_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=12 -p 100 -q
+}
+
+igmp_v2_report_test()
+{
+	# IGMP (IP Protocol 2) Version 2 Membership Report (Type 0x16)
+	devlink_trap_stats_test "IGMP Version 2 Membership Report" \
+		"igmp_v2_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=16 -p 100 -q
+}
+
+igmp_v3_report_test()
+{
+	# IGMP (IP Protocol 2) Version 3 Membership Report (Type 0x22)
+	devlink_trap_stats_test "IGMP Version 3 Membership Report" \
+		"igmp_v3_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=22 -p 100 -q
+}
+
+igmp_v2_leave_test()
+{
+	# IGMP (IP Protocol 2) Version 2 Leave Group (Type 0x17)
+	devlink_trap_stats_test "IGMP Version 2 Leave Group" \
+		"igmp_v2_leave" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:02 \
+		-A 192.0.2.1 -B 224.0.0.2 -t ip proto=2,p=17 -p 100 -q
+}
+
+mld_payload_get()
+{
+	local type=$1; shift
+	local p
+
+	type=$(printf "%x" $type)
+	p=$(:
+		)"3A:"$(			: Next Header - ICMPv6
+		)"00:"$(			: Hdr Ext Len
+		)"00:00:00:00:00:00:"$(		: Options and Padding
+		)"$type:"$(			: ICMPv6.type
+		)"00:"$(			: ICMPv6.code
+		)"00:"$(			: ICMPv6.checksum
+		)
+	echo $p
+}
+
+mld_query_test()
+{
+	# MLD Multicast Listener Query (Type 130)
+	devlink_trap_stats_test "MLD Multicast Listener Query" "mld_query" \
+		$MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 130) -p 100 -q
+}
+
+mld_v1_report_test()
+{
+	# MLD Version 1 Multicast Listener Report (Type 131)
+	devlink_trap_stats_test "MLD Version 1 Multicast Listener Report" \
+		"mld_v1_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 131) -p 100 -q
+}
+
+mld_v2_report_test()
+{
+	# MLD Version 2 Multicast Listener Report (Type 143)
+	devlink_trap_stats_test "MLD Version 2 Multicast Listener Report" \
+		"mld_v2_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 143) -p 100 -q
+}
+
+mld_v1_done_test()
+{
+	# MLD Version 1 Multicast Listener Done (Type 132)
+	devlink_trap_stats_test "MLD Version 1 Multicast Listener Done" \
+		"mld_v1_done" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 132) -p 100 -q
+}
+
+ipv4_dhcp_test()
+{
+	devlink_trap_stats_test "IPv4 DHCP Port 67" "ipv4_dhcp" \
+		$MZ $h1 -c 1 -a own -b bcast -A 0.0.0.0 -B 255.255.255.255 \
+		-t udp sp=68,dp=67 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 DHCP Port 68" "ipv4_dhcp" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) -A 192.0.2.1 \
+		-B 255.255.255.255 -t udp sp=67,dp=68 -p 100 -q
+}
+
+ipv6_dhcp_test()
+{
+	devlink_trap_stats_test "IPv6 DHCP Port 547" "ipv6_dhcp" \
+		$MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=546,dp=547 \
+		-p 100 -q
+
+	devlink_trap_stats_test "IPv6 DHCP Port 546" "ipv6_dhcp" \
+		$MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=547,dp=546 \
+		-p 100 -q
+}
+
+arp_request_test()
+{
+	devlink_trap_stats_test "ARP Request" "arp_request" \
+		$MZ $h1 -c 1 -a own -b bcast -t arp request -p 100 -q
+}
+
+arp_response_test()
+{
+	devlink_trap_stats_test "ARP Response" "arp_response" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) -t arp reply -p 100 -q
+}
+
+icmpv6_header_get()
+{
+	local type=$1; shift
+	local p
+
+	type=$(printf "%x" $type)
+	p=$(:
+		)"$type:"$(			: ICMPv6.type
+		)"00:"$(			: ICMPv6.code
+		)"00:"$(			: ICMPv6.checksum
+		)
+	echo $p
+}
+
+ipv6_neigh_solicit_test()
+{
+	devlink_trap_stats_test "IPv6 Neighbour Solicitation" \
+		"ipv6_neigh_solicit" $MZ $h1 -6 -c 1 \
+		-A fe80::1 -B ff02::1:ff00:02 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 135) -p 100 -q
+}
+
+ipv6_neigh_advert_test()
+{
+	devlink_trap_stats_test "IPv6 Neighbour Advertisement" \
+		"ipv6_neigh_advert" $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B 2001:db8:1::2 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 136) -p 100 -q
+}
+
+ipv4_bfd_test()
+{
+	devlink_trap_stats_test "IPv4 BFD Control - Port 3784" "ipv4_bfd" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3784 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 BFD Echo - Port 3785" "ipv4_bfd" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3785 -p 100 -q
+}
+
+ipv6_bfd_test()
+{
+	devlink_trap_stats_test "IPv6 BFD Control - Port 3784" "ipv6_bfd" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 \
+		-t udp sp=49153,dp=3784 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 BFD Echo - Port 3785" "ipv6_bfd" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 \
+		-t udp sp=49153,dp=3785 -p 100 -q
+}
+
+ipv4_ospf_test()
+{
+	devlink_trap_stats_test "IPv4 OSPF - Multicast" "ipv4_ospf" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:00:05 \
+		-A 192.0.2.1 -B 224.0.0.5 -t ip proto=89 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 OSPF - Unicast" "ipv4_ospf" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t ip proto=89 -p 100 -q
+}
+
+ipv6_ospf_test()
+{
+	devlink_trap_stats_test "IPv6 OSPF - Multicast" "ipv6_ospf" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:05 \
+		-A fe80::1 -B ff02::5 -t ip next=89 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 OSPF - Unicast" "ipv6_ospf" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 -t ip next=89 -p 100 -q
+}
+
+ipv4_bgp_test()
+{
+	devlink_trap_stats_test "IPv4 BGP" "ipv4_bgp" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t tcp sp=54321,dp=179,flags=rst \
+		-p 100 -q
+}
+
+ipv6_bgp_test()
+{
+	devlink_trap_stats_test "IPv6 BGP" "ipv6_bgp" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 \
+		-t tcp sp=54321,dp=179,flags=rst -p 100 -q
+}
+
+ipv4_vrrp_test()
+{
+	devlink_trap_stats_test "IPv4 VRRP" "ipv4_vrrp" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:00:12 \
+		-A 192.0.2.1 -B 224.0.0.18 -t ip proto=112 -p 100 -q
+}
+
+ipv6_vrrp_test()
+{
+	devlink_trap_stats_test "IPv6 VRRP" "ipv6_vrrp" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:12 \
+		-A fe80::1 -B ff02::12 -t ip next=112 -p 100 -q
+}
+
+ipv4_pim_test()
+{
+	devlink_trap_stats_test "IPv4 PIM - Multicast" "ipv4_pim" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:00:0d \
+		-A 192.0.2.1 -B 224.0.0.13 -t ip proto=103 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 PIM - Unicast" "ipv4_pim" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t ip proto=103 -p 100 -q
+}
+
+ipv6_pim_test()
+{
+	devlink_trap_stats_test "IPv6 PIM - Multicast" "ipv6_pim" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:0d \
+		-A fe80::1 -B ff02::d -t ip next=103 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 PIM - Unicast" "ipv6_pim" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B 2001:db8:1::2 -t ip next=103 -p 100 -q
+}
+
+uc_loopback_test()
+{
+	# Add neighbours to the fake destination IPs, so that the packets are
+	# routed in the device and not trapped due to an unresolved neighbour
+	# exception.
+	ip -4 neigh add 192.0.2.3 lladdr 00:11:22:33:44:55 nud permanent \
+		dev $rp1
+	ip -6 neigh add 2001:db8:1::3 lladdr 00:11:22:33:44:55 nud permanent \
+		dev $rp1
+
+	devlink_trap_stats_test "IPv4 Unicast Loopback" "uc_loopback" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.3 -t udp sp=54321,dp=12345 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 Unicast Loopback" "uc_loopback" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::3 -t udp sp=54321,dp=12345 \
+		-p 100 -q
+
+	ip -6 neigh del 2001:db8:1::3 dev $rp1
+	ip -4 neigh del 192.0.2.3 dev $rp1
+}
+
+local_route_test()
+{
+	# Use a fake source IP to prevent the trap from being triggered twice
+	# when the router sends back a port unreachable message.
+	devlink_trap_stats_test "IPv4 Local Route" "local_route" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.3 -B 192.0.2.2 -t udp sp=54321,dp=12345 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 Local Route" "local_route" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::3 -B 2001:db8:1::2 -t udp sp=54321,sp=12345 \
+		-p 100 -q
+}
+
+external_route_test()
+{
+	# Add a dummy device through which the incoming packets should be
+	# routed.
+	ip link add name dummy10 up type dummy
+	ip address add 203.0.113.1/24 dev dummy10
+	ip -6 address add 2001:db8:10::1/64 dev dummy10
+
+	devlink_trap_stats_test "IPv4 External Route" "external_route" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 203.0.113.2 -t udp sp=54321,dp=12345 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 External Route" "external_route" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:10::2 -t udp sp=54321,sp=12345 \
+		-p 100 -q
+
+	ip -6 address del 2001:db8:10::1/64 dev dummy10
+	ip address del 203.0.113.1/24 dev dummy10
+	ip link del dev dummy10
+}
+
+ipv6_uc_dip_link_local_scope_test()
+{
+	# Add a dummy link-local prefix route to allow the packet to be routed.
+	ip -6 route add fe80:1::/64 dev $rp2
+
+	devlink_trap_stats_test \
+		"IPv6 Unicast Destination IP With Link-Local Scope" \
+		"ipv6_uc_dip_link_local_scope" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B fe80:1::2 -t udp sp=54321,sp=12345 \
+		-p 100 -q
+
+	ip -6 route del fe80:1::/64 dev $rp2
+}
+
+ipv4_router_alert_get()
+{
+	local p
+
+	# https://en.wikipedia.org/wiki/IPv4#Options
+	p=$(:
+		)"94:"$(			: Option Number
+		)"04:"$(			: Option Length
+		)"00:00:"$(			: Option Data
+		)
+	echo $p
+}
+
+ipv4_router_alert_test()
+{
+	devlink_trap_stats_test "IPv4 Router Alert" "ipv4_router_alert" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 198.51.100.3 \
+		-t ip option=$(ipv4_router_alert_get) -p 100 -q
+}
+
+ipv6_router_alert_get()
+{
+	local p
+
+	# https://en.wikipedia.org/wiki/IPv6_packet#Hop-by-hop_options_and_destination_options
+	# https://tools.ietf.org/html/rfc2711#section-2.1
+	p=$(:
+		)"11:"$(			: Next Header - UDP
+		)"00:"$(			: Hdr Ext Len
+		)"05:02:00:00:00:00:"$(		: Option Data
+		)
+	echo $p
+}
+
+ipv6_router_alert_test()
+{
+	devlink_trap_stats_test "IPv6 Router Alert" "ipv6_router_alert" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::3 \
+		-t ip next=0,payload=$(ipv6_router_alert_get) -p 100 -q
+}
+
+ipv6_dip_all_nodes_test()
+{
+	devlink_trap_stats_test "IPv6 Destination IP \"All Nodes Address\"" \
+		"ipv6_dip_all_nodes" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \
+		-A 2001:db8:1::1 -B ff02::1 -t udp sp=12345,dp=54321 -p 100 -q
+}
+
+ipv6_dip_all_routers_test()
+{
+	devlink_trap_stats_test "IPv6 Destination IP \"All Routers Address\"" \
+		"ipv6_dip_all_routers" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \
+		-A 2001:db8:1::1 -B ff02::2 -t udp sp=12345,dp=54321 -p 100 -q
+}
+
+ipv6_router_solicit_test()
+{
+	devlink_trap_stats_test "IPv6 Router Solicitation" \
+		"ipv6_router_solicit" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \
+		-A fe80::1 -B ff02::2 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 133) -p 100 -q
+}
+
+ipv6_router_advert_test()
+{
+	devlink_trap_stats_test "IPv6 Router Advertisement" \
+		"ipv6_router_advert" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \
+		-A fe80::1 -B ff02::1 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 134) -p 100 -q
+}
+
+ipv6_redirect_test()
+{
+	devlink_trap_stats_test "IPv6 Redirect Message" \
+		"ipv6_redirect" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B 2001:db8:1::2 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 137) -p 100 -q
+}
+
+ptp_event_test()
+{
+	mlxsw_only_on_spectrum 1 || return
+
+	# PTP Sync (0)
+	devlink_trap_stats_test "PTP Time-Critical Event Message" "ptp_event" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \
+		-A 192.0.2.1 -B 224.0.1.129 \
+		-t udp sp=12345,dp=319,payload=10 -p 100 -q
+}
+
+ptp_general_test()
+{
+	mlxsw_only_on_spectrum 1 || return
+
+	# PTP Announce (b)
+	devlink_trap_stats_test "PTP General Message" "ptp_general" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \
+		-A 192.0.2.1 -B 224.0.1.129 \
+		-t udp sp=12345,dp=320,payload=1b -p 100 -q
+}
+
+flow_action_sample_test()
+{
+	# Install a filter that samples every incoming packet.
+	tc qdisc add dev $rp1 clsact
+	tc filter add dev $rp1 ingress proto all pref 1 handle 101 matchall \
+		skip_sw action sample rate 1 group 1
+
+	devlink_trap_stats_test "Flow Sampling" "flow_action_sample" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q
+
+	tc filter del dev $rp1 ingress proto all pref 1 handle 101 matchall
+	tc qdisc del dev $rp1 clsact
+}
+
+flow_action_trap_test()
+{
+	# Install a filter that traps a specific flow.
+	tc qdisc add dev $rp1 clsact
+	tc filter add dev $rp1 ingress proto ip pref 1 handle 101 flower \
+		skip_sw ip_proto udp src_port 12345 dst_port 54321 action trap
+
+	devlink_trap_stats_test "Flow Trapping (Logging)" "flow_action_trap" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q
+
+	tc filter del dev $rp1 ingress proto ip pref 1 handle 101 flower
+	tc qdisc del dev $rp1 clsact
+}
+
+eapol_payload_get()
+{
+	local source_mac=$1; shift
+	local p
+
+	p=$(:
+		)"01:80:C2:00:00:03:"$(       : ETH daddr
+		)"$source_mac:"$(             : ETH saddr
+		)"88:8E:"$(                   : ETH type
+		)
+	echo $p
+}
+
+eapol_test()
+{
+	local h1mac=$(mac_get $h1)
+
+	devlink_trap_stats_test "EAPOL" "eapol" $MZ $h1 -c 1 \
+		$(eapol_payload_get $h1mac) -p 100 -q
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
new file mode 100755
index 000000000000..8d4b2c6265b3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
@@ -0,0 +1,535 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap L2 drops functionality over mlxsw. Each registered L2 drop
+# packet trap is tested to make sure it is triggered under the right
+# conditions.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	source_mac_is_multicast_test
+	vlan_tag_mismatch_test
+	ingress_vlan_filter_test
+	ingress_stp_filter_test
+	port_list_is_empty_test
+	port_loopback_filter_test
+	locked_port_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+source_mac_is_multicast_test()
+{
+	local trap_name="source_mac_is_multicast"
+	local smac=01:02:03:04:05:06
+	local mz_pid
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower src_mac $smac action drop
+
+	$MZ $h1 -c 0 -p 100 -a $smac -b bcast -t ip -d 1msec -q &
+	mz_pid=$!
+
+	RET=0
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	log_test "Source MAC is multicast"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+}
+
+__vlan_tag_mismatch_test()
+{
+	local trap_name="vlan_tag_mismatch"
+	local dmac=de:ad:be:ef:13:37
+	local opt=$1; shift
+	local mz_pid
+
+	# Remove PVID flag. This should prevent untagged and prio-tagged
+	# packets from entering the bridge.
+	bridge vlan add vid 1 dev $swp1 untagged master
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_mac $dmac action drop
+
+	$MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	# Add PVID and make sure packets are no longer dropped.
+	bridge vlan add vid 1 dev $swp1 pvid untagged master
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_idle_test $trap_name
+	check_err $? "Trap stats not idle when packets should not be dropped"
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
+	check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_fail $? "Packets not forwarded when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+}
+
+vlan_tag_mismatch_untagged_test()
+{
+	RET=0
+
+	__vlan_tag_mismatch_test
+
+	log_test "VLAN tag mismatch - untagged packets"
+}
+
+vlan_tag_mismatch_vid_0_test()
+{
+	RET=0
+
+	__vlan_tag_mismatch_test "-Q 0"
+
+	log_test "VLAN tag mismatch - prio-tagged packets"
+}
+
+vlan_tag_mismatch_test()
+{
+	vlan_tag_mismatch_untagged_test
+	vlan_tag_mismatch_vid_0_test
+}
+
+ingress_vlan_filter_test()
+{
+	local trap_name="ingress_vlan_filter"
+	local dmac=de:ad:be:ef:13:37
+	local mz_pid
+	local vid=10
+
+	bridge vlan add vid $vid dev $swp2 master
+
+	RET=0
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_mac $dmac action drop
+
+	$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	# Add the VLAN on the bridge port and make sure packets are no longer
+	# dropped.
+	bridge vlan add vid $vid dev $swp1 master
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_idle_test $trap_name
+	check_err $? "Trap stats not idle when packets should not be dropped"
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
+	check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_fail $? "Packets not forwarded when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	log_test "Ingress VLAN filter"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+
+	bridge vlan del vid $vid dev $swp1 master
+	bridge vlan del vid $vid dev $swp2 master
+}
+
+__ingress_stp_filter_test()
+{
+	local trap_name="ingress_spanning_tree_filter"
+	local dmac=de:ad:be:ef:13:37
+	local state=$1; shift
+	local mz_pid
+	local vid=20
+
+	bridge vlan add vid $vid dev $swp2 master
+	bridge vlan add vid $vid dev $swp1 master
+	ip link set dev $swp1 type bridge_slave state $state
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_mac $dmac action drop
+
+	$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	# Change STP state to forwarding and make sure packets are no longer
+	# dropped.
+	ip link set dev $swp1 type bridge_slave state 3
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_idle_test $trap_name
+	check_err $? "Trap stats not idle when packets should not be dropped"
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
+	check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_fail $? "Packets not forwarded when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+
+	bridge vlan del vid $vid dev $swp1 master
+	bridge vlan del vid $vid dev $swp2 master
+}
+
+ingress_stp_filter_listening_test()
+{
+	local state=$1; shift
+
+	RET=0
+
+	__ingress_stp_filter_test $state
+
+	log_test "Ingress STP filter - listening state"
+}
+
+ingress_stp_filter_learning_test()
+{
+	local state=$1; shift
+
+	RET=0
+
+	__ingress_stp_filter_test $state
+
+	log_test "Ingress STP filter - learning state"
+}
+
+ingress_stp_filter_test()
+{
+	ingress_stp_filter_listening_test 1
+	ingress_stp_filter_learning_test 2
+}
+
+port_list_is_empty_uc_test()
+{
+	local trap_name="port_list_is_empty"
+	local dmac=de:ad:be:ef:13:37
+	local mz_pid
+
+	# Disable unicast flooding on both ports, so that packets cannot egress
+	# any port.
+	ip link set dev $swp1 type bridge_slave flood off
+	ip link set dev $swp2 type bridge_slave flood off
+
+	RET=0
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_mac $dmac action drop
+
+	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	# Allow packets to be flooded to one port.
+	ip link set dev $swp2 type bridge_slave flood on
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_idle_test $trap_name
+	check_err $? "Trap stats not idle when packets should not be dropped"
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
+	check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_fail $? "Packets not forwarded when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	log_test "Port list is empty - unicast"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+
+	ip link set dev $swp1 type bridge_slave flood on
+}
+
+port_list_is_empty_mc_test()
+{
+	local trap_name="port_list_is_empty"
+	local dmac=01:00:5e:00:00:01
+	local dip=239.0.0.1
+	local mz_pid
+
+	# Disable multicast flooding on both ports, so that packets cannot
+	# egress any port. We also need to flush IP addresses from the bridge
+	# in order to prevent packets from being flooded to the router port.
+	ip link set dev $swp1 type bridge_slave mcast_flood off
+	ip link set dev $swp2 type bridge_slave mcast_flood off
+	ip address flush dev br0
+
+	RET=0
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_mac $dmac action drop
+
+	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	# Allow packets to be flooded to one port.
+	ip link set dev $swp2 type bridge_slave mcast_flood on
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_idle_test $trap_name
+	check_err $? "Trap stats not idle when packets should not be dropped"
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
+	check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_fail $? "Packets not forwarded when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	log_test "Port list is empty - multicast"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+
+	ip link set dev $swp1 type bridge_slave mcast_flood on
+}
+
+port_list_is_empty_test()
+{
+	port_list_is_empty_uc_test
+	port_list_is_empty_mc_test
+}
+
+port_loopback_filter_uc_test()
+{
+	local trap_name="port_loopback_filter"
+	local dmac=de:ad:be:ef:13:37
+	local mz_pid
+
+	# Make sure packets can only egress the input port.
+	ip link set dev $swp2 type bridge_slave flood off
+
+	RET=0
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_mac $dmac action drop
+
+	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	# Allow packets to be flooded.
+	ip link set dev $swp2 type bridge_slave flood on
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_idle_test $trap_name
+	check_err $? "Trap stats not idle when packets should not be dropped"
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
+	check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_fail $? "Packets not forwarded when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	log_test "Port loopback filter - unicast"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+}
+
+port_loopback_filter_test()
+{
+	port_loopback_filter_uc_test
+}
+
+locked_port_miss_test()
+{
+	local trap_name="locked_port"
+	local smac=00:11:22:33:44:55
+
+	bridge link set dev $swp1 learning off
+	bridge link set dev $swp1 locked on
+
+	RET=0
+
+	devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+		-a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+	check_fail $? "Trap stats increased before setting action to \"trap\""
+
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+		-a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+	check_err $? "Trap stats did not increase when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+		-a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+	check_fail $? "Trap stats increased after setting action to \"drop\""
+
+	devlink_trap_action_set $trap_name "trap"
+
+	bridge fdb replace $smac dev $swp1 master static vlan 1
+
+	devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+		-a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+	check_fail $? "Trap stats increased after adding an FDB entry"
+
+	bridge fdb del $smac dev $swp1 master static vlan 1
+	bridge link set dev $swp1 locked off
+
+	devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+		-a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+	check_fail $? "Trap stats increased after unlocking port"
+
+	log_test "Locked port - FDB miss"
+
+	devlink_trap_action_set $trap_name "drop"
+	bridge link set dev $swp1 learning on
+}
+
+locked_port_mismatch_test()
+{
+	local trap_name="locked_port"
+	local smac=00:11:22:33:44:55
+
+	bridge link set dev $swp1 learning off
+	bridge link set dev $swp1 locked on
+
+	RET=0
+
+	bridge fdb replace $smac dev $swp2 master static vlan 1
+
+	devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+		-a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+	check_fail $? "Trap stats increased before setting action to \"trap\""
+
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+		-a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+	check_err $? "Trap stats did not increase when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+		-a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+	check_fail $? "Trap stats increased after setting action to \"drop\""
+
+	devlink_trap_action_set $trap_name "trap"
+	bridge link set dev $swp1 locked off
+
+	devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+		-a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+	check_fail $? "Trap stats increased after unlocking port"
+
+	bridge link set dev $swp1 locked on
+	bridge fdb replace $smac dev $swp1 master static vlan 1
+
+	devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+		-a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+	check_fail $? "Trap stats increased after replacing an FDB entry"
+
+	bridge fdb del $smac dev $swp1 master static vlan 1
+	devlink_trap_action_set $trap_name "drop"
+
+	log_test "Locked port - FDB mismatch"
+
+	bridge link set dev $swp1 locked off
+	bridge link set dev $swp1 learning on
+}
+
+locked_port_test()
+{
+	locked_port_miss_test
+	locked_port_mismatch_test
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
new file mode 100755
index 000000000000..db5806d189bb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -0,0 +1,696 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap L3 drops functionality over mlxsw. Each registered L3 drop
+# packet trap is tested to make sure it is triggered under the right
+# conditions.
+
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    | 2001:db8:1::1/64           |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# |    |  default via 2001:db8:1::2 |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |        2001:db8:1::2/64                                                   |
+# |                                                                           |
+# |        2001:db8:2::2/64                                                   |
+# |        198.51.100.2/24                                                    |
+# |    + $rp2                                                                 |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |  default via 2001:db8:2::2 |
+# |    |                            |
+# |    | 2001:db8:2::1/64           |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	non_ip_test
+	uc_dip_over_mc_dmac_test
+	dip_is_loopback_test
+	sip_is_mc_test
+	sip_is_loopback_test
+	ip_header_corrupted_test
+	ipv4_sip_is_limited_bc_test
+	ipv6_mc_dip_reserved_scope_test
+	ipv6_mc_dip_interface_local_scope_test
+	blackhole_route_test
+	irif_disabled_test
+	erif_disabled_test
+	blackhole_nexthop_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $h2_ipv4/24 $h2_ipv6/64
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+	ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 $h2_ipv4/24 $h2_ipv6/64
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	tc qdisc add dev $rp2 clsact
+
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	__addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+	__addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+	__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+
+	tc qdisc del dev $rp2 clsact
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h1mac=$(mac_get $h1)
+	rp1mac=$(mac_get $rp1)
+
+	h1_ipv4=192.0.2.1
+	h2_ipv4=198.51.100.1
+	h1_ipv6=2001:db8:1::1
+	h2_ipv6=2001:db8:2::1
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+ping_check()
+{
+	trap_name=$1; shift
+
+	devlink_trap_action_set $trap_name "trap"
+	ping_do $h1 $h2_ipv4
+	check_err $? "Packets that should not be trapped were trapped"
+	devlink_trap_action_set $trap_name "drop"
+}
+
+non_ip_test()
+{
+	local trap_name="non_ip"
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+		flower dst_ip $h2_ipv4 action drop
+
+	# Generate non-IP packets to the router
+	$MZ $h1 -c 0 -p 100 -d 1msec -B $h2_ipv4 -q "$rp1mac $h1mac \
+		00:00 de:ad:be:ef" &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "Non IP"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
+}
+
+__uc_dip_over_mc_dmac_test()
+{
+	local desc=$1; shift
+	local proto=$1; shift
+	local dip=$1; shift
+	local flags=${1:-""}; shift
+	local trap_name="uc_dip_over_mc_dmac"
+	local dmac=01:02:03:04:05:06
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower ip_proto udp src_port 54321 dst_port 12345 action drop
+
+	# Generate IP packets with a unicast IP and a multicast destination MAC
+	$MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $dmac \
+		-B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "Unicast destination IP over multicast destination MAC: $desc"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+}
+
+uc_dip_over_mc_dmac_test()
+{
+	__uc_dip_over_mc_dmac_test "IPv4" "ip" $h2_ipv4
+	__uc_dip_over_mc_dmac_test "IPv6" "ipv6" $h2_ipv6 "-6"
+}
+
+__sip_is_loopback_test()
+{
+	local desc=$1; shift
+	local proto=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local flags=${1:-""}; shift
+	local trap_name="sip_is_loopback_address"
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower src_ip $sip action drop
+
+	# Generate packets with loopback source IP
+	$MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip \
+		-b $rp1mac -B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "Source IP is loopback address: $desc"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+}
+
+sip_is_loopback_test()
+{
+	__sip_is_loopback_test "IPv4" "ip" "127.0.0.0/8" $h2_ipv4
+	__sip_is_loopback_test "IPv6" "ipv6" "::1" $h2_ipv6 "-6"
+}
+
+__dip_is_loopback_test()
+{
+	local desc=$1; shift
+	local proto=$1; shift
+	local dip=$1; shift
+	local flags=${1:-""}; shift
+	local trap_name="dip_is_loopback_address"
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower dst_ip $dip action drop
+
+	# Generate packets with loopback destination IP
+	$MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \
+		-B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "Destination IP is loopback address: $desc"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+}
+
+dip_is_loopback_test()
+{
+	__dip_is_loopback_test "IPv4" "ip" "127.0.0.0/8"
+	__dip_is_loopback_test "IPv6" "ipv6" "::1" "-6"
+}
+
+__sip_is_mc_test()
+{
+	local desc=$1; shift
+	local proto=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local flags=${1:-""}; shift
+	local trap_name="sip_is_mc"
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower src_ip $sip action drop
+
+	# Generate packets with multicast source IP
+	$MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip \
+		-b $rp1mac -B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "Source IP is multicast: $desc"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+}
+
+sip_is_mc_test()
+{
+	__sip_is_mc_test "IPv4" "ip" "239.1.1.1" $h2_ipv4
+	__sip_is_mc_test "IPv6" "ipv6" "FF02::2" $h2_ipv6 "-6"
+}
+
+ipv4_sip_is_limited_bc_test()
+{
+	local trap_name="ipv4_sip_is_limited_bc"
+	local sip=255.255.255.255
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+		flower src_ip $sip action drop
+
+	# Generate packets with limited broadcast source IP
+	$MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip -b $rp1mac \
+		-B $h2_ipv4 -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "IPv4 source IP is limited broadcast"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
+}
+
+ipv4_payload_get()
+{
+	local ipver=$1; shift
+	local ihl=$1; shift
+	local checksum=$1; shift
+
+	p=$(:
+		)"08:00:"$(                   : ETH type
+		)"$ipver"$(                   : IP version
+		)"$ihl:"$(                    : IHL
+		)"00:"$(		      : IP TOS
+		)"00:F4:"$(                   : IP total length
+		)"00:00:"$(                   : IP identification
+		)"20:00:"$(                   : IP flags + frag off
+		)"30:"$(                      : IP TTL
+		)"01:"$(                      : IP proto
+		)"$checksum:"$(               : IP header csum
+		)"$h1_ipv4:"$(                : IP saddr
+	        )"$h2_ipv4:"$(                : IP daddr
+		)
+	echo $p
+}
+
+__ipv4_header_corrupted_test()
+{
+	local desc=$1; shift
+	local ipver=$1; shift
+	local ihl=$1; shift
+	local checksum=$1; shift
+	local trap_name="ip_header_corrupted"
+	local payload
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+		flower dst_ip $h2_ipv4 action drop
+
+	payload=$(ipv4_payload_get $ipver $ihl $checksum)
+
+	# Generate packets with corrupted IP header
+	$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "IP header corrupted: $desc: IPv4"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
+}
+
+ipv6_payload_get()
+{
+	local ipver=$1; shift
+
+	p=$(:
+		)"86:DD:"$(                  : ETH type
+		)"$ipver"$(                  : IP version
+		)"0:0:"$(                    : Traffic class
+		)"0:00:00:"$(		     : Flow label
+		)"00:00:"$(                  : Payload length
+		)"01:"$(                     : Next header
+		)"04:"$(                     : Hop limit
+		)"$h1_ipv6:"$(      	     : IP saddr
+		)"$h2_ipv6:"$(               : IP daddr
+		)
+	echo $p
+}
+
+__ipv6_header_corrupted_test()
+{
+	local desc=$1; shift
+	local ipver=$1; shift
+	local trap_name="ip_header_corrupted"
+	local payload
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+		flower dst_ip $h2_ipv4 action drop
+
+	payload=$(ipv6_payload_get $ipver)
+
+	# Generate packets with corrupted IP header
+	$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "IP header corrupted: $desc: IPv6"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
+}
+
+ip_header_corrupted_test()
+{
+	# Each test uses one wrong value. The three values below are correct.
+	local ipv="4"
+	local ihl="5"
+	local checksum="00:F4"
+
+	__ipv4_header_corrupted_test "wrong IP version" 5 $ihl $checksum
+	__ipv4_header_corrupted_test "wrong IHL" $ipv 4 $checksum
+	__ipv4_header_corrupted_test "wrong checksum" $ipv $ihl "00:00"
+	__ipv6_header_corrupted_test "wrong IP version" 5
+}
+
+ipv6_mc_dip_reserved_scope_test()
+{
+	local trap_name="ipv6_mc_dip_reserved_scope"
+	local dip=FF00::
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	tc filter add dev $rp2 egress protocol ipv6 pref 1 handle 101 \
+		flower dst_ip $dip action drop
+
+	# Generate packets with reserved scope destination IP
+	$MZ $h1 -6 -t udp "sp=54321,dp=12345" -c 0 -p 100 -b \
+		"33:33:00:00:00:00" -B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "IPv6 multicast destination IP reserved scope"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101
+}
+
+ipv6_mc_dip_interface_local_scope_test()
+{
+	local trap_name="ipv6_mc_dip_interface_local_scope"
+	local dip=FF01::
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	tc filter add dev $rp2 egress protocol ipv6 pref 1 handle 101 \
+		flower dst_ip $dip action drop
+
+	# Generate packets with interface local scope destination IP
+	$MZ $h1 -6 -t udp "sp=54321,dp=12345" -c 0 -p 100 -b \
+		"33:33:00:00:00:00" -B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "IPv6 multicast destination IP interface-local scope"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101
+}
+
+__blackhole_route_test()
+{
+	local flags=$1; shift
+	local subnet=$1; shift
+	local proto=$1; shift
+	local dip=$1; shift
+	local ip_proto=${1:-"icmp"}; shift
+	local trap_name="blackhole_route"
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	ip -$flags route add blackhole $subnet
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower skip_hw dst_ip $dip ip_proto $ip_proto action drop
+
+	# Generate packets to the blackhole route
+	$MZ $h1 -$flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \
+		-B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+	log_test "Blackhole route: IPv$flags"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+	ip -$flags route del blackhole $subnet
+}
+
+blackhole_route_test()
+{
+	__blackhole_route_test "4" "198.51.100.0/30" "ip" $h2_ipv4
+	__blackhole_route_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6 "icmpv6"
+}
+
+irif_disabled_test()
+{
+	local trap_name="irif_disabled"
+	local t0_packets t0_bytes
+	local t1_packets t1_bytes
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	devlink_trap_action_set $trap_name "trap"
+
+	# When RIF of a physical port ("Sub-port RIF") is destroyed, we first
+	# block the STP of the {Port, VLAN} so packets cannot get into the RIF.
+	# Using bridge enables us to see this trap because when bridge is
+	# destroyed, there is a small time window that packets can go into the
+	# RIF, while it is disabled.
+	ip link add dev br0 type bridge
+	ip link set dev $rp1 master br0
+	ip address flush dev $rp1
+	__addr_add_del br0 add 192.0.2.2/24
+	ip li set dev br0 up
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+	t0_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+	# Generate packets to h2 through br0 RIF that will be removed later
+	$MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -p 100 -a own -b $rp1mac \
+		-B $h2_ipv4 -q &
+	mz_pid=$!
+
+	# Wait before removing br0 RIF to allow packets to go into the bridge.
+	sleep 1
+
+	# Flushing address will dismantle the RIF
+	ip address flush dev br0
+
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+	t1_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then
+		check_err 1 "Trap stats idle when packets should be trapped"
+	fi
+
+	log_test "Ingress RIF disabled"
+
+	kill_process $mz_pid
+	ip link set dev $rp1 nomaster
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	ip link del dev br0 type bridge
+	devlink_trap_action_set $trap_name "drop"
+}
+
+erif_disabled_test()
+{
+	local trap_name="erif_disabled"
+	local t0_packets t0_bytes
+	local t1_packets t1_bytes
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	devlink_trap_action_set $trap_name "trap"
+	ip link add dev br0 type bridge
+	ip add flush dev $rp1
+	ip link set dev $rp1 master br0
+	__addr_add_del br0 add 192.0.2.2/24
+	ip link set dev br0 up
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+	t0_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+	rp2mac=$(mac_get $rp2)
+
+	# Generate packets that should go out through br0 RIF that will be
+	# removed later
+	$MZ $h2 -t udp "sp=54321,dp=12345" -c 0 -p 100 -a own -b $rp2mac \
+		-B 192.0.2.1 -q &
+	mz_pid=$!
+
+	sleep 5
+	# Unlinking the port from the bridge will disable the RIF associated
+	# with br0 as it is no longer an upper of any mlxsw port.
+	ip link set dev $rp1 nomaster
+
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+	t1_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then
+		check_err 1 "Trap stats idle when packets should be trapped"
+	fi
+
+	log_test "Egress RIF disabled"
+
+	kill_process $mz_pid
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	ip link del dev br0 type bridge
+	devlink_trap_action_set $trap_name "drop"
+}
+
+__blackhole_nexthop_test()
+{
+	local flags=$1; shift
+	local subnet=$1; shift
+	local proto=$1; shift
+	local dip=$1; shift
+	local trap_name="blackhole_nexthop"
+	local mz_pid
+
+	RET=0
+
+	ip -$flags nexthop add id 1 blackhole
+	ip -$flags route add $subnet nhid 1
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower skip_hw dst_ip $dip ip_proto udp action drop
+
+	# Generate packets to the blackhole nexthop
+	$MZ $h1 -$flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \
+		-B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+	log_test "Blackhole nexthop: IPv$flags"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+	ip -$flags route del $subnet
+	ip -$flags nexthop del id 1
+}
+
+blackhole_nexthop_test()
+{
+	__blackhole_nexthop_test "4" "198.51.100.0/30" "ip" $h2_ipv4
+	__blackhole_nexthop_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
new file mode 100755
index 000000000000..5d6d88b600f0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -0,0 +1,583 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap L3 exceptions functionality over mlxsw.
+# Check all exception traps to make sure they are triggered under the right
+# conditions.
+
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    | 2001:db8:1::1/64           |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# |    |  default via 2001:db8:1::2 |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |        2001:db8:1::2/64                                                   |
+# |                                                                           |
+# |        2001:db8:2::2/64                                                   |
+# |        198.51.100.2/24                                                    |
+# |    + $rp2                                                                 |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |  default via 2001:db8:2::2 |
+# |    |                            |
+# |    | 2001:db8:2::1/64           |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	mtu_value_is_too_small_test
+	ttl_value_is_too_small_test
+	mc_reverse_path_forwarding_test
+	reject_route_test
+	unresolved_neigh_test
+	ipv4_lpm_miss_test
+	ipv6_lpm_miss_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+require_command $MCD
+require_command $MC_CLI
+table_name=selftests
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+
+	tc qdisc add dev $h1 clsact
+}
+
+h1_destroy()
+{
+	tc qdisc del dev $h1 clsact
+
+	ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+	ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	tc qdisc add dev $rp2 clsact
+
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	__addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+	__addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+	__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+
+	tc qdisc del dev $rp2 clsact
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1mac=$(mac_get $rp1)
+
+	start_mcd
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+
+	kill_mcd
+}
+
+ping_check()
+{
+	ping_do $h1 198.51.100.1
+	check_err $? "Packets that should not be trapped were trapped"
+}
+
+trap_action_check()
+{
+	local trap_name=$1; shift
+	local expected_action=$1; shift
+
+	action=$(devlink_trap_action_get $trap_name)
+	if [ "$action" != $expected_action ]; then
+		check_err 1 "Trap $trap_name has wrong action: $action"
+	fi
+}
+
+mtu_value_is_too_small_test()
+{
+	local trap_name="mtu_value_is_too_small"
+	local expected_action="trap"
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+	trap_action_check $trap_name $expected_action
+
+	# type - Destination Unreachable
+	# code - Fragmentation Needed and Don't Fragment was Set
+	tc filter add dev $h1 ingress protocol ip pref 1 handle 101 \
+		flower skip_hw ip_proto icmp type 3 code 4 action pass
+
+	mtu_set $rp2 1300
+
+	# Generate IP packets bigger than router's MTU with don't fragment
+	# flag on.
+	$MZ $h1 -t udp "sp=54321,dp=12345,df" -p 1400 -c 0 -d 1msec -b $rp1mac \
+		-B 198.51.100.1 -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets_hitting "dev $h1 ingress" 101
+	check_err $? "Packets were not received to h1"
+
+	log_test "MTU value is too small"
+
+	mtu_restore $rp2
+
+	kill_process $mz_pid
+	tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower
+}
+
+__ttl_value_is_too_small_test()
+{
+	local ttl_val=$1; shift
+	local trap_name="ttl_value_is_too_small"
+	local expected_action="trap"
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+	trap_action_check $trap_name $expected_action
+
+	# type - Time Exceeded
+	# code - Time to Live exceeded in Transit
+	tc filter add dev $h1 ingress protocol ip pref 1 handle 101 \
+		 flower skip_hw ip_proto icmp type 11 code 0 action pass
+
+	# Generate IP packets with small TTL
+	$MZ $h1 -t udp "ttl=$ttl_val,sp=54321,dp=12345" -c 0 -d 1msec \
+		-b $rp1mac -B 198.51.100.1 -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets_hitting "dev $h1 ingress" 101
+	check_err $? "Packets were not received to h1"
+
+	log_test "TTL value is too small: TTL=$ttl_val"
+
+	kill_process $mz_pid
+	tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower
+}
+
+ttl_value_is_too_small_test()
+{
+	__ttl_value_is_too_small_test 0
+	__ttl_value_is_too_small_test 1
+}
+
+start_mcd()
+{
+	SMCROUTEDIR="$(mktemp -d)"
+	for ((i = 1; i <= $NUM_NETIFS; ++i)); do
+		 echo "phyint ${NETIFS[p$i]} enable" >> \
+			 $SMCROUTEDIR/$table_name.conf
+	done
+
+	$MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \
+		-P $SMCROUTEDIR/$table_name.pid
+}
+
+kill_mcd()
+{
+	pkill $MCD
+	rm -rf $SMCROUTEDIR
+}
+
+__mc_reverse_path_forwarding_test()
+{
+	local desc=$1; shift
+	local src_ip=$1; shift
+	local dst_ip=$1; shift
+	local dst_mac=$1; shift
+	local proto=$1; shift
+	local flags=${1:-""}; shift
+	local trap_name="mc_reverse_path_forwarding"
+	local expected_action="trap"
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+	trap_action_check $trap_name $expected_action
+
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower dst_ip $dst_ip ip_proto udp action drop
+
+	$MC_CLI -I $table_name add $rp1 $src_ip $dst_ip $rp2
+
+	# Generate packets to multicast address.
+	$MZ $h2 $flags -t udp "sp=54321,dp=12345" -c 0 -p 128 \
+		-a 00:11:22:33:44:55 -b $dst_mac \
+		-A $src_ip -B $dst_ip -q &
+
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $rp2 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "Multicast reverse path forwarding: $desc"
+
+	kill_process $mz_pid
+	tc filter del dev $rp2 egress protocol $proto pref 1 handle 101 flower
+}
+
+mc_reverse_path_forwarding_test()
+{
+	__mc_reverse_path_forwarding_test "IPv4" "192.0.2.1" "225.1.2.3" \
+		"01:00:5e:01:02:03" "ip"
+	__mc_reverse_path_forwarding_test "IPv6" "2001:db8:1::1" "ff0e::3" \
+		"33:33:00:00:00:03" "ipv6" "-6"
+}
+
+__reject_route_test()
+{
+	local desc=$1; shift
+	local dst_ip=$1; shift
+	local proto=$1; shift
+	local ip_proto=$1; shift
+	local type=$1; shift
+	local code=$1; shift
+	local unreachable=$1; shift
+	local flags=${1:-""}; shift
+	local trap_name="reject_route"
+	local expected_action="trap"
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+	trap_action_check $trap_name $expected_action
+
+	tc filter add dev $h1 ingress protocol $proto pref 1 handle 101 flower \
+		skip_hw ip_proto $ip_proto type $type code $code action pass
+
+	ip route add unreachable $unreachable
+
+	# Generate pacekts to h2. The destination IP is unreachable.
+	$MZ $flags $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \
+		-B $dst_ip -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets_hitting "dev $h1 ingress" 101
+	check_err $? "ICMP packet was not received to h1"
+
+	log_test "Reject route: $desc"
+
+	kill_process $mz_pid
+	ip route del unreachable $unreachable
+	tc filter del dev $h1 ingress protocol $proto pref 1 handle 101 flower
+}
+
+reject_route_test()
+{
+	# type - Destination Unreachable
+	# code - Host Unreachable
+	__reject_route_test "IPv4" 198.51.100.1 "ip" "icmp" 3 1 \
+		"198.51.100.0/26"
+	# type - Destination Unreachable
+	# code - No Route
+	__reject_route_test "IPv6" 2001:db8:2::1 "ipv6" "icmpv6" 1 0 \
+		"2001:db8:2::0/66" "-6"
+}
+
+__host_miss_test()
+{
+	local desc=$1; shift
+	local dip=$1; shift
+	local trap_name="unresolved_neigh"
+	local expected_action="trap"
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+	trap_action_check $trap_name $expected_action
+
+	ip neigh flush dev $rp2
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	# Generate packets to h2 (will incur a unresolved neighbor).
+	# The ping should pass and devlink counters should be increased.
+	ping_do $h1 $dip
+	check_err $? "ping failed: $desc"
+
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets ]]; then
+		check_err 1 "Trap counter did not increase"
+	fi
+
+	log_test "Unresolved neigh: host miss: $desc"
+}
+
+__invalid_nexthop_test()
+{
+	local desc=$1; shift
+	local dip=$1; shift
+	local extra_add=$1; shift
+	local subnet=$1; shift
+	local via_add=$1; shift
+	local trap_name="unresolved_neigh"
+	local expected_action="trap"
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+	trap_action_check $trap_name $expected_action
+
+	ip address add $extra_add/$subnet dev $h2
+
+	# Check that correct route does not trigger unresolved_neigh
+	ip $flags route add $dip via $extra_add dev $rp2
+
+	# Generate packets in order to discover all neighbours.
+	# Without it, counters of unresolved_neigh will be increased
+	# during neighbours discovery and the check below will fail
+	# for a wrong reason
+	ping_do $h1 $dip
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+	ping_do $h1 $dip
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	if [[ $t0_packets -ne $t1_packets ]]; then
+		check_err 1 "Trap counter increased when it should not"
+	fi
+
+	ip $flags route del $dip via $extra_add dev $rp2
+
+	# Check that route to nexthop that does not exist trigger
+	# unresolved_neigh
+	ip $flags route add $dip via $via_add dev $h2
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+	ping_do $h1 $dip
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets ]]; then
+		check_err 1 "Trap counter did not increase"
+	fi
+
+	ip $flags route del $dip via $via_add dev $h2
+	ip address del $extra_add/$subnet dev $h2
+	log_test "Unresolved neigh: nexthop does not exist: $desc"
+}
+
+__invalid_nexthop_bucket_test()
+{
+	local desc=$1; shift
+	local dip=$1; shift
+	local via_add=$1; shift
+	local trap_name="unresolved_neigh"
+
+	RET=0
+
+	# Check that route to nexthop that does not exist triggers
+	# unresolved_neigh
+	ip nexthop add id 1 via $via_add dev $rp2
+	ip nexthop add id 10 group 1 type resilient buckets 32
+	ip route add $dip nhid 10
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+	ping_do $h1 $dip
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets ]]; then
+		check_err 1 "Trap counter did not increase"
+	fi
+
+	ip route del $dip nhid 10
+	ip nexthop del id 10
+	ip nexthop del id 1
+	log_test "Unresolved neigh: nexthop bucket does not exist: $desc"
+}
+
+unresolved_neigh_test()
+{
+	__host_miss_test "IPv4" 198.51.100.1
+	__host_miss_test "IPv6" 2001:db8:2::1
+	__invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4
+	__invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \
+		2001:db8:2::4
+	__invalid_nexthop_bucket_test "IPv4" 198.51.100.1 198.51.100.4
+	__invalid_nexthop_bucket_test "IPv6" 2001:db8:2::1 2001:db8:2::4
+}
+
+vrf_without_routes_create()
+{
+	# VRF creating makes the links to be down and then up again.
+	# By default, IPv6 address is not saved after link becomes down.
+	# Save IPv6 address using sysctl configuration.
+	sysctl_set net.ipv6.conf.$rp1.keep_addr_on_down 1
+	sysctl_set net.ipv6.conf.$rp2.keep_addr_on_down 1
+
+	ip link add dev vrf1 type vrf table 101
+	ip link set dev $rp1 master vrf1
+	ip link set dev $rp2 master vrf1
+	ip link set dev vrf1 up
+
+	# Wait for rp1 and rp2 to be up
+	setup_wait
+}
+
+vrf_without_routes_destroy()
+{
+	ip link set dev $rp1 nomaster
+	ip link set dev $rp2 nomaster
+	ip link del dev vrf1
+
+	sysctl_restore net.ipv6.conf.$rp2.keep_addr_on_down
+	sysctl_restore net.ipv6.conf.$rp1.keep_addr_on_down
+
+	# Wait for interfaces to be up
+	setup_wait
+}
+
+ipv4_lpm_miss_test()
+{
+	local trap_name="ipv4_lpm_miss"
+	local expected_action="trap"
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+	trap_action_check $trap_name $expected_action
+
+	# Create a VRF without a default route
+	vrf_without_routes_create
+
+	# Generate packets through a VRF without a matching route.
+	$MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \
+		-B 203.0.113.1 -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	log_test "LPM miss: IPv4"
+
+	kill_process $mz_pid
+	vrf_without_routes_destroy
+}
+
+ipv6_lpm_miss_test()
+{
+	local trap_name="ipv6_lpm_miss"
+	local expected_action="trap"
+	local mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+	trap_action_check $trap_name $expected_action
+
+	# Create a VRF without a default route
+	vrf_without_routes_create
+
+	# Generate packets through a VRF without a matching route.
+	$MZ -6 $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \
+		-B 2001:db8::1 -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	log_test "LPM miss: IPv6"
+
+	kill_process $mz_pid
+	vrf_without_routes_destroy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
new file mode 100755
index 000000000000..e212ad8ccef6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
@@ -0,0 +1,353 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap policer functionality over mlxsw.
+
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |                                                                           |
+# |        198.51.100.2/24                                                    |
+# |    + $rp2                                                                 |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |                            |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	rate_limits_test
+	burst_limits_test
+	rate_test
+	burst_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	adf_simple_if_init $h1 192.0.2.1/24
+
+	mtu_set $h1 10000
+	defer mtu_restore $h1
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+	defer ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h2_create()
+{
+	adf_simple_if_init $h2 198.51.100.1/24
+
+	mtu_set $h2 10000
+	defer mtu_restore $h2
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+	defer ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	defer ip link set dev $rp1 down
+
+	ip link set dev $rp2 up
+	defer ip link set dev $rp2 down
+
+	__addr_add_del $rp1 add 192.0.2.2/24
+	defer __addr_add_del $rp1 del 192.0.2.2/24
+
+	__addr_add_del $rp2 add 198.51.100.2/24
+	defer __addr_add_del $rp2 del 198.51.100.2/24
+
+	mtu_set $rp1 10000
+	defer mtu_restore $rp1
+
+	mtu_set $rp2 10000
+	defer mtu_restore $rp2
+
+	ip -4 route add blackhole 198.51.100.100
+	defer ip -4 route del blackhole 198.51.100.100
+
+	devlink trap set $DEVLINK_DEV trap blackhole_route action trap
+	defer devlink trap set $DEVLINK_DEV trap blackhole_route action drop
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1_mac=$(mac_get $rp1)
+
+	# Reload to ensure devlink-trap settings are back to default.
+	defer devlink_reload
+
+	adf_vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+}
+
+rate_limits_test()
+{
+	RET=0
+
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 0 &> /dev/null
+	check_fail $? "Policer rate was changed to rate lower than limit"
+	devlink trap policer set $DEVLINK_DEV policer 1 \
+		rate 2000000001 &> /dev/null
+	check_fail $? "Policer rate was changed to rate higher than limit"
+
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 1
+	check_err $? "Failed to set policer rate to minimum"
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 2000000000
+	check_err $? "Failed to set policer rate to maximum"
+
+	log_test "Trap policer rate limits"
+}
+
+burst_limits_test()
+{
+	RET=0
+
+	devlink trap policer set $DEVLINK_DEV policer 1 burst 0 &> /dev/null
+	check_fail $? "Policer burst size was changed to 0"
+	devlink trap policer set $DEVLINK_DEV policer 1 burst 17 &> /dev/null
+	check_fail $? "Policer burst size was changed to burst size that is not power of 2"
+	devlink trap policer set $DEVLINK_DEV policer 1 burst 8 &> /dev/null
+	check_fail $? "Policer burst size was changed to burst size lower than limit"
+	devlink trap policer set $DEVLINK_DEV policer 1 \
+		burst $((2**25)) &> /dev/null
+	check_fail $? "Policer burst size was changed to burst size higher than limit"
+
+	devlink trap policer set $DEVLINK_DEV policer 1 burst 16
+	check_err $? "Failed to set policer burst size to minimum"
+	devlink trap policer set $DEVLINK_DEV policer 1 burst $((2**24))
+	check_err $? "Failed to set policer burst size to maximum"
+
+	log_test "Trap policer burst size limits"
+}
+
+trap_rate_get()
+{
+	local t0 t1
+
+	t0=$(devlink_trap_rx_packets_get blackhole_route)
+	sleep 10
+	t1=$(devlink_trap_rx_packets_get blackhole_route)
+
+	echo $(((t1 - t0) / 10))
+}
+
+policer_drop_rate_get()
+{
+	local id=$1; shift
+	local t0 t1
+
+	t0=$(devlink_trap_policer_rx_dropped_get $id)
+	sleep 10
+	t1=$(devlink_trap_policer_rx_dropped_get $id)
+
+	echo $(((t1 - t0) / 10))
+}
+
+__rate_test()
+{
+	local rate pct drop_rate
+	local id=$1; shift
+
+	RET=0
+
+	devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
+	devlink trap group set $DEVLINK_DEV group l3_drops policer $id
+
+	# Send packets at highest possible rate and make sure they are dropped
+	# by the policer. Make sure measured received rate is about 1000 pps
+	log_info "=== Tx rate: Highest, Policer rate: 1000 pps ==="
+
+	defer_scope_push
+
+	start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac
+	defer stop_traffic $!
+
+	sleep 5 # Take measurements when rate is stable
+
+	rate=$(trap_rate_get)
+	pct=$((100 * (rate - 1000) / 1000))
+	((-10 <= pct && pct <= 10))
+	check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-10%"
+	log_info "Expected rate 1000 pps, measured rate $rate pps"
+
+	drop_rate=$(policer_drop_rate_get $id)
+	(( drop_rate > 0 ))
+	check_err $? "Expected non-zero policer drop rate, got 0"
+	log_info "Measured policer drop rate of $drop_rate pps"
+
+	defer_scope_pop
+
+	# Send packets at a rate of 1000 pps and make sure they are not dropped
+	# by the policer
+	log_info "=== Tx rate: 1000 pps, Policer rate: 1000 pps ==="
+
+	defer_scope_push
+
+	start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -d 1msec
+	defer stop_traffic $!
+
+	sleep 5 # Take measurements when rate is stable
+
+	drop_rate=$(policer_drop_rate_get $id)
+	(( drop_rate == 0 ))
+	check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps"
+	log_info "Measured policer drop rate of $drop_rate pps"
+
+	defer_scope_pop
+
+	# Unbind the policer and send packets at highest possible rate. Make
+	# sure they are not dropped by the policer and that the measured
+	# received rate is higher than 1000 pps
+	log_info "=== Tx rate: Highest, Policer rate: No policer ==="
+
+	devlink trap group set $DEVLINK_DEV group l3_drops nopolicer
+
+	defer_scope_push
+
+	start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac
+	defer stop_traffic $!
+
+	rate=$(trap_rate_get)
+	(( rate > 1000 ))
+	check_err $? "Expected rate higher than 1000 pps, got $rate pps"
+	log_info "Measured rate $rate pps"
+
+	drop_rate=$(policer_drop_rate_get $id)
+	(( drop_rate == 0 ))
+	check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps"
+	log_info "Measured policer drop rate of $drop_rate pps"
+
+	defer_scope_pop
+
+	log_test "Trap policer rate"
+}
+
+rate_test()
+{
+	local last_policer=$(devlink -j -p trap policer show |
+			     jq '[.[]["'$DEVLINK_DEV'"][].policer] | max')
+
+	log_info "Running rate test for policer 1"
+	__rate_test 1
+
+	log_info "Running rate test for policer $((last_policer / 2))"
+	__rate_test $((last_policer / 2))
+
+	log_info "Running rate test for policer $last_policer"
+	__rate_test $last_policer
+}
+
+__burst_test()
+{
+	local t0_rx t0_drop t1_rx t1_drop rx drop
+	local id=$1; shift
+
+	RET=0
+
+	devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
+	devlink trap group set $DEVLINK_DEV group l3_drops policer $id
+
+	# Send a burst of 16 packets and make sure that 16 are received
+	# and that none are dropped by the policer
+	log_info "=== Tx burst size: 16, Policer burst size: 512 ==="
+
+	t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 16
+
+	t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	rx=$((t1_rx - t0_rx))
+	(( rx == 16 ))
+	check_err $? "Expected burst size of 16 packets, got $rx packets"
+	log_info "Expected burst size of 16 packets, measured burst size of $rx packets"
+
+	drop=$((t1_drop - t0_drop))
+	(( drop == 0 ))
+	check_err $? "Expected zero policer drops, got $drop"
+	log_info "Measured policer drops of $drop packets"
+
+	# Unbind the policer and send a burst of 64 packets. Make sure that
+	# 64 packets are received and that none are dropped by the policer
+	log_info "=== Tx burst size: 64, Policer burst size: No policer ==="
+
+	devlink trap group set $DEVLINK_DEV group l3_drops nopolicer
+
+	t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64
+
+	t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	rx=$((t1_rx - t0_rx))
+	(( rx == 64 ))
+	check_err $? "Expected burst size of 64 packets, got $rx packets"
+	log_info "Expected burst size of 64 packets, measured burst size of $rx packets"
+
+	drop=$((t1_drop - t0_drop))
+	(( drop == 0 ))
+	check_err $? "Expected zero policer drops, got $drop"
+	log_info "Measured policer drops of $drop packets"
+
+	log_test "Trap policer burst size"
+}
+
+burst_test()
+{
+	local last_policer=$(devlink -j -p trap policer show |
+			     jq '[.[]["'$DEVLINK_DEV'"][].policer] | max')
+
+	log_info "Running burst test for policer 1"
+	__burst_test 1
+
+	log_info "Running burst test for policer $((last_policer / 2))"
+	__burst_test $((last_policer / 2))
+
+	log_info "Running burst test for policer $last_policer"
+	__burst_test $last_policer
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
new file mode 100755
index 000000000000..4ac1dae92d0f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
@@ -0,0 +1,249 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel exceptions functionality over mlxsw.
+# Check all exception traps to make sure they are triggered under the right
+# conditions.
+
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |      192.0.2.1/28 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|-----+
+# | SW1               |     |
+# |             $swp1 +     |
+# |      192.0.2.2/28       |
+# |                         |
+# |  + g1a (gre)            |
+# |    loc=192.0.2.65       |
+# |    rem=192.0.2.66       |
+# |    tos=inherit          |
+# |                         |
+# |  + $rp1                 |
+# |  |  198.51.100.1/28     |
+# +--|----------------------+
+#    |
+# +--|----------------------+
+# |  |                 VRF2 |
+# |  + $rp2                 |
+# |    198.51.100.2/28      |
+# +-------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	decap_error_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+vrf2_create()
+{
+	simple_if_init $rp2 198.51.100.2/28
+}
+
+vrf2_destroy()
+{
+	simple_if_fini $rp2 198.51.100.2/28
+}
+
+switch_create()
+{
+	__addr_add_del $swp1 add 192.0.2.2/28
+	tc qdisc add dev $swp1 clsact
+	ip link set dev $swp1 up
+
+	tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit
+	__addr_add_del g1 add 192.0.2.65/32
+	ip link set dev g1 up
+
+	__addr_add_del $rp1 add 198.51.100.1/28
+	ip link set dev $rp1 up
+}
+
+switch_destroy()
+{
+	ip link set dev $rp1 down
+	__addr_add_del $rp1 del 198.51.100.1/28
+
+	ip link set dev g1 down
+	__addr_add_del g1 del 192.0.2.65/32
+	tunnel_destroy g1
+
+	ip link set dev $swp1 down
+	tc qdisc del dev $swp1 clsact
+	__addr_add_del $swp1 del 192.0.2.2/28
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	rp1=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	switch_create
+	vrf2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vrf2_destroy
+	switch_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+ipip_payload_get()
+{
+	local flags=$1; shift
+	local key=$1; shift
+
+	p=$(:
+		)"$flags"$(		      : GRE flags
+	        )"0:00:"$(                    : Reserved + version
+		)"08:00:"$(		      : ETH protocol type
+		)"$key"$( 		      : Key
+		)"4"$(	                      : IP version
+		)"5:"$(                       : IHL
+		)"00:"$(                      : IP TOS
+		)"00:14:"$(                   : IP total length
+		)"00:00:"$(                   : IP identification
+		)"20:00:"$(                   : IP flags + frag off
+		)"30:"$(                      : IP TTL
+		)"01:"$(                      : IP proto
+		)"E7:E6:"$(    	              : IP header csum
+		)"C0:00:01:01:"$(             : IP saddr : 192.0.1.1
+		)"C0:00:02:01:"$(             : IP daddr : 192.0.2.1
+		)
+	echo $p
+}
+
+ecn_payload_get()
+{
+	echo $(ipip_payload_get "0")
+}
+
+ecn_decap_test()
+{
+	local trap_name="decap_error"
+	local desc=$1; shift
+	local ecn_desc=$1; shift
+	local outer_tos=$1; shift
+	local mz_pid
+
+	RET=0
+
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower src_ip 192.0.1.1 dst_ip 192.0.2.1 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	rp2_mac=$(mac_get $rp2)
+	payload=$(ecn_payload_get)
+
+	ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+		-A 192.0.2.66 -B 192.0.2.65 -t ip \
+			len=48,tos=$outer_tos,proto=47,p=$payload -q &
+
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+	kill_process $mz_pid
+	tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
+}
+
+no_matching_tunnel_test()
+{
+	local trap_name="decap_error"
+	local desc=$1; shift
+	local sip=$1; shift
+	local mz_pid
+
+	RET=0
+
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower src_ip 192.0.1.1 dst_ip 192.0.2.1 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	rp2_mac=$(mac_get $rp2)
+	payload=$(ipip_payload_get "$@")
+
+	ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+		-A $sip -B 192.0.2.65 -t ip len=48,proto=47,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc"
+
+	kill_process $mz_pid
+	tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
+}
+
+decap_error_test()
+{
+	# Correct source IP - the remote address
+	local sip=192.0.2.66
+
+	ecn_decap_test "Decap error" "ECT(1)" 01
+	ecn_decap_test "Decap error" "ECT(0)" 02
+	ecn_decap_test "Decap error" "CE" 03
+
+	no_matching_tunnel_test "Decap error: Source IP check failed" \
+		192.0.2.68 "0"
+	no_matching_tunnel_test \
+		"Decap error: Key exists but was not expected" $sip "2" \
+		"00:00:00:E9:"
+
+	# Destroy the tunnel and create new one with key
+	__addr_add_del g1 del 192.0.2.65/32
+	tunnel_destroy g1
+
+	tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit key 233
+	__addr_add_del g1 add 192.0.2.65/32
+
+	no_matching_tunnel_test \
+		"Decap error: Key does not exist but was expected" $sip "0"
+	no_matching_tunnel_test \
+		"Decap error: Packet has a wrong key field" $sip "2" \
+		"00:00:00:E8:"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh
new file mode 100755
index 000000000000..fce885184404
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh
@@ -0,0 +1,250 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel exceptions functionality over mlxsw.
+# Check all exception traps to make sure they are triggered under the right
+# conditions.
+
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |  2001:db8:1::1/64 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|-----+
+# | SW1               |     |
+# |             $swp1 +     |
+# |  2001:db8:1::2/64       |
+# |                         |
+# |  + g1 (ip6gre)          |
+# |    loc=2001:db8:3::1    |
+# |    rem=2001:db8:3::2    |
+# |    tos=inherit          |
+# |                         |
+# |  + $rp1                 |
+# |  | 2001:db8:10::1/64    |
+# +--|----------------------+
+#    |
+# +--|----------------------+
+# |  |                 VRF2 |
+# |  + $rp2                 |
+# |    2001:db8:10::2/64    |
+# +-------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	decap_error_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 2001:db8:1::1/64
+}
+
+vrf2_create()
+{
+	simple_if_init $rp2 2001:db8:10::2/64
+}
+
+vrf2_destroy()
+{
+	simple_if_fini $rp2 2001:db8:10::2/64
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	__addr_add_del $swp1 add 2001:db8:1::2/64
+	tc qdisc add dev $swp1 clsact
+
+	tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+		ttl inherit
+	ip link set dev g1 up
+	__addr_add_del g1 add 2001:db8:3::1/128
+
+	ip link set dev $rp1 up
+	__addr_add_del $rp1 add 2001:db8:10::1/64
+}
+
+switch_destroy()
+{
+	__addr_add_del $rp1 del 2001:db8:10::1/64
+	ip link set dev $rp1 down
+
+	__addr_add_del g1 del 2001:db8:3::1/128
+	ip link set dev g1 down
+	tunnel_destroy g1
+
+	tc qdisc del dev $swp1 clsact
+	__addr_add_del $swp1 del 2001:db8:1::2/64
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	rp1=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	switch_create
+	vrf2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vrf2_destroy
+	switch_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+ipip_payload_get()
+{
+	local saddr="20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:01"
+	local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+	local flags=$1; shift
+	local key=$1; shift
+
+	p=$(:
+		)"$flags"$(		      : GRE flags
+	        )"0:00:"$(                    : Reserved + version
+		)"86:dd:"$(		      : ETH protocol type
+		)"$key"$( 		      : Key
+		)"6"$(	                      : IP version
+		)"0:0"$(		      : Traffic class
+		)"0:00:00:"$(		      : Flow label
+		)"00:00:"$(                   : Payload length
+		)"3a:"$(                      : Next header
+		)"04:"$(                      : Hop limit
+		)"$saddr:"$(                  : IP saddr
+		)"$daddr:"$(                  : IP daddr
+		)
+	echo $p
+}
+
+ecn_payload_get()
+{
+	echo $(ipip_payload_get "0")
+}
+
+ecn_decap_test()
+{
+	local trap_name="decap_error"
+	local desc=$1; shift
+	local ecn_desc=$1; shift
+	local outer_tos=$1; shift
+	local mz_pid
+
+	RET=0
+
+	tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+		flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 skip_sw \
+		action pass
+
+	rp1_mac=$(mac_get $rp1)
+	rp2_mac=$(mac_get $rp2)
+	payload=$(ecn_payload_get)
+
+	ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+		-A 2001:db8:3::2 -B 2001:db8:3::1 -t ip \
+			tos=$outer_tos,next=47,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+	kill_process $mz_pid
+	tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+no_matching_tunnel_test()
+{
+	local trap_name="decap_error"
+	local desc=$1; shift
+	local sip=$1; shift
+	local mz_pid
+
+	RET=0
+
+	tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+		flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	rp2_mac=$(mac_get $rp2)
+	payload=$(ipip_payload_get "$@")
+
+	ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+		-A $sip -B 2001:db8:3::1 -t ip next=47,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc"
+
+	kill_process $mz_pid
+	tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+decap_error_test()
+{
+	# Correct source IP - the remote address
+	local sip=2001:db8:3::2
+
+	ecn_decap_test "Decap error" "ECT(1)" 01
+	ecn_decap_test "Decap error" "ECT(0)" 02
+	ecn_decap_test "Decap error" "CE" 03
+
+	no_matching_tunnel_test "Decap error: Source IP check failed" \
+		2001:db8:4::2 "0"
+	no_matching_tunnel_test \
+		"Decap error: Key exists but was not expected" $sip "2" \
+		"00:00:00:E9:"
+
+	# Destroy the tunnel and create new one with key
+	__addr_add_del g1 del 2001:db8:3::1/128
+	tunnel_destroy g1
+
+	tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+		ttl inherit key 233
+	__addr_add_del g1 add 2001:db8:3::1/128
+
+	no_matching_tunnel_test \
+		"Decap error: Key does not exist but was expected" $sip "0"
+	no_matching_tunnel_test \
+		"Decap error: Packet has a wrong key field" $sip "2" \
+		"00:00:00:E8:"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
new file mode 100755
index 000000000000..7aca8e5922cf
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
@@ -0,0 +1,330 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel drops and exceptions functionality over mlxsw.
+# Check all traps to make sure they are triggered under the right
+# conditions.
+
+# +--------------------+
+# | H1 (vrf)           |
+# |    + $h1           |
+# |    | 192.0.2.1/28  |
+# +----|---------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# | +--|--------------------------------------------------------------------+ |
+# | |  + $swp1                   BR1 (802.1d)                               | |
+# | |                                                                       | |
+# | |  + vx1 (vxlan)                                                        | |
+# | |    local 192.0.2.17                                                   | |
+# | |    id 1000 dstport $VXPORT                                            | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 192.0.2.17/28                                                        |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                             VRF2       |
+# |    + $rp2                                                   |
+# |      192.0.2.18/28                                          |
+# |                                                             |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	decap_error_test
+	overlay_smac_is_mc_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+: ${VXPORT:=4789}
+export VXPORT
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	tc qdisc add dev $swp1 clsact
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link add name vx1 type vxlan id 1000 local 192.0.2.17 \
+		dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx1 master br1
+	ip link set dev vx1 up
+
+	ip address add dev $rp1 192.0.2.17/28
+	ip link set dev $rp1 up
+}
+
+switch_destroy()
+{
+	ip link set dev $rp1 down
+	ip address del dev $rp1 192.0.2.17/28
+
+	ip link set dev vx1 down
+	ip link set dev vx1 nomaster
+	ip link del dev vx1
+
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrf2_create()
+{
+	simple_if_init $rp2 192.0.2.18/28
+}
+
+vrf2_destroy()
+{
+	simple_if_fini $rp2 192.0.2.18/28
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	rp1=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+	h1_create
+	switch_create
+	vrf2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vrf2_destroy
+	switch_destroy
+	h1_destroy
+	forwarding_restore
+	vrf_cleanup
+}
+
+ecn_payload_get()
+{
+	dest_mac=$(mac_get $h1)
+	p=$(:
+		)"08:"$(                      : VXLAN flags
+		)"00:00:00:"$(                : VXLAN reserved
+		)"00:03:e8:"$(                : VXLAN VNI : 1000
+		)"00:"$(                      : VXLAN reserved
+		)"$dest_mac:"$(               : ETH daddr
+		)"00:00:00:00:00:00:"$(       : ETH saddr
+		)"08:00:"$(                   : ETH type
+		)"45:"$(                      : IP version + IHL
+		)"00:"$(                      : IP TOS
+		)"00:14:"$(                   : IP total length
+		)"00:00:"$(                   : IP identification
+		)"20:00:"$(                   : IP flags + frag off
+		)"40:"$(                      : IP TTL
+		)"00:"$(                      : IP proto
+		)"D6:E5:"$(                   : IP header csum
+		)"c0:00:02:03:"$(             : IP saddr: 192.0.2.3
+		)"c0:00:02:01:"$(             : IP daddr: 192.0.2.1
+		)
+	echo $p
+}
+
+ecn_decap_test()
+{
+	local trap_name="decap_error"
+	local desc=$1; shift
+	local ecn_desc=$1; shift
+	local outer_tos=$1; shift
+	local mz_pid
+
+	RET=0
+
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower src_ip 192.0.2.3 dst_ip 192.0.2.1 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	payload=$(ecn_payload_get)
+
+	ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac -B 192.0.2.17 \
+		-t udp sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+	kill_process $mz_pid
+	tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
+}
+
+reserved_bits_payload_get()
+{
+	dest_mac=$(mac_get $h1)
+	p=$(:
+		)"08:"$(                      : VXLAN flags
+		)"01:00:00:"$(                : VXLAN reserved
+		)"00:03:e8:"$(                : VXLAN VNI : 1000
+		)"00:"$(                      : VXLAN reserved
+		)"$dest_mac:"$(               : ETH daddr
+		)"00:00:00:00:00:00:"$(       : ETH saddr
+		)"08:00:"$(                   : ETH type
+		)"45:"$(                      : IP version + IHL
+		)"00:"$(                      : IP TOS
+		)"00:14:"$(                   : IP total length
+		)"00:00:"$(                   : IP identification
+		)"20:00:"$(                   : IP flags + frag off
+		)"40:"$(                      : IP TTL
+		)"00:"$(                      : IP proto
+		)"00:00:"$(                   : IP header csum
+		)"c0:00:02:03:"$(             : IP saddr: 192.0.2.3
+		)"c0:00:02:01:"$(             : IP daddr: 192.0.2.1
+		)
+	echo $p
+}
+
+short_payload_get()
+{
+        dest_mac=$(mac_get $h1)
+        p=$(:
+		)"08:"$(                      : VXLAN flags
+		)"00:00:00:"$(                : VXLAN reserved
+		)"00:03:e8:"$(                : VXLAN VNI : 1000
+		)"00:"$(                      : VXLAN reserved
+		)"$dest_mac:"$(               : ETH daddr
+		)"00:00:00:00:00:00:"$(       : ETH saddr
+		)
+        echo $p
+}
+
+corrupted_packet_test()
+{
+	local trap_name="decap_error"
+	local desc=$1; shift
+	local payload_get=$1; shift
+	local mz_pid
+
+	RET=0
+
+	# In case of too short packet, there is no any inner packet,
+	# so the matching will always succeed
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower skip_hw src_ip 192.0.2.3 dst_ip 192.0.2.1 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	payload=$($payload_get)
+	ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac \
+		-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc"
+
+	kill_process $mz_pid
+	tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
+}
+
+decap_error_test()
+{
+	ecn_decap_test "Decap error" "ECT(1)" 01
+	ecn_decap_test "Decap error" "ECT(0)" 02
+	ecn_decap_test "Decap error" "CE" 03
+
+	corrupted_packet_test "Decap error: Reserved bits in use" \
+		"reserved_bits_payload_get"
+	corrupted_packet_test "Decap error: Too short inner packet" \
+		"short_payload_get"
+}
+
+mc_smac_payload_get()
+{
+	dest_mac=$(mac_get $h1)
+	source_mac=01:02:03:04:05:06
+	p=$(:
+		)"08:"$(                      : VXLAN flags
+		)"00:00:00:"$(                : VXLAN reserved
+		)"00:03:e8:"$(                : VXLAN VNI : 1000
+		)"00:"$(                      : VXLAN reserved
+		)"$dest_mac:"$(               : ETH daddr
+		)"$source_mac:"$(             : ETH saddr
+		)"08:00:"$(                   : ETH type
+		)"45:"$(                      : IP version + IHL
+		)"00:"$(                      : IP TOS
+		)"00:14:"$(                   : IP total length
+		)"00:00:"$(                   : IP identification
+		)"20:00:"$(                   : IP flags + frag off
+		)"40:"$(                      : IP TTL
+		)"00:"$(                      : IP proto
+		)"00:00:"$(                   : IP header csum
+		)"c0:00:02:03:"$(             : IP saddr: 192.0.2.3
+		)"c0:00:02:01:"$(             : IP daddr: 192.0.2.1
+		)
+	echo $p
+}
+
+overlay_smac_is_mc_test()
+{
+	local trap_name="overlay_smac_is_mc"
+	local mz_pid
+
+	RET=0
+
+	# The matching will be checked on devlink_trap_drop_test()
+	# and the filter will be removed on devlink_trap_drop_cleanup()
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower src_mac 01:02:03:04:05:06 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	payload=$(mc_smac_payload_get)
+
+	ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac \
+		-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp1 101
+
+	log_test "Overlay source MAC is multicast"
+
+	devlink_trap_drop_cleanup $mz_pid $swp1 "ip" 1 101
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh
new file mode 100755
index 000000000000..4599c331240b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh
@@ -0,0 +1,342 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel drops and exceptions functionality over mlxsw.
+# Check all traps to make sure they are triggered under the right
+# conditions.
+
+# +------------------------+
+# | H1 (vrf)               |
+# |    + $h1               |
+# |    | 2001:db8:1::1/64  |
+# +----|-------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# | +--|--------------------------------------------------------------------+ |
+# | |  + $swp1                   BR1 (802.1d)                               | |
+# | |                                                                       | |
+# | |  + vx1 (vxlan)                                                        | |
+# | |    local 2001:db8:3::1                                                | |
+# | |    id 1000 dstport $VXPORT                                            | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 2001:db8:3::1/64                                                     |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                             VRF2       |
+# |    + $rp2                                                   |
+# |      2001:db8:3::2/64                                       |
+# |                                                             |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	decap_error_test
+	overlay_smac_is_mc_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+: ${VXPORT:=4789}
+export VXPORT
+
+h1_create()
+{
+	simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 2001:db8:1::1/64
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	tc qdisc add dev $swp1 clsact
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link add name vx1 type vxlan id 1000 local 2001:db8:3::1 \
+		dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \
+		tos inherit ttl 100
+	ip link set dev vx1 master br1
+	ip link set dev vx1 up
+
+	ip link set dev $rp1 up
+	ip address add dev $rp1 2001:db8:3::1/64
+}
+
+switch_destroy()
+{
+	ip address del dev $rp1 2001:db8:3::1/64
+	ip link set dev $rp1 down
+
+	ip link set dev vx1 down
+	ip link set dev vx1 nomaster
+	ip link del dev vx1
+
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrf2_create()
+{
+	simple_if_init $rp2 2001:db8:3::2/64
+}
+
+vrf2_destroy()
+{
+	simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	rp1=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+	h1_create
+	switch_create
+	vrf2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vrf2_destroy
+	switch_destroy
+	h1_destroy
+	forwarding_restore
+	vrf_cleanup
+}
+
+ecn_payload_get()
+{
+	local dest_mac=$(mac_get $h1)
+	local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+	local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+	p=$(:
+		)"08:"$(                      : VXLAN flags
+		)"00:00:00:"$(                : VXLAN reserved
+		)"00:03:e8:"$(                : VXLAN VNI : 1000
+		)"00:"$(                      : VXLAN reserved
+		)"$dest_mac:"$(               : ETH daddr
+		)"00:00:00:00:00:00:"$(       : ETH saddr
+		)"86:dd:"$(                   : ETH type
+		)"6"$(                        : IP version
+		)"0:0"$(		      : Traffic class
+		)"0:00:00:"$(                 : Flow label
+		)"00:08:"$(                   : Payload length
+		)"3a:"$(                      : Next header
+		)"04:"$(                      : Hop limit
+		)"$saddr:"$(                  : IP saddr
+		)"$daddr:"$(		      : IP daddr
+		)"80:"$(                      : ICMPv6.type
+		)"00:"$(                      : ICMPv6.code
+		)"00:"$(                      : ICMPv6.checksum
+		)
+	echo $p
+}
+
+ecn_decap_test()
+{
+	local trap_name="decap_error"
+	local desc=$1; shift
+	local ecn_desc=$1; shift
+	local outer_tos=$1; shift
+	local mz_pid
+
+	RET=0
+
+	tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+		flower src_ip 2001:db8:1::3 dst_ip 2001:db8:1::1 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	payload=$(ecn_payload_get)
+
+	ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \
+		-B 2001:db8:3::1 -t udp \
+		sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+	kill_process $mz_pid
+	tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+reserved_bits_payload_get()
+{
+	local dest_mac=$(mac_get $h1)
+	local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+	local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+	p=$(:
+		)"08:"$(                      : VXLAN flags
+		)"01:00:00:"$(                : VXLAN reserved
+		)"00:03:e8:"$(                : VXLAN VNI : 1000
+		)"00:"$(                      : VXLAN reserved
+		)"$dest_mac:"$(               : ETH daddr
+		)"00:00:00:00:00:00:"$(       : ETH saddr
+		)"86:dd:"$(                   : ETH type
+		)"6"$(                        : IP version
+		)"0:0"$(		      : Traffic class
+		)"0:00:00:"$(                 : Flow label
+		)"00:08:"$(                   : Payload length
+		)"3a:"$(                      : Next header
+		)"04:"$(                      : Hop limit
+		)"$saddr:"$(                  : IP saddr
+		)"$daddr:"$(		      : IP daddr
+		)"80:"$(                      : ICMPv6.type
+		)"00:"$(                      : ICMPv6.code
+		)"00:"$(                      : ICMPv6.checksum
+		)
+	echo $p
+}
+
+short_payload_get()
+{
+        dest_mac=$(mac_get $h1)
+        p=$(:
+		)"08:"$(                      : VXLAN flags
+		)"00:00:00:"$(                : VXLAN reserved
+		)"00:03:e8:"$(                : VXLAN VNI : 1000
+		)"00:"$(                      : VXLAN reserved
+		)"$dest_mac:"$(               : ETH daddr
+		)"00:00:00:00:00:00:"$(       : ETH saddr
+		)
+        echo $p
+}
+
+corrupted_packet_test()
+{
+	local trap_name="decap_error"
+	local desc=$1; shift
+	local payload_get=$1; shift
+	local mz_pid
+
+	RET=0
+
+	# In case of too short packet, there is no any inner packet,
+	# so the matching will always succeed
+	tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+		flower skip_hw src_ip 2001:db8:3::1 dst_ip 2001:db8:1::1 \
+		action pass
+
+	rp1_mac=$(mac_get $rp1)
+	payload=$($payload_get)
+	ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \
+		-B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc"
+
+	kill_process $mz_pid
+	tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+decap_error_test()
+{
+	ecn_decap_test "Decap error" "ECT(1)" 01
+	ecn_decap_test "Decap error" "ECT(0)" 02
+	ecn_decap_test "Decap error" "CE" 03
+
+	corrupted_packet_test "Decap error: Reserved bits in use" \
+		"reserved_bits_payload_get"
+	corrupted_packet_test "Decap error: Too short inner packet" \
+		"short_payload_get"
+}
+
+mc_smac_payload_get()
+{
+	local dest_mac=$(mac_get $h1)
+	local source_mac="01:02:03:04:05:06"
+	local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+	local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+	p=$(:
+		)"08:"$(                      : VXLAN flags
+		)"00:00:00:"$(                : VXLAN reserved
+		)"00:03:e8:"$(                : VXLAN VNI : 1000
+		)"00:"$(                      : VXLAN reserved
+		)"$dest_mac:"$(               : ETH daddr
+		)"$source_mac:"$(	      : ETH saddr
+		)"86:dd:"$(                   : ETH type
+		)"6"$(                        : IP version
+		)"0:0"$(		      : Traffic class
+		)"0:00:00:"$(                 : Flow label
+		)"00:08:"$(                   : Payload length
+		)"3a:"$(                      : Next header
+		)"04:"$(                      : Hop limit
+		)"$saddr:"$(                  : IP saddr
+		)"$daddr:"$(		      : IP daddr
+		)"80:"$(                      : ICMPv6.type
+		)"00:"$(                      : ICMPv6.code
+		)"00:"$(                      : ICMPv6.checksum
+		)
+	echo $p
+}
+
+overlay_smac_is_mc_test()
+{
+	local trap_name="overlay_smac_is_mc"
+	local mz_pid
+
+	RET=0
+
+	# The matching will be checked on devlink_trap_drop_test()
+	# and the filter will be removed on devlink_trap_drop_cleanup()
+	tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+		flower src_mac 01:02:03:04:05:06 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	payload=$(mc_smac_payload_get)
+
+	ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \
+		-B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp1 101
+
+	log_test "Overlay source MAC is multicast"
+
+	devlink_trap_drop_cleanup $mz_pid $swp1 "ipv6" 1 101
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
new file mode 100755
index 000000000000..a5c2aec52898
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
@@ -0,0 +1,272 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test VLAN classification after routing and verify that the order of
+# configuration does not impact switch behavior. Verify that {RIF, Port}->VID
+# mapping is added correctly for existing {Port, VID}->FID mapping and that
+# {RIF, Port}->VID mapping is added correctly for new {Port, VID}->FID mapping.
+
+# +-------------------+                   +--------------------+
+# | H1                |                   | H2                 |
+# |                   |                   |                    |
+# |         $h1.10 +  |                   |  + $h2.10          |
+# |   192.0.2.1/28 |  |                   |  | 192.0.2.3/28    |
+# |                |  |                   |  |                 |
+# |            $h1 +  |                   |  + $h2             |
+# +----------------|--+                   +--|-----------------+
+#                  |                         |
+# +----------------|-------------------------|-----------------+
+# | SW       $swp1 +                         + $swp2           |
+# |                |                         |                 |
+# | +--------------|-------------------------|---------------+ |
+# | |     $swp1.10 +                         + $swp2.10      | |
+# | |                                                        | |
+# | |                           br0                          | |
+# | |                       192.0.2.2/28                     | |
+# | +--------------------------------------------------------+ |
+# |                                                            |
+# |      $swp3.20 +                                            |
+# | 192.0.2.17/28 |                                            |
+# |               |                                            |
+# |         $swp3 +                                            |
+# +---------------|--------------------------------------------+
+#                 |
+# +---------------|--+
+# |           $h3 +  |
+# |               |  |
+# |        $h3.20 +  |
+# | 192.0.2.18/28    |
+# |                  |
+# | H3               |
+# +------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	port_vid_map_rif
+	rif_port_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 10 v$h1 192.0.2.1/28
+
+	ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+	vlan_destroy $h1 10
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 10
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3
+	vlan_create $h3 20 v$h3 192.0.2.18/28
+
+	ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+	ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+
+	vlan_destroy $h3 20
+	simple_if_fini $h3
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	tc qdisc add dev $swp1 clsact
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	# By default, a link-local address is generated when netdevice becomes
+	# up. Adding an address to the bridge will cause creating a RIF for it.
+	# Prevent generating link-local address to be able to control when the
+	# RIF is added.
+	sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+	ip link set dev br0 up
+
+	ip link set dev $swp2 up
+	vlan_create $swp2 10
+	ip link set dev $swp2.10 master br0
+
+	ip link set dev $swp3 up
+	vlan_create $swp3 20 "" 192.0.2.17/28
+
+	# Replace neighbor to avoid 1 packet which is forwarded in software due
+	# to "unresolved neigh".
+	ip neigh replace dev $swp3.20 192.0.2.18 lladdr $(mac_get $h3.20)
+}
+
+switch_destroy()
+{
+	vlan_destroy $swp3 20
+	ip link set dev $swp3 down
+
+	ip link set dev $swp2.10 nomaster
+	vlan_destroy $swp2 10
+	ip link set dev $swp2 down
+
+	ip link set dev br0 down
+	sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+	ip link del dev br0
+
+	tc qdisc del dev $swp1 clsact
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	h3_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+bridge_rif_add()
+{
+	rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	__addr_add_del br0 add 192.0.2.2/28
+	rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+	expected_rifs=$((rifs_occ_t0 + 1))
+
+	[[ $expected_rifs -eq $rifs_occ_t1 ]]
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	sleep 1
+}
+
+bridge_rif_del()
+{
+	__addr_add_del br0 del 192.0.2.2/28
+}
+
+port_vid_map_rif()
+{
+	RET=0
+
+	# First add {port, VID}->FID for swp1.10, then add a RIF and verify that
+	# packets get the correct VID after routing.
+	vlan_create $swp1 10
+	ip link set dev $swp1.10 master br0
+	bridge_rif_add
+
+	# Replace neighbor to avoid 1 packet which is forwarded in software due
+	# to "unresolved neigh".
+	ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10)
+
+	# The hardware matches on the first ethertype which is not VLAN,
+	# so the protocol should be IP.
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.1 action pass
+
+	ping_do $h1.10 192.0.2.18
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add RIF for existing {port, VID}->FID mapping"
+
+	tc filter del dev $swp1 egress
+
+	bridge_rif_del
+	ip link set dev $swp1.10 nomaster
+	vlan_destroy $swp1 10
+}
+
+rif_port_vid_map()
+{
+	RET=0
+
+	# First add an address to the bridge, which will create a RIF on top of
+	# it, then add a new {port, VID}->FID mapping and verify that packets
+	# get the correct VID after routing.
+	bridge_rif_add
+	vlan_create $swp1 10
+	ip link set dev $swp1.10 master br0
+
+	# Replace neighbor to avoid 1 packet which is forwarded in software due
+	# to "unresolved neigh".
+	ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10)
+
+	# The hardware matches on the first ethertype which is not VLAN,
+	# so the protocol should be IP.
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.1 action pass
+
+	ping_do $h1.10 192.0.2.18
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add {port, VID}->FID mapping for FID with a RIF"
+
+	tc filter del dev $swp1 egress
+
+	ip link set dev $swp1.10 nomaster
+	vlan_destroy $swp1 10
+	bridge_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
new file mode 100755
index 000000000000..fe905a7f34b3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
@@ -0,0 +1,190 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+ethtool_lib_dir=$(dirname $0)/../hw
+
+ALL_TESTS="
+	autoneg
+	autoneg_force_mode
+"
+
+NUM_NETIFS=2
+: ${TIMEOUT:=30000} # ms
+source $lib_dir/lib.sh
+source $ethtool_lib_dir/ethtool_lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	busywait "$TIMEOUT" wait_for_port_up ethtool $swp2
+	check_err $? "ports did not come up"
+
+	busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:"
+	if [[ $? -ne 0 ]]; then
+		log_test "SKIP: driver does not support lanes setting"
+		exit 1
+	fi
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+check_lanes()
+{
+	local dev=$1; shift
+	local lanes=$1; shift
+	local max_speed=$1; shift
+	local chosen_lanes
+
+	chosen_lanes=$(ethtool $dev | grep 'Lanes:')
+	chosen_lanes=${chosen_lanes#*"Lanes: "}
+
+	((chosen_lanes == lanes))
+	check_err $? "swp1 advertise $max_speed and $lanes, devs sync to $chosen_lanes"
+}
+
+check_unsupported_lanes()
+{
+	local dev=$1; shift
+	local max_speed=$1; shift
+	local max_lanes=$1; shift
+	local autoneg=$1; shift
+	local autoneg_str=""
+
+	local unsupported_lanes=$((max_lanes *= 2))
+
+	if [[ $autoneg -eq 0 ]]; then
+		autoneg_str="autoneg off"
+	fi
+
+	ethtool -s $swp1 speed $max_speed lanes $unsupported_lanes $autoneg_str &> /dev/null
+	check_fail $? "Unsuccessful $unsupported_lanes lanes setting was expected"
+}
+
+max_speed_and_lanes_get()
+{
+	local dev=$1; shift
+	local arr=("$@")
+	local max_lanes
+	local max_speed
+	local -a lanes_arr
+	local -a speeds_arr
+	local -a max_values
+
+	for ((i=0; i<${#arr[@]}; i+=2)); do
+		speeds_arr+=("${arr[$i]}")
+		lanes_arr+=("${arr[i+1]}")
+	done
+
+	max_values+=($(get_max "${speeds_arr[@]}"))
+	max_values+=($(get_max "${lanes_arr[@]}"))
+
+	echo ${max_values[@]}
+}
+
+search_linkmode()
+{
+	local speed=$1; shift
+	local lanes=$1; shift
+	local arr=("$@")
+
+	for ((i=0; i<${#arr[@]}; i+=2)); do
+		if [[ $speed -eq ${arr[$i]} && $lanes -eq ${arr[i+1]} ]]; then
+			return 1
+		fi
+	done
+	return 0
+}
+
+autoneg()
+{
+	RET=0
+
+	local lanes
+	local max_speed
+	local max_lanes
+
+	local -a linkmodes_params=($(dev_linkmodes_params_get $swp1 1))
+	local -a max_values=($(max_speed_and_lanes_get $swp1 "${linkmodes_params[@]}"))
+	max_speed=${max_values[0]}
+	max_lanes=${max_values[1]}
+
+	lanes=$max_lanes
+
+	while [[ $lanes -ge 1 ]]; do
+		search_linkmode $max_speed $lanes "${linkmodes_params[@]}"
+		if [[ $? -eq 1 ]]; then
+			ethtool_set $swp1 speed $max_speed lanes $lanes
+			ip link set dev $swp1 up
+			ip link set dev $swp2 up
+
+			busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:"
+			check_err $? "Lanes parameter is not presented on time"
+
+			check_lanes $swp1 $lanes $max_speed
+			log_test "$lanes lanes is autonegotiated"
+		fi
+		let $((lanes /= 2))
+	done
+
+	check_unsupported_lanes $swp1 $max_speed $max_lanes 1
+	log_test "Lanes number larger than max width is not set"
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+autoneg_force_mode()
+{
+	RET=0
+
+	local lanes
+	local max_speed
+	local max_lanes
+
+	local -a linkmodes_params=($(dev_linkmodes_params_get $swp1 1))
+	local -a max_values=($(max_speed_and_lanes_get $swp1 "${linkmodes_params[@]}"))
+	max_speed=${max_values[0]}
+	max_lanes=${max_values[1]}
+
+	lanes=$max_lanes
+
+	while [[ $lanes -ge 1 ]]; do
+		search_linkmode $max_speed $lanes "${linkmodes_params[@]}"
+		if [[ $? -eq 1 ]]; then
+			ethtool_set $swp1 speed $max_speed lanes $lanes autoneg off
+			ethtool_set $swp2 speed $max_speed lanes $lanes autoneg off
+			ip link set dev $swp1 up
+			ip link set dev $swp2 up
+
+			busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:"
+			check_err $? "Lanes parameter is not presented on time"
+
+			check_lanes $swp1 $lanes $max_speed
+			log_test "Autoneg off, $lanes lanes detected during force mode"
+		fi
+		let $((lanes /= 2))
+	done
+
+	check_unsupported_lanes $swp1 $max_speed $max_lanes 0
+	log_test "Lanes number larger than max width is not set"
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ethtool -s $swp2 autoneg on
+	ethtool -s $swp1 autoneg on
+}
+
+check_ethtool_lanes_support
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
new file mode 100755
index 000000000000..6fd422d38fe8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test operations that we expect to report extended ack.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	netdev_pre_up_test
+	vxlan_vlan_add_test
+	vxlan_bridge_create_test
+	bridge_create_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+netdev_pre_up_test()
+{
+	RET=0
+
+	ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+	ip link set dev br1 addrgenmode none
+	ip link set dev br1 up
+	ip link add name vx1 up type vxlan id 1000 \
+		local 192.0.2.17 remote 192.0.2.18 \
+		dstport 4789 nolearning noudpcsum tos inherit ttl 100
+
+	ip link set dev vx1 master br1
+	check_err $?
+
+	ip link set dev $swp1 master br1
+	check_err $?
+
+	ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0
+	ip link set dev br2 addrgenmode none
+	ip link set dev br2 up
+	ip link add name vx2 up type vxlan id 2000 \
+		local 192.0.2.17 remote 192.0.2.18 \
+		dstport 4789 nolearning noudpcsum tos inherit ttl 100
+
+	ip link set dev vx2 master br2
+	check_err $?
+
+	ip link set dev $swp2 master br2
+	check_err $?
+
+	# Unsupported configuration: mlxsw demands that all offloaded VXLAN
+	# devices have the same TTL.
+	ip link set dev vx2 down
+	ip link set dev vx2 type vxlan ttl 200
+
+	ip link set dev vx2 up &>/dev/null
+	check_fail $?
+
+	ip link set dev vx2 up 2>&1 >/dev/null | grep -q mlxsw_spectrum
+	check_err $?
+
+	log_test "extack - NETDEV_PRE_UP"
+
+	ip link del dev vx2
+	ip link del dev br2
+
+	ip link del dev vx1
+	ip link del dev br1
+}
+
+vxlan_vlan_add_test()
+{
+	RET=0
+
+	ip link add name br1 type bridge vlan_filtering 1 mcast_snooping 0
+	ip link set dev br1 addrgenmode none
+	ip link set dev br1 up
+
+	# Unsupported configuration: mlxsw demands VXLAN with "noudpcsum".
+	ip link add name vx1 up type vxlan id 1000 \
+		local 192.0.2.17 remote 192.0.2.18 \
+		dstport 4789 tos inherit ttl 100
+
+	ip link set dev vx1 master br1
+	check_err $?
+
+	bridge vlan add dev vx1 vid 1
+	check_err $?
+
+	ip link set dev $swp1 master br1
+	check_err $?
+
+	bridge vlan add dev vx1 vid 1 pvid untagged 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $?
+
+	log_test "extack - map VLAN at VXLAN device"
+
+	ip link del dev vx1
+	ip link del dev br1
+}
+
+vxlan_bridge_create_test()
+{
+	RET=0
+
+	# Unsupported configuration: mlxsw demands VXLAN with "noudpcsum".
+	ip link add name vx1 up type vxlan id 1000 \
+		local 192.0.2.17 remote 192.0.2.18 \
+		dstport 4789 tos inherit ttl 100
+
+	# Test with VLAN-aware bridge.
+	ip link add name br1 type bridge vlan_filtering 1 mcast_snooping 0
+	ip link set dev br1 addrgenmode none
+	ip link set dev br1 up
+
+	ip link set dev vx1 master br1
+
+	ip link set dev $swp1 master br1 2>&1 > /dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $?
+
+	# Test with VLAN-unaware bridge.
+	ip link set dev br1 type bridge vlan_filtering 0
+
+	ip link set dev $swp1 master br1 2>&1 > /dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $?
+
+	log_test "extack - bridge creation with VXLAN"
+
+	ip link del dev br1
+	ip link del dev vx1
+}
+
+bridge_create_test()
+{
+	RET=0
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 addrgenmode none
+	ip link set dev br1 up
+	ip link add name br2 type bridge vlan_filtering 1
+	ip link set dev br2 addrgenmode none
+	ip link set dev br2 up
+
+	ip link set dev $swp1 master br1
+	check_err $?
+
+	# Only one VLAN-aware bridge is supported, so this should fail with
+	# an extack.
+	ip link set dev $swp2 master br2 2>&1 > /dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $?
+
+	log_test "extack - multiple VLAN-aware bridges creation"
+
+	ip link del dev br2
+	ip link del dev br1
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib.sh b/tools/testing/selftests/drivers/net/mlxsw/fib.sh
new file mode 100755
index 000000000000..dcbf32b99bb6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/fib.sh
@@ -0,0 +1,270 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the FIB offload API on top of mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	ipv4_identical_routes
+	ipv4_tos
+	ipv4_metric
+	ipv4_replace
+	ipv4_delete
+	ipv4_plen
+	ipv4_replay
+	ipv4_flush
+	ipv4_local_replace
+	ipv6_add
+	ipv6_metric
+	ipv6_append_single
+	ipv6_replace_single
+	ipv6_metric_multipath
+	ipv6_append_multipath
+	ipv6_replace_multipath
+	ipv6_append_multipath_to_single
+	ipv6_delete_single
+	ipv6_delete_multipath
+	ipv6_replay_single
+	ipv6_replay_multipath
+	ipv6_local_replace
+"
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source $lib_dir/fib_offload_lib.sh
+
+ipv4_identical_routes()
+{
+	fib_ipv4_identical_routes_test "testns1"
+}
+
+ipv4_tos()
+{
+	fib_ipv4_tos_test "testns1"
+}
+
+ipv4_metric()
+{
+	fib_ipv4_metric_test "testns1"
+}
+
+ipv4_replace()
+{
+	fib_ipv4_replace_test "testns1"
+}
+
+ipv4_delete()
+{
+	fib_ipv4_delete_test "testns1"
+}
+
+ipv4_plen()
+{
+	fib_ipv4_plen_test "testns1"
+}
+
+ipv4_replay_metric()
+{
+	fib_ipv4_replay_metric_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv4_replay_tos()
+{
+	fib_ipv4_replay_tos_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv4_replay_plen()
+{
+	fib_ipv4_replay_plen_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv4_replay()
+{
+	ipv4_replay_metric
+	ipv4_replay_tos
+	ipv4_replay_plen
+}
+
+ipv4_flush()
+{
+	fib_ipv4_flush_test "testns1"
+}
+
+ipv4_local_replace()
+{
+	local ns="testns1"
+
+	RET=0
+
+	ip -n $ns link add name dummy1 type dummy
+	ip -n $ns link set dev dummy1 up
+
+	ip -n $ns route add table local 192.0.2.1/32 dev dummy1
+	fib4_trap_check $ns "table local 192.0.2.1/32 dev dummy1" false
+	check_err $? "Local table route not in hardware when should"
+
+	ip -n $ns route add table main 192.0.2.1/32 dev dummy1
+	fib4_trap_check $ns "table main 192.0.2.1/32 dev dummy1" true
+	check_err $? "Main table route in hardware when should not"
+
+	fib4_trap_check $ns "table local 192.0.2.1/32 dev dummy1" false
+	check_err $? "Local table route was replaced when should not"
+
+	# Test that local routes can replace routes in main table.
+	ip -n $ns route add table main 192.0.2.2/32 dev dummy1
+	fib4_trap_check $ns "table main 192.0.2.2/32 dev dummy1" false
+	check_err $? "Main table route not in hardware when should"
+
+	ip -n $ns route add table local 192.0.2.2/32 dev dummy1
+	fib4_trap_check $ns "table local 192.0.2.2/32 dev dummy1" false
+	check_err $? "Local table route did not replace route in main table when should"
+
+	fib4_trap_check $ns "table main 192.0.2.2/32 dev dummy1" true
+	check_err $? "Main table route was not replaced when should"
+
+	log_test "IPv4 local table route replacement"
+
+	ip -n $ns link del dev dummy1
+}
+
+ipv6_add()
+{
+	fib_ipv6_add_test "testns1"
+}
+
+ipv6_metric()
+{
+	fib_ipv6_metric_test "testns1"
+}
+
+ipv6_append_single()
+{
+	fib_ipv6_append_single_test "testns1"
+}
+
+ipv6_replace_single()
+{
+	fib_ipv6_replace_single_test "testns1"
+}
+
+ipv6_metric_multipath()
+{
+	fib_ipv6_metric_multipath_test "testns1"
+}
+
+ipv6_append_multipath()
+{
+	fib_ipv6_append_multipath_test "testns1"
+}
+
+ipv6_replace_multipath()
+{
+	fib_ipv6_replace_multipath_test "testns1"
+}
+
+ipv6_append_multipath_to_single()
+{
+	fib_ipv6_append_multipath_to_single_test "testns1"
+}
+
+ipv6_delete_single()
+{
+	fib_ipv6_delete_single_test "testns1"
+}
+
+ipv6_delete_multipath()
+{
+	fib_ipv6_delete_multipath_test "testns1"
+}
+
+ipv6_replay_single()
+{
+	fib_ipv6_replay_single_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv6_replay_multipath()
+{
+	fib_ipv6_replay_multipath_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv6_local_replace()
+{
+	local ns="testns1"
+
+	RET=0
+
+	ip -n $ns link add name dummy1 type dummy
+	ip -n $ns link set dev dummy1 up
+
+	ip -n $ns route add table local 2001:db8:1::1/128 dev dummy1
+	fib6_trap_check $ns "table local 2001:db8:1::1/128 dev dummy1" false
+	check_err $? "Local table route not in hardware when should"
+
+	ip -n $ns route add table main 2001:db8:1::1/128 dev dummy1
+	fib6_trap_check $ns "table main 2001:db8:1::1/128 dev dummy1" true
+	check_err $? "Main table route in hardware when should not"
+
+	fib6_trap_check $ns "table local 2001:db8:1::1/128 dev dummy1" false
+	check_err $? "Local table route was replaced when should not"
+
+	# Test that local routes can replace routes in main table.
+	ip -n $ns route add table main 2001:db8:1::2/128 dev dummy1
+	fib6_trap_check $ns "table main 2001:db8:1::2/128 dev dummy1" false
+	check_err $? "Main table route not in hardware when should"
+
+	ip -n $ns route add table local 2001:db8:1::2/128 dev dummy1
+	fib6_trap_check $ns "table local 2001:db8:1::2/128 dev dummy1" false
+	check_err $? "Local route route did not replace route in main table when should"
+
+	fib6_trap_check $ns "table main 2001:db8:1::2/128 dev dummy1" true
+	check_err $? "Main table route was not replaced when should"
+
+	log_test "IPv6 local table route replacement"
+
+	ip -n $ns link del dev dummy1
+}
+
+fib_notify_on_flag_change_set()
+{
+	local notify=$1; shift
+
+	ip netns exec testns1 sysctl -qw net.ipv4.fib_notify_on_flag_change=$notify
+	ip netns exec testns1 sysctl -qw net.ipv6.fib_notify_on_flag_change=$notify
+
+	log_info "Set fib_notify_on_flag_change to $notify"
+}
+
+setup_prepare()
+{
+	ip netns add testns1
+	if [ $? -ne 0 ]; then
+		echo "Failed to add netns \"testns1\""
+		exit 1
+	fi
+
+	devlink dev reload $DEVLINK_DEV netns testns1
+	if [ $? -ne 0 ]; then
+		echo "Failed to reload into netns \"testns1\""
+		exit 1
+	fi
+}
+
+cleanup()
+{
+	pre_cleanup
+	devlink -N testns1 dev reload $DEVLINK_DEV netns $$
+	ip netns del testns1
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+fib_notify_on_flag_change_set 1
+tests_run
+
+fib_notify_on_flag_change_set 0
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh
new file mode 100755
index 000000000000..e99ae500f387
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh
@@ -0,0 +1,349 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test unicast FIB offload indication.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	ipv6_route_add
+	ipv6_route_replace
+	ipv6_route_nexthop_group_share
+	ipv6_route_rate
+"
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+tor1_create()
+{
+	simple_if_init $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128
+}
+
+tor1_destroy()
+{
+	simple_if_fini $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128
+}
+
+tor2_create()
+{
+	simple_if_init $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128
+}
+
+tor2_destroy()
+{
+	simple_if_fini $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128
+}
+
+spine_create()
+{
+	ip link set dev $spine_p1 up
+	ip link set dev $spine_p2 up
+
+	__addr_add_del $spine_p1 add 2001:db8:1::1/64
+	__addr_add_del $spine_p2 add 2001:db8:2::1/64
+}
+
+spine_destroy()
+{
+	__addr_add_del $spine_p2 del 2001:db8:2::1/64
+	__addr_add_del $spine_p1 del 2001:db8:1::1/64
+
+	ip link set dev $spine_p2 down
+	ip link set dev $spine_p1 down
+}
+
+ipv6_offload_check()
+{
+	local pfx="$1"; shift
+	local expected_num=$1; shift
+	local num
+
+	# Try to avoid races with route offload
+	sleep .1
+
+	num=$(ip -6 route show match ${pfx} | grep "offload" | wc -l)
+
+	if [ $num -eq $expected_num ]; then
+		return 0
+	fi
+
+	return 1
+}
+
+ipv6_route_add_prefix()
+{
+	RET=0
+
+	# Add a prefix route and check that it is offloaded.
+	ip -6 route add 2001:db8:3::/64 dev $spine_p1 metric 100
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1
+	check_err $? "prefix route not offloaded"
+
+	# Append an identical prefix route with an higher metric and check that
+	# offload indication did not change.
+	ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 200
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1
+	check_err $? "lowest metric not offloaded after append"
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	# Prepend an identical prefix route with lower metric and check that
+	# it is offloaded and the others are not.
+	ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 10
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 10" 1
+	check_err $? "lowest metric not offloaded after prepend"
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 0
+	check_err $? "mid metric offloaded when should not"
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	# Delete the routes and add the same route with a different nexthop
+	# device. Check that it is offloaded.
+	ip -6 route flush 2001:db8:3::/64 dev $spine_p1
+	ip -6 route add 2001:db8:3::/64 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p2" 1
+
+	log_test "IPv6 prefix route add"
+
+	ip -6 route flush 2001:db8:3::/64
+}
+
+ipv6_route_add_mpath()
+{
+	RET=0
+
+	# Add a multipath route and check that it is offloaded.
+	ip -6 route add 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded when should"
+
+	# Append another nexthop and check that it is offloaded as well.
+	ip -6 route append 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::3 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 3
+	check_err $? "appended nexthop not offloaded when should"
+
+	# Mimic route replace by removing the route and adding it back with
+	# only two nexthops.
+	ip -6 route del 2001:db8:3::/64
+	ip -6 route add 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after delete & add"
+
+	# Append a nexthop with an higher metric and check that the offload
+	# indication did not change.
+	ip -6 route append 2001:db8:3::/64 metric 200 \
+		nexthop via 2001:db8:1::3 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "lowest metric not offloaded after append"
+	ipv6_offload_check "2001:db8:3::/64 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	# Prepend a nexthop with a lower metric and check that it is offloaded
+	# and the others are not.
+	ip -6 route append 2001:db8:3::/64 metric 10 \
+		nexthop via 2001:db8:1::3 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 10" 1
+	check_err $? "lowest metric not offloaded after prepend"
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 0
+	check_err $? "mid metric offloaded when should not"
+	ipv6_offload_check "2001:db8:3::/64 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	log_test "IPv6 multipath route add"
+
+	ip -6 route flush 2001:db8:3::/64
+}
+
+ipv6_route_add()
+{
+	ipv6_route_add_prefix
+	ipv6_route_add_mpath
+}
+
+ipv6_route_replace()
+{
+	RET=0
+
+	# Replace prefix route with prefix route.
+	ip -6 route add 2001:db8:3::/64 metric 100 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 1
+	check_err $? "prefix route not offloaded when should"
+	ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 1
+	check_err $? "prefix route not offloaded after replace"
+
+	# Replace prefix route with multipath route.
+	ip -6 route replace 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after replace"
+
+	# Replace multipath route with prefix route. A prefix route cannot
+	# replace a multipath route, so it is appended.
+	ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100 dev $spine_p1" 0
+	check_err $? "prefix route offloaded after 'replacing' multipath route"
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after being 'replaced' by prefix route"
+
+	# Replace multipath route with multipath route.
+	ip -6 route replace 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::3 dev $spine_p1 \
+		nexthop via 2001:db8:2::3 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after replacing multipath route"
+
+	# Replace a non-existing multipath route with a multipath route and
+	# check that it is appended and not offloaded.
+	ip -6 route replace 2001:db8:3::/64 metric 200 \
+		nexthop via 2001:db8:1::3 dev $spine_p1 \
+		nexthop via 2001:db8:2::3 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after non-existing route was 'replaced'"
+	ipv6_offload_check "2001:db8:3::/64 metric 200" 0
+	check_err $? "multipath route offloaded after 'replacing' non-existing route"
+
+	log_test "IPv6 route replace"
+
+	ip -6 route flush 2001:db8:3::/64
+}
+
+ipv6_route_nexthop_group_share()
+{
+	RET=0
+
+	# The driver consolidates identical nexthop groups in order to reduce
+	# the resource usage in its adjacency table. Check that the deletion
+	# of one multipath route using the group does not affect the other.
+	ip -6 route add 2001:db8:3::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ip -6 route add 2001:db8:4::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64" 2
+	check_err $? "multipath route not offloaded when should"
+	ipv6_offload_check "2001:db8:4::/64" 2
+	check_err $? "multipath route not offloaded when should"
+	ip -6 route del 2001:db8:3::/64
+	ipv6_offload_check "2001:db8:4::/64" 2
+	check_err $? "multipath route not offloaded after deletion of route sharing the nexthop group"
+
+	# Check that after unsharing a nexthop group the routes are still
+	# marked as offloaded.
+	ip -6 route add 2001:db8:3::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ip -6 route del 2001:db8:4::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1
+	ipv6_offload_check "2001:db8:4::/64" 1
+	check_err $? "singlepath route not offloaded after unsharing the nexthop group"
+	ipv6_offload_check "2001:db8:3::/64" 2
+	check_err $? "multipath route not offloaded after unsharing the nexthop group"
+
+	log_test "IPv6 nexthop group sharing"
+
+	ip -6 route flush 2001:db8:3::/64
+	ip -6 route flush 2001:db8:4::/64
+}
+
+ipv6_route_rate()
+{
+	local batch_dir=$(mktemp -d)
+	local num_rts=$((40 * 1024))
+	local num_nhs=16
+	local total
+	local start
+	local diff
+	local end
+	local nhs
+	local i
+
+	RET=0
+
+	# Prepare 40K /64 multipath routes with 16 nexthops each and check how
+	# long it takes to add them. A limit of 60 seconds is set. It is much
+	# higher than insertion should take and meant to flag a serious
+	# regression.
+	total=$((nums_nhs * num_rts))
+
+	for i in $(seq 1 $num_nhs); do
+		ip -6 address add 2001:db8:1::10:$i/128 dev $tor1_p1
+		nexthops+=" nexthop via 2001:db8:1::10:$i dev $spine_p1"
+	done
+
+	for i in $(seq 1 $num_rts); do
+		echo "route add 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \
+			>> $batch_dir/add.batch
+		echo "route del 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \
+			>> $batch_dir/del.batch
+	done
+
+	start=$(date +%s.%N)
+
+	ip -batch $batch_dir/add.batch
+	count=$(ip -6 route show | grep offload | wc -l)
+	while [ $count -lt $total ]; do
+		sleep .01
+		count=$(ip -6 route show | grep offload | wc -l)
+	done
+
+	end=$(date +%s.%N)
+
+	diff=$(echo "$end - $start" | bc -l)
+	test "$(echo "$diff > 60" | bc -l)" -eq 0
+	check_err $? "route insertion took too long"
+	log_info "inserted $num_rts routes in $diff seconds"
+
+	log_test "IPv6 routes insertion rate"
+
+	ip -batch $batch_dir/del.batch
+	for i in $(seq 1 $num_nhs); do
+		ip -6 address del 2001:db8:1::10:$i/128 dev $tor1_p1
+	done
+	rm -rf $batch_dir
+}
+
+setup_prepare()
+{
+	spine_p1=${NETIFS[p1]}
+	tor1_p1=${NETIFS[p2]}
+
+	spine_p2=${NETIFS[p3]}
+	tor2_p1=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	tor1_create
+	tor2_create
+	spine_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	spine_destroy
+	tor2_destroy
+	tor1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh
new file mode 100755
index 000000000000..941ba4c485c9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	l3_monitor_test
+"
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+swp=$NETIF_NO_CABLE
+
+cleanup()
+{
+	pre_cleanup
+}
+
+l3_monitor_test()
+{
+	hw_stats_monitor_test $swp l3		    \
+		"ip addr add dev $swp 192.0.2.1/28" \
+		"ip addr del dev $swp 192.0.2.1/28"
+}
+
+trap cleanup EXIT
+
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
new file mode 100755
index 000000000000..7d7f862c809c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
@@ -0,0 +1,263 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing over bridge and verify that the order of configuration does not
+# impact switch behavior. Verify that RIF is added correctly for existing
+# mappings and that new mappings use the correct RIF.
+
+# +-------------------+                   +--------------------+
+# | H1                |                   | H2                 |
+# |                   |                   |                    |
+# |         $h1.10 +  |                   |  + $h2.10          |
+# |   192.0.2.1/28 |  |                   |  | 192.0.2.3/28    |
+# |                |  |                   |  |                 |
+# |            $h1 +  |                   |  + $h2             |
+# +----------------|--+                   +--|-----------------+
+#                  |                         |
+# +----------------|-------------------------|-----------------+
+# | SW       $swp1 +                         + $swp2           |
+# |                |                         |                 |
+# | +--------------|-------------------------|---------------+ |
+# | |     $swp1.10 +                         + $swp2.10      | |
+# | |                                                        | |
+# | |                           br0                          | |
+# | |                       192.0.2.2/28                     | |
+# | +--------------------------------------------------------+ |
+# |                                                            |
+# |      $swp3.10 +                                            |
+# | 192.0.2.17/28 |                                            |
+# |               |                                            |
+# |         $swp3 +                                            |
+# +---------------|--------------------------------------------+
+#                 |
+# +---------------|--+
+# |           $h3 +  |
+# |               |  |
+# |        $h3.10 +  |
+# | 192.0.2.18/28    |
+# |                  |
+# | H3               |
+# +------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	port_vid_map_rif
+	rif_port_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 10 v$h1 192.0.2.1/28
+
+	ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+	vlan_destroy $h1 10
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 10
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3
+	vlan_create $h3 10 v$h3 192.0.2.18/28
+
+	ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+	ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+
+	vlan_destroy $h3 10
+	simple_if_fini $h3
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	# By default, a link-local address is generated when netdevice becomes
+	# up. Adding an address to the bridge will cause creating a RIF for it.
+	# Prevent generating link-local address to be able to control when the
+	# RIF is added.
+	sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+	ip link set dev br0 up
+
+	ip link set dev $swp2 up
+	vlan_create $swp2 10
+	ip link set dev $swp2.10 master br0
+
+	ip link set dev $swp3 up
+	vlan_create $swp3 10 "" 192.0.2.17/28
+	tc qdisc add dev $swp3 clsact
+
+	# Replace neighbor to avoid 1 packet which is forwarded in software due
+	# to "unresolved neigh".
+	ip neigh replace dev $swp3.10 192.0.2.18 lladdr $(mac_get $h3.10)
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp3 clsact
+	vlan_destroy $swp3 10
+	ip link set dev $swp3 down
+
+	ip link set dev $swp2.10 nomaster
+	vlan_destroy $swp2 10
+	ip link set dev $swp2 down
+
+	ip link set dev br0 down
+	sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+	ip link del dev br0
+
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	h3_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+bridge_rif_add()
+{
+	rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	__addr_add_del br0 add 192.0.2.2/28
+	rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+	expected_rifs=$((rifs_occ_t0 + 1))
+
+	[[ $expected_rifs -eq $rifs_occ_t1 ]]
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	sleep 1
+}
+
+bridge_rif_del()
+{
+	__addr_add_del br0 del 192.0.2.2/28
+}
+
+port_vid_map_rif()
+{
+	RET=0
+
+	# First add {port, VID}->FID for $swp1.10, then add a RIF and verify
+	# that packets can be routed via the existing mapping.
+	vlan_create $swp1 10
+	ip link set dev $swp1.10 master br0
+	bridge_rif_add
+
+	# The hardware matches on the first ethertype which is not VLAN,
+	# so the protocol should be IP.
+	tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.18 action pass
+
+	ping_do $h1.10 192.0.2.18
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add RIF for existing {port, VID}->FID mapping"
+
+	tc filter del dev $swp3 egress
+
+	bridge_rif_del
+	ip link set dev $swp1.10 nomaster
+	vlan_destroy $swp1 10
+}
+
+rif_port_vid_map()
+{
+	RET=0
+
+	# First add an address to the bridge, which will create a RIF on top of
+	# it, then add a new {port, VID}->FID mapping and verify that packets
+	# can be routed via the new mapping.
+	bridge_rif_add
+	vlan_create $swp1 10
+	ip link set dev $swp1.10 master br0
+
+	# The hardware matches on the first ethertype which is not VLAN,
+	# so the protocol should be IP.
+	tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.18 action pass
+
+	ping_do $h1.10 192.0.2.18
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add {port, VID}->FID mapping for FID with a RIF"
+
+	tc filter del dev $swp3 egress
+
+	ip link set dev $swp1.10 nomaster
+	vlan_destroy $swp1 10
+	bridge_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh
new file mode 100755
index 000000000000..577293bab88b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh
@@ -0,0 +1,264 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing over bridge and verify that the order of configuration does not
+# impact switch behavior. Verify that RIF is added correctly for existing
+# mapping and that packets can be routed via port which is added after the FID
+# already has a RIF.
+
+# +-------------------+                   +--------------------+
+# | H1                |                   | H2                 |
+# |                   |                   |                    |
+# |         $h1.10 +  |                   |  + $h2.10          |
+# |   192.0.2.1/28 |  |                   |  | 192.0.2.3/28    |
+# |                |  |                   |  |                 |
+# |            $h1 +  |                   |  + $h2             |
+# +----------------|--+                   +--|-----------------+
+#                  |                         |
+# +----------------|-------------------------|-----------------+
+# | SW             |                         |                 |
+# | +--------------|-------------------------|---------------+ |
+# | |        $swp1 +                         + $swp2         | |
+# | |                                                        | |
+# | |                           br0                          | |
+# | +--------------------------------------------------------+ |
+# |                              |                             |
+# |                           br0.10                           |
+# |                        192.0.2.2/28                        |
+# |                                                            |
+# |                                                            |
+# |          $swp3 +                                           |
+# |  192.0.2.17/28 |                                           |
+# +----------------|-------------------------------------------+
+#                  |
+# +----------------|--+
+# |            $h3 +  |
+# |  192.0.2.18/28    |
+# |                   |
+# | H3                |
+# +-------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	vid_map_rif
+	rif_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 10 v$h1 192.0.2.1/28
+
+	ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+	vlan_destroy $h1 10
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 10
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.18/28
+	ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+	ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+	simple_if_fini $h3 192.0.2.18/28
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+
+	ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+	# By default, a link-local address is generated when netdevice becomes
+	# up. Adding an address to the bridge will cause creating a RIF for it.
+	# Prevent generating link-local address to be able to control when the
+	# RIF is added.
+	sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+	ip link set dev br0 up
+
+	ip link set dev $swp2 up
+	ip link set dev $swp2 master br0
+	bridge vlan add vid 10 dev $swp2
+
+	ip link set dev $swp3 up
+	__addr_add_del $swp3 add 192.0.2.17/28
+	tc qdisc add dev $swp3 clsact
+
+	# Replace neighbor to avoid 1 packet which is forwarded in software due
+	# to "unresolved neigh".
+	ip neigh replace dev $swp3 192.0.2.18 lladdr $(mac_get $h3)
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp3 clsact
+	__addr_add_del $swp3 del 192.0.2.17/28
+	ip link set dev $swp3 down
+
+	bridge vlan del vid 10 dev $swp2
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp2 down
+
+	ip link set dev br0 down
+	sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+	ip link del dev br0
+
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	h3_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+bridge_rif_add()
+{
+	rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	vlan_create br0 10 "" 192.0.2.2/28
+	rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+	expected_rifs=$((rifs_occ_t0 + 1))
+
+	[[ $expected_rifs -eq $rifs_occ_t1 ]]
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	sleep 1
+}
+
+bridge_rif_del()
+{
+	vlan_destroy br0 10
+}
+
+vid_map_rif()
+{
+	RET=0
+
+	# First add VID->FID for vlan 10, then add a RIF and verify that
+	# packets can be routed via the existing mapping.
+	bridge vlan add vid 10 dev br0 self
+	ip link set dev $swp1 master br0
+	bridge vlan add vid 10 dev $swp1
+
+	bridge_rif_add
+
+	tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.18 action pass
+
+	ping_do $h1.10 192.0.2.18
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add RIF for existing VID->FID mapping"
+
+	tc filter del dev $swp3 egress
+
+	bridge_rif_del
+
+	bridge vlan del vid 10 dev $swp1
+	ip link set dev $swp1 nomaster
+	bridge vlan del vid 10 dev br0 self
+}
+
+rif_vid_map()
+{
+	RET=0
+
+	# Using 802.1Q, there is only one VID->FID map for each VID. That means
+	# that we cannot really check adding a new map for existing FID with a
+	# RIF. Verify that packets can be routed via port which is added after
+	# the FID already has a RIF, although in practice there is no new
+	# mapping in the hardware.
+	bridge vlan add vid 10 dev br0 self
+	bridge_rif_add
+
+	ip link set dev $swp1 master br0
+	bridge vlan add vid 10 dev $swp1
+
+	tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.18 action pass
+
+	ping_do $h1.10 192.0.2.18
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add port to VID->FID mapping for FID with a RIF"
+
+	tc filter del dev $swp3 egress
+
+	bridge vlan del vid 10 dev $swp1
+	ip link set dev $swp1 nomaster
+
+	bridge_rif_del
+	bridge vlan del vid 10 dev br0 self
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh
new file mode 100755
index 000000000000..90450216a10d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh
@@ -0,0 +1,311 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing after VXLAN decapsulation and verify that the order of
+# configuration does not impact switch behavior. Verify that RIF is added
+# correctly for existing mapping and that new mapping uses the correct RIF.
+
+# +---------------------------+
+# |                        H1 |
+# |    + $h1                  |
+# |    | 192.0.2.1/28         |
+# +----|----------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# | +--|--------------------------------------------------------------------+ |
+# | |  + $swp1                         br1                                  | |
+# | |     vid 10 pvid untagged                                              | |
+# | |                                                                       | |
+# | |                                                                       | |
+# | |                                            + vx4001                   | |
+# | |                                              local 192.0.2.17         | |
+# | |                                              remote 192.0.2.18        | |
+# | |                                              id 104001                | |
+# | |                                              dstport $VXPORT          | |
+# | |                                              vid 4001 pvid untagged   | |
+# | |                                                                       | |
+# | +----------------------------------+------------------------------------+ |
+# |                                    |                                      |
+# | +----------------------------------|------------------------------------+ |
+# | |                                  |                                    | |
+# | |  +-------------------------------+---------------------------------+  | |
+# | |  |                                                                 |  | |
+# | |  + vlan10                                                 vlan4001 +  | |
+# | |    192.0.2.2/28                                                       | |
+# | |                                                                       | |
+# | |                               vrf-green                               | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |    + $rp1                                       +lo                       |
+# |    | 198.51.100.1/24                             192.0.2.17/32            |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                             v$rp2      |
+# |    + $rp2                                                   |
+# |      198.51.100.2/24                                        |
+# |                                                             |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	vni_fid_map_rif
+	rif_vni_fid_map
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+: ${VXPORT:=4789}
+export VXPORT
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+		mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	ip link set dev $rp1 up
+	ip address add dev $rp1 198.51.100.1/24
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	bridge vlan add vid 10 dev $swp1 pvid untagged
+
+	tc qdisc add dev $swp1 clsact
+
+	ip link add name vx4001 type vxlan id 104001 \
+		local 192.0.2.17 dstport $VXPORT \
+		nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx4001 up
+
+	ip link set dev vx4001 master br1
+
+	ip address add 192.0.2.17/32 dev lo
+
+	# Create SVIs.
+	vrf_create "vrf-green"
+	ip link set dev vrf-green up
+
+	ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+
+	# Replace neighbor to avoid 1 packet which is forwarded in software due
+	# to "unresolved neigh".
+	ip neigh replace dev vlan10 192.0.2.1 lladdr $(mac_get $h1)
+
+	ip address add 192.0.2.2/28 dev vlan10
+
+	bridge vlan add vid 10 dev br1 self
+	bridge vlan add vid 4001 dev br1 self
+
+	sysctl_set net.ipv4.conf.all.rp_filter 0
+}
+
+switch_destroy()
+{
+	sysctl_restore net.ipv4.conf.all.rp_filter
+
+	bridge vlan del vid 4001 dev br1 self
+	bridge vlan del vid 10 dev br1 self
+
+	ip link del dev vlan10
+
+	vrf_destroy "vrf-green"
+
+	ip address del 192.0.2.17/32 dev lo
+
+	tc qdisc del dev $swp1 clsact
+
+	bridge vlan del vid 10 dev $swp1
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev vx4001 nomaster
+
+	ip link set dev vx4001 down
+	ip link del dev vx4001
+
+	ip address del dev $rp1 198.51.100.1/24
+	ip link set dev $rp1 down
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 198.51.100.2/24
+
+	ip route add 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1
+}
+
+vrp2_destroy()
+{
+	ip route del 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1
+
+	simple_if_fini $rp2 198.51.100.2/24
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	rp1=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	switch_create
+
+	vrp2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vrp2_destroy
+
+	switch_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+payload_get()
+{
+	local dest_mac=$(mac_get vlan4001)
+	local src_mac=$(mac_get $rp1)
+
+	p=$(:
+		)"08:"$(                      : VXLAN flags
+		)"00:00:00:"$(                : VXLAN reserved
+		)"01:96:41:"$(                : VXLAN VNI : 104001
+		)"00:"$(                      : VXLAN reserved
+		)"$dest_mac:"$(               : ETH daddr
+		)"$src_mac:"$(                : ETH saddr
+		)"08:00:"$(                   : ETH type
+		)"45:"$(                      : IP version + IHL
+		)"00:"$(                      : IP TOS
+		)"00:54:"$(                   : IP total length
+		)"3f:49:"$(                   : IP identification
+		)"00:00:"$(                   : IP flags + frag off
+		)"3f:"$(                      : IP TTL
+		)"01:"$(                      : IP proto
+		)"50:21:"$(                   : IP header csum
+		)"c6:33:64:0a:"$(             : IP saddr: 198.51.100.10
+		)"c0:00:02:01:"$(             : IP daddr: 192.0.2.1
+	)
+	echo $p
+}
+
+vlan_rif_add()
+{
+	rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+	ip link add link br1 name vlan4001 up master vrf-green \
+		type vlan id 4001
+
+	rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	expected_rifs=$((rifs_occ_t0 + 1))
+
+	[[ $expected_rifs -eq $rifs_occ_t1 ]]
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+}
+
+vlan_rif_del()
+{
+	ip link del dev vlan4001
+}
+
+vni_fid_map_rif()
+{
+	local rp1_mac=$(mac_get $rp1)
+
+	RET=0
+
+	# First add VNI->FID mapping to the FID of VLAN 4001
+	bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+	# Add a RIF to the FID with VNI->FID mapping
+	vlan_rif_add
+
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.1 action pass
+
+	payload=$(payload_get)
+	ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \
+		-B 192.0.2.17 -A 192.0.2.18 \
+		-t udp sp=12345,dp=$VXPORT,p=$payload -q
+
+	tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add RIF for existing VNI->FID mapping"
+
+	tc filter del dev $swp1 egress
+
+	bridge vlan del vid 4001 dev vx4001 pvid untagged
+	vlan_rif_del
+}
+
+rif_vni_fid_map()
+{
+	local rp1_mac=$(mac_get $rp1)
+
+	RET=0
+
+	# First add a RIF to the FID of VLAN 4001
+	vlan_rif_add
+
+	# Add VNI->FID mapping to FID with a RIF
+	bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.1 action pass
+
+	payload=$(payload_get)
+	ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \
+		-B 192.0.2.17 -A 192.0.2.18 \
+		-t udp sp=12345,dp=$VXPORT,p=$payload -q
+
+	tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add VNI->FID mapping for FID with a RIF"
+
+	tc filter del dev $swp1 egress
+
+	bridge vlan del vid 4001 dev vx4001 pvid untagged
+	vlan_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
new file mode 100755
index 000000000000..e1ad623146d7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
@@ -0,0 +1,202 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# ../../../net/forwarding/mirror_gre_topo_lib.sh for more details.
+#
+# Test offloading various features of offloading gretap mirrors specific to
+# mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/mirror_lib.sh
+source $lib_dir/mirror_gre_lib.sh
+source $lib_dir/mirror_gre_topo_lib.sh
+
+ALL_TESTS="
+	test_keyful
+	test_soft
+	test_tos_fixed
+	test_ttl_inherit
+"
+
+setup_keyful()
+{
+	tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \
+		      ttl 100 tos inherit allow-localremote \
+		      key 1234
+
+	tunnel_create h3-gt6-key ip6gretap 2001:db8:3::2 2001:db8:3::1 \
+		      key 1234
+	ip link set h3-gt6-key vrf v$h3
+	matchall_sink_create h3-gt6-key
+
+	ip address add dev $swp3 2001:db8:3::1/64
+	ip address add dev $h3 2001:db8:3::2/64
+}
+
+cleanup_keyful()
+{
+	ip address del dev $h3 2001:db8:3::2/64
+	ip address del dev $swp3 2001:db8:3::1/64
+
+	tunnel_destroy h3-gt6-key
+	tunnel_destroy gt6-key
+}
+
+setup_soft()
+{
+	# Set up a topology for testing underlay routes that point at an
+	# unsupported soft device.
+
+	tunnel_create gt6-soft ip6gretap 2001:db8:4::1 2001:db8:4::2 \
+		      ttl 100 tos inherit allow-localremote
+
+	tunnel_create h3-gt6-soft ip6gretap 2001:db8:4::2 2001:db8:4::1
+	ip link set h3-gt6-soft vrf v$h3
+	matchall_sink_create h3-gt6-soft
+
+	ip link add name v1 type veth peer name v2
+	ip link set dev v1 up
+	ip address add dev v1 2001:db8:4::1/64
+
+	ip link set dev v2 vrf v$h3
+	ip link set dev v2 up
+	ip address add dev v2 2001:db8:4::2/64
+}
+
+cleanup_soft()
+{
+	ip link del dev v1
+
+	tunnel_destroy h3-gt6-soft
+	tunnel_destroy gt6-soft
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	setup_keyful
+	setup_soft
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	cleanup_soft
+	cleanup_keyful
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_ttl_inherit()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev type $type ttl inherit
+	mirror_install $swp1 ingress $tundev "matchall"
+	fail_test_span_gre_dir $tundev
+
+	ip link set dev $tundev type $type ttl 100
+
+	quick_test_span_gre_dir $tundev
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: no offload on TTL of inherit"
+}
+
+test_span_gre_tos_fixed()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev type $type tos 0x10
+	mirror_install $swp1 ingress $tundev "matchall"
+	fail_test_span_gre_dir $tundev
+
+	ip link set dev $tundev type $type tos inherit
+	quick_test_span_gre_dir $tundev
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: no offload on a fixed TOS"
+}
+
+test_span_failable()
+{
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall"
+	fail_test_span_gre_dir  $tundev
+	mirror_uninstall $swp1 ingress
+
+	log_test "fail $what"
+}
+
+test_keyful()
+{
+	test_span_failable gt6-key "mirror to keyful gretap"
+}
+
+test_soft()
+{
+	test_span_failable gt6-soft "mirror to gretap w/ soft underlay"
+}
+
+test_tos_fixed()
+{
+	test_span_gre_tos_fixed gt4 gretap "mirror to gretap"
+	test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap"
+}
+
+
+test_ttl_inherit()
+{
+	test_span_gre_ttl_inherit gt4 gretap "mirror to gretap"
+	test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
new file mode 100644
index 000000000000..d43093310e23
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
@@ -0,0 +1,185 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Test offloading a number of mirrors-to-gretap. The test creates a number of
+# tunnels. Then it adds one flower mirror for each of the tunnels, matching a
+# given host IP. Then it generates traffic at each of the host IPs and checks
+# that the traffic has been mirrored at the appropriate tunnel.
+#
+#   +--------------------------+                   +--------------------------+
+#   | H1                       |                   |                       H2 |
+#   |     + $h1                |                   |                $h2 +     |
+#   |     | 2001:db8:1:X::1/64 |                   | 2001:db8:1:X::2/64 |     |
+#   +-----|--------------------+                   +--------------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirrors                                                  |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   |     + $swp3                          + gt6-<X> (ip6gretap)              |
+#   |     | 2001:db8:2:X::1/64             : loc=2001:db8:2:X::1              |
+#   |     |                                : rem=2001:db8:2:X::2              |
+#   |     |                                : ttl=100                          |
+#   |     |                                : tos=inherit                      |
+#   |     |                                :                                  |
+#   +-----|--------------------------------:----------------------------------+
+#         |                                :
+#   +-----|--------------------------------:----------------------------------+
+#   | H3  + $h3                            + h3-gt6-<X> (ip6gretap)           |
+#   |       2001:db8:2:X::2/64               loc=2001:db8:2:X::2              |
+#   |                                        rem=2001:db8:2:X::1              |
+#   |                                        ttl=100                          |
+#   |                                        tos=inherit                      |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+
+source ../../../../net/forwarding/mirror_lib.sh
+
+MIRROR_NUM_NETIFS=6
+
+mirror_gre_ipv6_addr()
+{
+	local net=$1; shift
+	local num=$1; shift
+
+	printf "2001:db8:%x:%x" $net $num
+}
+
+mirror_gre_tunnels_create()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	MIRROR_GRE_BATCH_FILE="$(mktemp)"
+	for ((i=0; i < count; ++i)); do
+		local match_dip=$(mirror_gre_ipv6_addr 1 $i)::2
+		local htun=h3-gt6-$i
+		local tun=gt6-$i
+
+		((mirror_gre_tunnels++))
+
+		ip address add dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+		ip address add dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+
+		ip address add dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+		ip address add dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+
+		tunnel_create $tun ip6gretap \
+			      $(mirror_gre_ipv6_addr 2 $i)::1 \
+			      $(mirror_gre_ipv6_addr 2 $i)::2 \
+			      ttl 100 tos inherit allow-localremote
+
+		tunnel_create $htun ip6gretap \
+			      $(mirror_gre_ipv6_addr 2 $i)::2 \
+			      $(mirror_gre_ipv6_addr 2 $i)::1
+		ip link set $htun vrf v$h3
+		matchall_sink_create $htun
+
+		cat >> $MIRROR_GRE_BATCH_FILE <<-EOF
+			filter add dev $swp1 ingress pref 1000 \
+				protocol ipv6 \
+				flower skip_sw dst_ip $match_dip \
+				action mirred egress mirror dev $tun
+		EOF
+	done
+
+	tc -b $MIRROR_GRE_BATCH_FILE
+	check_err_fail $should_fail $? "Mirror rule insertion"
+}
+
+mirror_gre_tunnels_destroy()
+{
+	local count=$1; shift
+
+	for ((i=0; i < count; ++i)); do
+		local htun=h3-gt6-$i
+		local tun=gt6-$i
+
+		ip address del dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+		ip address del dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+
+		ip address del dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+		ip address del dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+
+		tunnel_destroy $htun
+		tunnel_destroy $tun
+	done
+}
+
+mirror_gre_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	mirror_gre_tunnels_create $count $should_fail
+	if ((should_fail)); then
+	    return
+	fi
+
+	sleep 5
+
+	for ((i = 0; i < count; ++i)); do
+		local sip=$(mirror_gre_ipv6_addr 1 $i)::1
+		local dip=$(mirror_gre_ipv6_addr 1 $i)::2
+		local htun=h3-gt6-$i
+		local message
+
+		icmp6_capture_install $htun
+		mirror_test v$h1 $sip $dip $htun 100 10
+		icmp6_capture_uninstall $htun
+	done
+}
+
+mirror_gre_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	mirror_gre_tunnels=0
+
+	vrf_prepare
+
+	simple_if_init $h1
+	simple_if_init $h2
+	simple_if_init $h3
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 addrgenmode none
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	tc qdisc add dev $swp1 clsact
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	ip link set dev $swp3 up
+}
+
+mirror_gre_cleanup()
+{
+	mirror_gre_tunnels_destroy $mirror_gre_tunnels
+
+	ip link set dev $swp3 down
+
+	ip link set dev $swp2 down
+
+	tc qdisc del dev $swp1 clsact
+	ip link set dev $swp1 down
+
+	ip link del dev br1
+
+	simple_if_fini $h3
+	simple_if_fini $h2
+	simple_if_fini $h1
+
+	vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
new file mode 100644
index 000000000000..48395cfd4f95
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+if [[ ! -v MLXSW_CHIP ]]; then
+	MLXSW_CHIP=$(devlink -j dev info $DEVLINK_DEV | jq -r '.[][]["driver"]')
+	if [ -z "$MLXSW_CHIP" ]; then
+		echo "SKIP: Device $DEVLINK_DEV doesn't support devlink info command"
+		exit 1
+	fi
+fi
+
+MLXSW_SPECTRUM_REV=$(case $MLXSW_CHIP in
+			     mlxsw_spectrum)
+				     echo 1 ;;
+			     mlxsw_spectrum*)
+				     echo ${MLXSW_CHIP#mlxsw_spectrum} ;;
+			     *)
+				     echo "Couldn't determine Spectrum chip revision." \
+					  > /dev/stderr ;;
+		     esac)
+
+mlxsw_on_spectrum()
+{
+	local rev=$1; shift
+	local op="=="
+	local rev2=${rev%+}
+
+	if [[ $rev2 != $rev ]]; then
+		op=">="
+	fi
+
+	((MLXSW_SPECTRUM_REV $op rev2))
+}
+
+__mlxsw_only_on_spectrum()
+{
+	local rev=$1; shift
+	local caller=$1; shift
+	local src=$1; shift
+
+	if ! mlxsw_on_spectrum "$rev"; then
+		log_test_xfail $src:$caller "(Spectrum-$rev only)"
+		return 1
+	fi
+}
+
+mlxsw_only_on_spectrum()
+{
+	local caller=${FUNCNAME[1]}
+	local src=${BASH_SOURCE[1]}
+	local rev
+
+	for rev in "$@"; do
+		if __mlxsw_only_on_spectrum "$rev" "$caller" "$src"; then
+			return 0
+		fi
+	done
+
+	return 1
+}
+
+mlxsw_max_descriptors_get()
+{
+	local spectrum_rev=$MLXSW_SPECTRUM_REV
+
+	case $spectrum_rev in
+	1) echo 81920 ;;
+	2) echo 136960 ;;
+	3) echo 204800 ;;
+	4) echo 220000 ;;
+	*) echo "Unknown max descriptors for chip revision." > /dev/stderr
+	   return 1 ;;
+	esac
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh
new file mode 100755
index 000000000000..fca0e1e642c6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh
@@ -0,0 +1,260 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test a "one-armed router" [1] scenario. Packets forwarded between H1 and H2
+# should be forwarded by the ASIC, but also trapped so that ICMP redirect
+# packets could be potentially generated.
+#
+# 1. https://en.wikipedia.org/wiki/One-armed_router
+#
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    | 2001:db8:1::1/64           |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# |    |  default via 2001:db8:1::2 |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# | +--|--------------------------------------------------------------------+ |
+# | |  + $swp1                   BR0 (802.1d)                               | |
+# | |                                                                       | |
+# | |                            192.0.2.2/24                               | |
+# | |                          2001:db8:1::2/64                             | |
+# | |                           198.51.100.2/24                             | |
+# | |                          2001:db8:2::2/64                             | |
+# | |                                                                       | |
+# | |  + $swp2                                                              | |
+# | +--|--------------------------------------------------------------------+ |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |  default via 2001:db8:2::2 |
+# |    |                            |
+# |    | 2001:db8:2::1/64           |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="ping_ipv4 ping_ipv6 fwd_mark_ipv4 fwd_mark_ipv6"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+	ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
+}
+
+switch_create()
+{
+	ip link add name br0 address $(mac_get $swp1) \
+		type bridge mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br0
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+
+	__addr_add_del br0 add 192.0.2.2/24 2001:db8:1::2/64
+	__addr_add_del br0 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+switch_destroy()
+{
+	__addr_add_del br0 del 198.51.100.2/24 2001:db8:2::2/64
+	__addr_add_del br0 del 192.0.2.2/24 2001:db8:1::2/64
+
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev br0 down
+	ip link del dev br0
+}
+
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.1 ": h1->h2"
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::1 ": h1->h2"
+}
+
+fwd_mark_ipv4()
+{
+	# Transmit packets from H1 to H2 and make sure they are trapped at
+	# swp1 due to loopback error, but only forwarded by the ASIC through
+	# swp2
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \
+		action pass
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 flower \
+		skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \
+		action pass
+
+	tc filter add dev $swp2 egress protocol ip pref 2 handle 102 flower \
+		skip_sw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \
+		action pass
+
+	ip vrf exec v$h1 $MZ $h1 -c 10 -d 100msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	RET=0
+
+	tc_check_packets "dev $swp1 ingress" 101 10
+	check_err $?
+
+	log_test "fwd mark: trapping IPv4 packets due to LBERROR"
+
+	RET=0
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_err $?
+
+	log_test "fwd mark: forwarding IPv4 packets in software"
+
+	RET=0
+
+	tc_check_packets "dev $swp2 egress" 102 10
+	check_err $?
+
+	log_test "fwd mark: forwarding IPv4 packets in hardware"
+
+	tc filter del dev $swp2 egress protocol ip pref 2 handle 102 flower
+	tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+fwd_mark_ipv6()
+{
+	tc filter add dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower \
+		skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \
+		action pass
+
+	tc filter add dev $swp2 egress protocol ipv6 pref 1 handle 101 flower \
+		skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \
+		action pass
+
+	tc filter add dev $swp2 egress protocol ipv6 pref 2 handle 102 flower \
+		skip_sw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \
+		action pass
+
+	ip vrf exec v$h1 $MZ $h1 -6 -c 10 -d 100msec -p 64 -A 2001:db8:1::1 \
+		-B 2001:db8:2::1 -t udp dp=52768,sp=42768 -q
+
+	RET=0
+
+	tc_check_packets "dev $swp1 ingress" 101 10
+	check_err $?
+
+	log_test "fwd mark: trapping IPv6 packets due to LBERROR"
+
+	RET=0
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_err $?
+
+	log_test "fwd mark: forwarding IPv6 packets in software"
+
+	RET=0
+
+	tc_check_packets "dev $swp2 egress" 102 10
+	check_err $?
+
+	log_test "fwd mark: forwarding IPv6 packets in hardware"
+
+	tc filter del dev $swp2 egress protocol ipv6 pref 2 handle 102 flower
+	tc filter del dev $swp2 egress protocol ipv6 pref 1 handle 101 flower
+	tc filter del dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	sysctl_set net.ipv4.conf.all.accept_redirects 0
+	sysctl_set net.ipv6.conf.all.accept_redirects 0
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	sysctl_restore net.ipv6.conf.all.accept_redirects
+	sysctl_restore net.ipv4.conf.all.accept_redirects
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
new file mode 100755
index 000000000000..fe0343b95e6c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that PCI reset works correctly by verifying that only the expected reset
+# methods are supported and that after issuing the reset the ifindex of the
+# port changes.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	pci_reset_test
+"
+NUM_NETIFS=1
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+pci_reset_test()
+{
+	RET=0
+
+	local bus=$(echo $DEVLINK_DEV | cut -d '/' -f 1)
+	local bdf=$(echo $DEVLINK_DEV | cut -d '/' -f 2)
+
+	if [ $bus != "pci" ]; then
+		check_err 1 "devlink device is not a PCI device"
+		log_test "pci reset"
+		return
+	fi
+
+	if [ ! -f /sys/bus/pci/devices/$bdf/reset_method ]; then
+		check_err 1 "reset is not supported"
+		log_test "pci reset"
+		return
+	fi
+
+	[[ $(cat /sys/bus/pci/devices/$bdf/reset_method) == "bus" ]]
+	check_err $? "only \"bus\" reset method should be supported"
+
+	local ifindex_pre=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+	echo 1 > /sys/bus/pci/devices/$bdf/reset
+	check_err $? "reset failed"
+
+	# Wait for udev to rename newly created netdev.
+	udevadm settle
+
+	local ifindex_post=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+	[[ $ifindex_pre != $ifindex_post ]]
+	check_err $? "reset not performed"
+
+	log_test "pci reset"
+}
+
+swp1=${NETIFS[p1]}
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh
new file mode 100755
index 000000000000..b1f0781f6b25
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that filters that match on the same port range, but with different
+# combination of IPv4/IPv6 and TCP/UDP all use the same port range register by
+# observing port range registers' occupancy via devlink-resource.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	port_range_occ_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+switch_create()
+{
+	simple_if_init $swp1
+	tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	vrf_prepare
+
+	h1_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+port_range_occ_get()
+{
+	devlink_resource_occ_get port_range_registers
+}
+
+port_range_occ_test()
+{
+	RET=0
+
+	local occ=$(port_range_occ_get)
+
+	# Two port range registers are used, for source and destination port
+	# ranges.
+	tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+		flower skip_sw ip_proto udp src_port 1-100 dst_port 1-100 \
+		action pass
+	(( occ + 2 == $(port_range_occ_get) ))
+	check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))"
+
+	tc filter add dev $swp1 ingress pref 1 handle 102 proto ip \
+		flower skip_sw ip_proto tcp src_port 1-100 dst_port 1-100 \
+		action pass
+	tc filter add dev $swp1 ingress pref 2 handle 103 proto ipv6 \
+		flower skip_sw ip_proto udp src_port 1-100 dst_port 1-100 \
+		action pass
+	tc filter add dev $swp1 ingress pref 2 handle 104 proto ipv6 \
+		flower skip_sw ip_proto tcp src_port 1-100 dst_port 1-100 \
+		action pass
+	(( occ + 2 == $(port_range_occ_get) ))
+	check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))"
+
+	tc filter del dev $swp1 ingress pref 2 handle 104 flower
+	tc filter del dev $swp1 ingress pref 2 handle 103 flower
+	tc filter del dev $swp1 ingress pref 1 handle 102 flower
+	(( occ + 2 == $(port_range_occ_get) ))
+	check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))"
+
+	tc filter del dev $swp1 ingress pref 1 handle 101 flower
+	(( occ == $(port_range_occ_get) ))
+	check_err $? "Got occupancy $(port_range_occ_get), expected $occ"
+
+	log_test "port range occupancy"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh
new file mode 100644
index 000000000000..2a70840ff14b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-2.0
+
+PORT_RANGE_NUM_NETIFS=2
+
+port_range_h1_create()
+{
+	simple_if_init $h1
+}
+
+port_range_h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+port_range_switch_create()
+{
+	simple_if_init $swp1
+	tc qdisc add dev $swp1 clsact
+}
+
+port_range_switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1
+}
+
+port_range_rules_create()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+	local batch_file="$(mktemp)"
+
+	for ((i = 0; i < count; ++i)); do
+		cat >> $batch_file <<-EOF
+			filter add dev $swp1 ingress \
+				prot ipv4 \
+				pref 1000 \
+				flower skip_sw \
+				ip_proto udp dst_port 1-$((100 + i)) \
+				action pass
+		EOF
+	done
+
+	tc -b $batch_file
+	check_err_fail $should_fail $? "Rule insertion"
+
+	rm -f $batch_file
+}
+
+__port_range_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	port_range_rules_create $count $should_fail
+
+	offload_count=$(tc -j filter show dev $swp1 ingress |
+			jq "[.[] | select(.options.in_hw == true)] | length")
+	((offload_count == count))
+	check_err_fail $should_fail $? "port range offload count"
+}
+
+port_range_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	if ! tc_offload_check $PORT_RANGE_NUM_NETIFS; then
+		check_err 1 "Could not test offloaded functionality"
+		return
+	fi
+
+	__port_range_test $count $should_fail
+}
+
+port_range_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	vrf_prepare
+
+	port_range_h1_create
+	port_range_switch_create
+}
+
+port_range_cleanup()
+{
+	pre_cleanup
+
+	port_range_switch_destroy
+	port_range_h1_destroy
+
+	vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
new file mode 100644
index 000000000000..1e9a4aff76a2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for physical ports resource. The test splits each splittable port
+# to its width and checks that eventually the number of physical ports equals
+# the maximum number of physical ports.
+
+PORT_NUM_NETIFS=0
+
+declare -a unsplit
+
+port_setup_prepare()
+{
+	:
+}
+
+port_cleanup()
+{
+	pre_cleanup
+
+	for port in "${unsplit[@]}"; do
+		devlink port unsplit $port
+		check_err $? "Did not unsplit $netdev"
+	done
+	unsplit=()
+}
+
+split_all_ports()
+{
+	local should_fail=$1; shift
+
+	# Loop over the splittable netdevs and create tuples of netdev along
+	# with its width. For example:
+	# '$netdev1 $count1 $netdev2 $count2...', when:
+	# $netdev1-2 are splittable netdevs in the device, and
+	# $count1-2 are the netdevs width respectively.
+	while read netdev count <<<$(
+		devlink -j port show |
+		jq -r '.[][] | select(.splittable==true) | "\(.netdev) \(.lanes)"'
+		)
+		[[ ! -z $netdev ]]
+	do
+		devlink port split $netdev count $count
+		check_err $? "Did not split $netdev into $count"
+		unsplit+=( "${netdev}s0" )
+	done
+}
+
+port_test()
+{
+	local max_ports=$1; shift
+	local should_fail=$1; shift
+
+	split_all_ports $should_fail
+
+	occ=$(devlink -j resource show $DEVLINK_DEV \
+	      | jq '.[][][] | select(.name=="physical_ports") |.["occ"]')
+
+	[[ $occ -eq $max_ports ]]
+	check_err_fail $should_fail $? "Attempt to create $max_ports ports (actual result $occ)"
+
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
new file mode 100755
index 000000000000..00d55b0e98c1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
@@ -0,0 +1,304 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	create_8021ad_vlan_upper_on_top_front_panel_port
+	create_8021ad_vlan_upper_on_top_bridge_port
+	create_8021ad_vlan_upper_on_top_lag
+	create_8021ad_vlan_upper_on_top_bridge
+	create_8021ad_vlan_upper_on_top_8021ad_bridge
+	create_vlan_upper_on_top_8021ad_bridge
+	create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge
+	create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge
+	enslave_front_panel_with_vlan_upper_to_8021ad_bridge
+	enslave_lag_with_vlan_upper_to_8021ad_bridge
+	add_ip_address_to_8021ad_bridge
+	switch_bridge_protocol_from_8021q_to_8021ad
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	sleep 10
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+create_vlan_upper_on_top_of_bridge()
+{
+	RET=0
+
+	local bridge_proto=$1; shift
+	local netdev_proto=$1; shift
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol $bridge_proto vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 addrgenmode none
+
+	ip link set dev br0 up
+	ip link set dev $swp1 master br0
+
+	ip link add name br0.100 link br0 type vlan \
+		protocol $netdev_proto id 100 2>/dev/null
+	check_fail $? "$netdev_proto vlan upper creation on top of an $bridge_proto bridge not rejected"
+
+	ip link add name br0.100 link br0 type vlan \
+		protocol $netdev_proto id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "$netdev_proto vlan upper creation on top of an $bridge_proto bridge rejected without extack"
+
+	log_test "create $netdev_proto vlan upper on top $bridge_proto bridge"
+
+	ip link del dev br0
+}
+
+create_8021ad_vlan_upper_on_top_front_panel_port()
+{
+	RET=0
+
+	ip link add name $swp1.100 link $swp1 type vlan \
+		protocol 802.1ad id 100 2>/dev/null
+	check_fail $? "802.1ad vlan upper creation on top of a front panel not rejected"
+
+	ip link add name $swp1.100 link $swp1 type vlan \
+		protocol 802.1ad id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1ad vlan upper creation on top of a front panel rejected without extack"
+
+	log_test "create 802.1ad vlan upper on top of a front panel"
+}
+
+create_8021ad_vlan_upper_on_top_bridge_port()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 addrgenmode none
+
+	ip link set dev $swp1 master br0
+	ip link set dev br0 up
+
+	ip link add name $swp1.100 link $swp1 type vlan \
+		protocol 802.1ad id 100 2>/dev/null
+	check_fail $? "802.1ad vlan upper creation on top of a bridge port not rejected"
+
+	ip link add name $swp1.100 link $swp1 type vlan \
+		protocol 802.1ad id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1ad vlan upper creation on top of a bridge port rejected without extack"
+
+	log_test "create 802.1ad vlan upper on top of a bridge port"
+
+	ip link del dev br0
+}
+
+create_8021ad_vlan_upper_on_top_lag()
+{
+	RET=0
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp1 master bond1
+
+	ip link add name bond1.100 link bond1 type vlan \
+		protocol 802.1ad id 100 2>/dev/null
+	check_fail $? "802.1ad vlan upper creation on top of a lag not rejected"
+
+	ip link add name bond1.100 link bond1 type vlan \
+		protocol 802.1ad id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1ad vlan upper creation on top of a lag rejected without extack"
+
+	log_test "create 802.1ad vlan upper on top of a lag"
+
+	ip link del dev bond1
+}
+
+create_8021ad_vlan_upper_on_top_bridge()
+{
+	RET=0
+
+	create_vlan_upper_on_top_of_bridge "802.1q" "802.1ad"
+}
+
+create_8021ad_vlan_upper_on_top_8021ad_bridge()
+{
+	RET=0
+
+	create_vlan_upper_on_top_of_bridge "802.1ad" "802.1ad"
+}
+
+create_vlan_upper_on_top_8021ad_bridge()
+{
+	RET=0
+
+	create_vlan_upper_on_top_of_bridge "802.1ad" "802.1q"
+}
+
+create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 addrgenmode none
+	ip link set dev br0 up
+
+	ip link set dev $swp1 master br0
+
+	ip link add name $swp1.100 link $swp1 type vlan id 100 2>/dev/null
+	check_fail $? "vlan upper creation on top of front panel enslaved to 802.1ad bridge not rejected"
+
+	ip link add name $swp1.100 link $swp1 type vlan id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "vlan upper creation on top of front panel enslaved to 802.1ad bridge rejected without extack"
+
+	log_test "create vlan upper on top of front panel enslaved to 802.1ad bridge"
+
+	ip link del dev br0
+}
+
+create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 addrgenmode none
+	ip link set dev br0 up
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp1 master bond1
+	ip link set dev bond1 master br0
+
+	ip link add name bond1.100 link bond1 type vlan id 100 2>/dev/null
+	check_fail $? "vlan upper creation on top of lag enslaved to 802.1ad bridge not rejected"
+
+	ip link add name bond1.100 link bond1 type vlan id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "vlan upper creation on top of lag enslaved to 802.1ad bridge rejected without extack"
+
+	log_test "create vlan upper on top of lag enslaved to 802.1ad bridge"
+
+	ip link del dev bond1
+	ip link del dev br0
+}
+
+enslave_front_panel_with_vlan_upper_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 addrgenmode none
+	ip link set dev br0 up
+
+	ip link add name $swp1.100 link $swp1 type vlan id 100
+
+	ip link set dev $swp1 master br0 2>/dev/null
+	check_fail $? "front panel with vlan upper enslavemnt to 802.1ad bridge not rejected"
+
+	ip link set dev $swp1 master br0 2>&1 >/dev/null | grep -q mlxsw_spectrum
+	check_err $? "front panel with vlan upper enslavemnt to 802.1ad bridge rejected without extack"
+
+	log_test "enslave front panel with vlan upper to 802.1ad bridge"
+
+	ip link del dev $swp1.100
+	ip link del dev br0
+}
+
+enslave_lag_with_vlan_upper_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 addrgenmode none
+	ip link set dev br0 up
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp1 master bond1
+	ip link add name bond1.100 link bond1 type vlan id 100
+
+	ip link set dev bond1 master br0 2>/dev/null
+	check_fail $? "lag with vlan upper enslavemnt to 802.1ad bridge not rejected"
+
+	ip link set dev bond1 master br0 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "lag with vlan upper enslavemnt to 802.1ad bridge rejected without extack"
+
+	log_test "enslave lag with vlan upper to 802.1ad bridge"
+
+	ip link del dev bond1
+	ip link del dev br0
+}
+
+
+add_ip_address_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 addrgenmode none
+
+	ip link set dev br0 up
+	ip link set dev $swp1 master br0
+
+	ip addr add dev br0 192.0.2.17/28 2>/dev/null
+	check_fail $? "IP address addition to 802.1ad bridge not rejected"
+
+	ip addr add dev br0 192.0.2.17/28 2>&1 >/dev/null | grep -q mlxsw_spectrum
+	check_err $? "IP address addition to 802.1ad bridge rejected without extack"
+
+	log_test "IP address addition to 802.1ad bridge"
+
+	ip link del dev br0
+}
+
+switch_bridge_protocol_from_8021q_to_8021ad()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 addrgenmode none
+
+	ip link set dev br0 up
+	ip link set dev $swp1 master br0
+
+	ip link set dev br0 type bridge vlan_protocol 802.1q 2>/dev/null
+	check_fail $? "switching bridge protocol from 802.1q to 802.1ad not rejected"
+
+	log_test "switch bridge protocol"
+
+	ip link del dev br0
+}
+
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
new file mode 100755
index 000000000000..5492fa5550d7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for port-default priority. Non-IP packets ingress $swp1 and are
+# prioritized according to the default priority specified at the port.
+# rx_octets_prio_* counters are used to verify the prioritization.
+#
+# +----------------------------------+
+# | H1                               |
+# |    + $h1                         |
+# |    | 192.0.2.1/28                |
+# +----|-----------------------------+
+#      |
+# +----|-----------------------------+
+# | SW |                             |
+# |    + $swp1                       |
+# |      192.0.2.2/28                |
+# |      dcb app default-prio <prio> |
+# +----------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_defprio
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=2
+: ${HIT_TIMEOUT:=1000} # ms
+source $lib_dir/lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	ip addr add dev $swp1 192.0.2.2/28
+}
+
+switch_destroy()
+{
+	dcb app flush dev $swp1 default-prio
+	ip addr del dev $swp1 192.0.2.2/28
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	vrf_prepare
+
+	h1_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+__test_defprio()
+{
+	local prio_install=$1; shift
+	local prio_observe=$1; shift
+	local key
+	local t1
+	local i
+
+	RET=0
+
+	dcb app add dev $swp1 default-prio $prio_install
+
+	local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
+	mausezahn -q $h1 -d 100m -c 10 -t arp reply
+	t1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((t0 + 10))" \
+		ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
+
+	check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $((t1 - t0))."
+	log_test "Default priority $prio_install/$prio_observe"
+
+	dcb app del dev $swp1 default-prio $prio_install
+}
+
+test_defprio()
+{
+	local prio
+
+	for prio in {0..7}; do
+		__test_defprio $prio $prio
+	done
+
+	dcb app add dev $swp1 default-prio 3
+	__test_defprio 0 3
+	__test_defprio 1 3
+	__test_defprio 2 3
+	__test_defprio 4 4
+	__test_defprio 5 5
+	__test_defprio 6 6
+	__test_defprio 7 7
+	dcb app del dev $swp1 default-prio 3
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
new file mode 100755
index 000000000000..914c63d6318a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization and rewrite. Packets ingress $swp1 with a DSCP
+# tag and are prioritized according to the map at $swp1. They egress $swp2 and
+# the DSCP value is updated to match the map at that interface. The updated DSCP
+# tag is verified at $h2.
+#
+# ICMP responses are produced with the same DSCP tag that arrived at $h2. They
+# go through prioritization at $swp2 and DSCP retagging at $swp1. The tag is
+# verified at $h1--it should match the original tag.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |   192.0.2.2/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |  +-|----------------------------------------------------------------|-+   |
+# |  | + $swp1                       BR                           $swp2 + |   |
+# |  |   dcb dscp-prio 10:0...17:7            dcb dscp-prio 20:0...27:7   |   |
+# |  +--------------------------------------------------------------------+   |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_dscp
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+	tc qdisc add dev $h1 clsact
+	dscp_capture_install $h1 10
+}
+
+h1_destroy()
+{
+	dscp_capture_uninstall $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+	tc qdisc add dev $h2 clsact
+	dscp_capture_install $h2 20
+}
+
+h2_destroy()
+{
+	dscp_capture_uninstall $h2 20
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 addrgenmode none
+	ip link set dev br1 up
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	dcb app add dev $swp1 dscp-prio 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7
+	dcb app add dev $swp2 dscp-prio 20:0 21:1 22:2 23:3 24:4 25:5 26:6 27:7
+}
+
+switch_destroy()
+{
+	dcb app del dev $swp2 dscp-prio 20:0 21:1 22:2 23:3 24:4 25:5 26:6 27:7
+	dcb app del dev $swp1 dscp-prio 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7
+
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+dscp_ping_test()
+{
+	local vrf_name=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local prio=$1; shift
+	local dev_10=$1; shift
+	local dev_20=$1; shift
+	local key
+
+	local dscp_10=$(((prio + 10) << 2))
+	local dscp_20=$(((prio + 20) << 2))
+
+	RET=0
+
+	local -A t0s
+	eval "t0s=($(dscp_fetch_stats $dev_10 10)
+		   $(dscp_fetch_stats $dev_20 20))"
+
+	local ping_timeout=$((PING_TIMEOUT * 5))
+	ip vrf exec $vrf_name \
+	   ${PING} -Q $dscp_10 ${sip:+-I $sip} $dip \
+		   -c 10 -i 0.5 -w $ping_timeout &> /dev/null
+
+	local -A t1s
+	eval "t1s=($(dscp_fetch_stats $dev_10 10)
+		   $(dscp_fetch_stats $dev_20 20))"
+
+	for key in ${!t0s[@]}; do
+		local expect
+		if ((key == prio+10 || key == prio+20)); then
+			expect=10
+		else
+			expect=0
+		fi
+
+		local delta=$((t1s[$key] - t0s[$key]))
+		((expect == delta))
+		check_err $? "DSCP $key: Expected to capture $expect packets, got $delta."
+	done
+
+	log_test "DSCP rewrite: $dscp_10-(prio $prio)-$dscp_20"
+}
+
+test_dscp()
+{
+	local prio
+
+	for prio in {0..7}; do
+		dscp_ping_test v$h1 192.0.2.1 192.0.2.2 $prio $h1 $h2
+	done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
new file mode 100755
index 000000000000..f6c23f84423e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
@@ -0,0 +1,269 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization in the router.
+#
+# With ip_forward_update_priority disabled, the packets are expected to keep
+# their DSCP (which in this test uses only values 0..7) intact as they are
+# forwarded by the switch. That is verified at $h2. ICMP responses are formed
+# with the same DSCP as the requests, and likewise pass through the switch
+# intact, which is verified at $h1.
+#
+# With ip_forward_update_priority enabled, router reprioritizes the packets
+# according to the table in reprioritize(). Thus, say, DSCP 7 maps to priority
+# 4, which on egress maps back to DSCP 4. The response packet then gets
+# reprioritized to 6, getting DSCP 6 on egress.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |  192.0.2.18/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |    + $swp1                                                    $swp2 +     |
+# |      192.0.2.2/28                                     192.0.2.17/28       |
+# |      APP=0,5,0 .. 7,5,7                          APP=0,5,0 .. 7,5,7       |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_update
+	test_no_update
+	test_pedit_norewrite
+	test_dscp_leftover
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+reprioritize()
+{
+	local in=$1; shift
+
+	# This is based on rt_tos2priority in include/net/route.h. Assuming 1:1
+	# mapping between priorities and TOS, it yields a new priority for a
+	# packet with ingress priority of $in.
+	local -a reprio=(0 0 2 2 6 6 4 4)
+
+	echo ${reprio[$in]}
+}
+
+zero()
+{
+    echo 0
+}
+
+three()
+{
+    echo 3
+}
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+	tc qdisc add dev $h1 clsact
+	dscp_capture_install $h1 0
+	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	dscp_capture_uninstall $h1 0
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.18/28
+	tc qdisc add dev $h2 clsact
+	dscp_capture_install $h2 0
+	ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	dscp_capture_uninstall $h2 0
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.18/28
+}
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.2/28
+	__simple_if_init $swp2 v$swp1 192.0.2.17/28
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+
+	dcb app add dev $swp1 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+	dcb app add dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+switch_destroy()
+{
+	dcb app del dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+	dcb app del dev $swp1 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	__simple_if_fini $swp2 192.0.2.17/28
+	simple_if_fini $swp1 192.0.2.2/28
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	sysctl_set net.ipv4.ip_forward_update_priority 1
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+	sysctl_restore net.ipv4.ip_forward_update_priority
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.18
+}
+
+dscp_ping_test()
+{
+	local vrf_name=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local prio=$1; shift
+	local reprio=$1; shift
+	local dev1=$1; shift
+	local dev2=$1; shift
+	local i
+
+	local prio2=$($reprio $prio)   # ICMP Request egress prio
+	local prio3=$($reprio $prio2)  # ICMP Response egress prio
+
+	local dscp=$((prio << 2))     # ICMP Request ingress DSCP
+	local dscp2=$((prio2 << 2))   # ICMP Request egress DSCP
+	local dscp3=$((prio3 << 2))   # ICMP Response egress DSCP
+
+	RET=0
+
+	eval "local -A dev1_t0s=($(dscp_fetch_stats $dev1 0))"
+	eval "local -A dev2_t0s=($(dscp_fetch_stats $dev2 0))"
+
+	local ping_timeout=$((PING_TIMEOUT * 5))
+	ip vrf exec $vrf_name \
+	   ${PING} -Q $dscp ${sip:+-I $sip} $dip \
+		   -c 10 -i 0.5 -w $ping_timeout &> /dev/null
+
+	eval "local -A dev1_t1s=($(dscp_fetch_stats $dev1 0))"
+	eval "local -A dev2_t1s=($(dscp_fetch_stats $dev2 0))"
+
+	for i in {0..7}; do
+		local dscpi=$((i << 2))
+		local expect2=0
+		local expect3=0
+
+		if ((i == prio2)); then
+			expect2=10
+		fi
+		if ((i == prio3)); then
+			expect3=10
+		fi
+
+		local delta=$((dev2_t1s[$i] - dev2_t0s[$i]))
+		((expect2 == delta))
+		check_err $? "DSCP $dscpi@$dev2: Expected to capture $expect2 packets, got $delta."
+
+		delta=$((dev1_t1s[$i] - dev1_t0s[$i]))
+		((expect3 == delta))
+		check_err $? "DSCP $dscpi@$dev1: Expected to capture $expect3 packets, got $delta."
+	done
+
+	log_test "DSCP rewrite: $dscp-(prio $prio2)-$dscp2-(prio $prio3)-$dscp3"
+}
+
+__test_update()
+{
+	local update=$1; shift
+	local reprio=$1; shift
+	local prio
+
+	sysctl_restore net.ipv4.ip_forward_update_priority
+	sysctl_set net.ipv4.ip_forward_update_priority $update
+
+	for prio in {0..7}; do
+		dscp_ping_test v$h1 192.0.2.1 192.0.2.18 $prio $reprio $h1 $h2
+	done
+}
+
+test_update()
+{
+	echo "Test net.ipv4.ip_forward_update_priority=1"
+	__test_update 1 reprioritize
+}
+
+test_no_update()
+{
+	echo "Test net.ipv4.ip_forward_update_priority=0"
+	__test_update 0 echo
+}
+
+# Test that when DSCP is updated in pedit, the DSCP rewrite is turned off.
+test_pedit_norewrite()
+{
+	echo "Test no DSCP rewrite after DSCP is updated by pedit"
+
+	tc filter add dev $swp1 ingress handle 101 pref 1 prot ip flower \
+	    action pedit ex munge ip dsfield set $((3 << 2)) retain 0xfc \
+	    action skbedit priority 3
+
+	__test_update 0 three
+
+	tc filter del dev $swp1 ingress pref 1
+}
+
+# Test that when the last APP rule is removed, the prio->DSCP map is properly
+# set to zeroes, and that the last APP rule does not stay active in the ASIC.
+test_dscp_leftover()
+{
+	echo "Test that last removed DSCP rule is deconfigured correctly"
+
+	dcb app del dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+	__test_update 0 zero
+
+	dcb app add dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
new file mode 100755
index 000000000000..9ca340c5f3a6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
@@ -0,0 +1,324 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A test for strict prioritization of traffic in the switch. Run two streams of
+# traffic, each through a different ingress port, one tagged with PCP of 1, the
+# other with PCP of 2. Both streams converge at one egress port, where they are
+# assigned TC of, respectively, 1 and 2, with strict priority configured between
+# them. In H3, we expect to see (almost) exclusively the high-priority traffic.
+#
+# Please see qos_mc_aware.sh for an explanation of why we use mausezahn and
+# counters instead of just running iperf3.
+#
+# +---------------------------+                 +-----------------------------+
+# | H1                        |                 |                          H2 |
+# |         $h1.111 +         |                 |         + $h2.222           |
+# |   192.0.2.33/28 |         |                 |         | 192.0.2.65/28     |
+# |   e-qos-map 0:1 |         |                 |         | e-qos-map 0:2     |
+# |                 |         |                 |         |                   |
+# |             $h1 +         |                 |         + $h2               |
+# +-----------------|---------+                 +---------|-------------------+
+#                   |                                     |
+# +-----------------|-------------------------------------|-------------------+
+# |           $swp1 +                                     + $swp2             |
+# |          >1Gbps |                                     | >1Gbps            |
+# | +---------------|-----------+              +----------|----------------+  |
+# | |     $swp1.111 +           |              |          + $swp2.222      |  |
+# | |                     BR111 |       SW     | BR222                     |  |
+# | |     $swp3.111 +           |              |          + $swp3.222      |  |
+# | +---------------|-----------+              +----------|----------------+  |
+# |                 \_____________________________________/                   |
+# |                                    |                                      |
+# |                                    + $swp3                                |
+# |                                    | 1Gbps bottleneck                     |
+# |                                    | ETS: (up n->tc n for n in 0..7)      |
+# |                                    |      strict priority                 |
+# +------------------------------------|--------------------------------------+
+#                                      |
+#                 +--------------------|--------------------+
+#                 |                    + $h3             H3 |
+#                 |                   / \                   |
+#                 |                  /   \                  |
+#                 |         $h3.111 +     + $h3.222         |
+#                 |  192.0.2.34/28          192.0.2.66/28   |
+#                 +-----------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_ets_strict
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+h1_create()
+{
+	adf_simple_if_init $h1
+
+	mtu_set $h1 10000
+	defer mtu_restore $h1
+
+	vlan_create $h1 111 v$h1 192.0.2.33/28
+	defer vlan_destroy $h1 111
+	ip link set dev $h1.111 type vlan egress-qos-map 0:1
+}
+
+h2_create()
+{
+	adf_simple_if_init $h2
+
+	mtu_set $h2 10000
+	defer mtu_restore $h2
+
+	vlan_create $h2 222 v$h2 192.0.2.65/28
+	defer vlan_destroy $h2 222
+	ip link set dev $h2.222 type vlan egress-qos-map 0:2
+}
+
+h3_create()
+{
+	adf_simple_if_init $h3
+
+	mtu_set $h3 10000
+	defer mtu_restore $h3
+
+	vlan_create $h3 111 v$h3 192.0.2.34/28
+	defer vlan_destroy $h3 111
+
+	vlan_create $h3 222 v$h3 192.0.2.66/28
+	defer vlan_destroy $h3 222
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	defer ip link set dev $swp1 down
+
+	mtu_set $swp1 10000
+	defer mtu_restore $swp1
+
+	ip link set dev $swp2 up
+	defer ip link set dev $swp2 down
+
+	mtu_set $swp2 10000
+	defer mtu_restore $swp2
+
+	# prio n -> TC n, strict scheduling
+	lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7
+	defer lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0
+
+	lldptool -T -i $swp3 -V ETS-CFG tsa=$(
+			)"0:strict,"$(
+			)"1:strict,"$(
+			)"2:strict,"$(
+			)"3:strict,"$(
+			)"4:strict,"$(
+			)"5:strict,"$(
+			)"6:strict,"$(
+			)"7:strict"
+	sleep 1
+
+	ip link set dev $swp3 up
+	defer ip link set dev $swp3 down
+
+	mtu_set $swp3 10000
+	defer mtu_restore $swp3
+
+	tc qdisc replace dev $swp3 root handle 101: tbf rate 1gbit \
+		burst 128K limit 1G
+	defer tc qdisc del dev $swp3 root handle 101:
+
+	vlan_create $swp1 111
+	defer vlan_destroy $swp1 111
+
+	vlan_create $swp2 222
+	defer vlan_destroy $swp2 222
+
+	vlan_create $swp3 111
+	defer vlan_destroy $swp3 111
+
+	vlan_create $swp3 222
+	defer vlan_destroy $swp3 222
+
+	ip link add name br111 type bridge vlan_filtering 0
+	defer ip link del dev br111
+	ip link set dev br111 addrgenmode none
+
+	ip link set dev br111 up
+	defer ip link set dev br111 down
+
+	ip link set dev $swp1.111 master br111
+	defer ip link set dev $swp1.111 nomaster
+
+	ip link set dev $swp3.111 master br111
+	defer ip link set dev $swp3.111 nomaster
+
+	ip link add name br222 type bridge vlan_filtering 0
+	defer ip link del dev br222
+	ip link set dev br222 addrgenmode none
+
+	ip link set dev br222 up
+	defer ip link set dev br222 down
+
+	ip link set dev $swp2.222 master br222
+	defer ip link set dev $swp2.222 nomaster
+
+	ip link set dev $swp3.222 master br222
+	defer ip link set dev $swp3.222 nomaster
+
+	# Make sure that ingress quotas are smaller than egress so that there is
+	# room for both streams of traffic to be admitted to shared buffer.
+	devlink_pool_size_thtype_save 0
+	devlink_pool_size_thtype_set 0 dynamic 10000000
+	defer devlink_pool_size_thtype_restore 0
+
+	devlink_pool_size_thtype_save 4
+	devlink_pool_size_thtype_set 4 dynamic 10000000
+	defer devlink_pool_size_thtype_restore 4
+
+	devlink_port_pool_th_save $swp1 0
+	devlink_port_pool_th_set $swp1 0 6
+	defer devlink_port_pool_th_restore $swp1 0
+
+	devlink_tc_bind_pool_th_save $swp1 1 ingress
+	devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6
+	defer devlink_tc_bind_pool_th_restore $swp1 1 ingress
+
+	devlink_port_pool_th_save $swp2 0
+	devlink_port_pool_th_set $swp2 0 6
+	defer devlink_port_pool_th_restore $swp2 0
+
+	devlink_tc_bind_pool_th_save $swp2 2 ingress
+	devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6
+	defer devlink_tc_bind_pool_th_restore $swp2 2 ingress
+
+	devlink_tc_bind_pool_th_save $swp3 1 egress
+	devlink_tc_bind_pool_th_set $swp3 1 egress 4 7
+	defer devlink_tc_bind_pool_th_restore $swp3 1 egress
+
+	devlink_tc_bind_pool_th_save $swp3 2 egress
+	devlink_tc_bind_pool_th_set $swp3 2 egress 4 7
+	defer devlink_tc_bind_pool_th_restore $swp3 2 egress
+
+	devlink_port_pool_th_save $swp3 4
+	devlink_port_pool_th_set $swp3 4 7
+	defer devlink_port_pool_th_restore $swp3 4
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	h3mac=$(mac_get $h3)
+
+	adf_vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.34 " from H1"
+	ping_test $h2 192.0.2.66 " from H2"
+}
+
+rel()
+{
+	local old=$1; shift
+	local new=$1; shift
+
+	bc <<< "
+	    scale=2
+	    ret = 100 * $new / $old
+	    if (ret > 0) { ret } else { 0 }
+	"
+}
+
+__run_hi_measure_rate()
+{
+	local what=$1; shift
+	local -a uc_rate
+
+	start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac
+	defer stop_traffic $!
+
+	uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_2 "$what"))
+	check_err $? "Could not get high enough $what ingress rate"
+
+	echo ${uc_rate[@]}
+}
+
+run_hi_measure_rate()
+{
+	in_defer_scope __run_hi_measure_rate "$@"
+}
+
+test_ets_strict()
+{
+	RET=0
+
+	# Run high-prio traffic on its own.
+	local -a rate_2
+	rate_2=($(run_hi_measure_rate "prio 2"))
+	local rate_2_in=${rate_2[0]}
+	local rate_2_eg=${rate_2[1]}
+
+	# Start low-prio stream.
+	start_traffic $h1.111 192.0.2.33 192.0.2.34 $h3mac
+	defer stop_traffic $!
+
+	local -a rate_1
+	rate_1=($(measure_rate $swp1 $h3 rx_octets_prio_1 "prio 1"))
+	check_err $? "Could not get high enough prio-1 ingress rate"
+	local rate_1_in=${rate_1[0]}
+	local rate_1_eg=${rate_1[1]}
+
+	# High-prio and low-prio on their own should have about the same
+	# throughput.
+	local rel21=$(rel $rate_1_eg $rate_2_eg)
+	check_err $(bc <<< "$rel21 < 95")
+	check_err $(bc <<< "$rel21 > 105")
+
+	# Start the high-prio stream--now both streams run.
+	rate_3=($(run_hi_measure_rate "prio 2+1"))
+	local rate_3_in=${rate_3[0]}
+	local rate_3_eg=${rate_3[1]}
+
+	# High-prio should have about the same throughput whether or not
+	# low-prio is in the system.
+	local rel32=$(rel $rate_2_eg $rate_3_eg)
+	check_err $(bc <<< "$rel32 < 95")
+
+	log_test "strict priority"
+	echo "Ingress to switch:"
+	echo "  p1 in rate            $(humanize $rate_1_in)"
+	echo "  p2 in rate            $(humanize $rate_2_in)"
+	echo "  p2 in rate w/ p1      $(humanize $rate_3_in)"
+	echo "Egress from switch:"
+	echo "  p1 eg rate            $(humanize $rate_1_eg)"
+	echo "  p2 eg rate            $(humanize $rate_2_eg) ($rel21% of p1)"
+	echo "  p2 eg rate w/ p1      $(humanize $rate_3_eg) ($rel32% of p2)"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
new file mode 100755
index 000000000000..88162b4027c0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -0,0 +1,379 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	test_defaults
+	test_dcb_ets
+	test_mtu
+	test_pfc
+	test_int_buf
+	test_tc_priomap
+	test_tc_mtu
+	test_tc_sizes
+	test_tc_int_buf
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+swp=$NETIF_NO_CABLE
+
+cleanup()
+{
+	pre_cleanup
+}
+
+get_prio_pg()
+{
+	# Produces a string of numbers "<B0> <B1> ... <B7> ", where BX is number
+	# of buffer that priority X is mapped to.
+	dcb -j buffer show dev $swp |
+		jq -r '[.prio_buffer | .[] | tostring + " "] | add'
+}
+
+get_prio_pfc()
+{
+	# Produces a string of numbers "<P0> <P1> ... <P7> ", where PX denotes
+	# whether priority X has PFC enabled (the value is 1) or disabled (0).
+	dcb -j pfc show dev $swp |
+		jq -r '[.prio_pfc | .[] | if . then "1 " else "0 " end] | add'
+}
+
+get_prio_tc()
+{
+	# Produces a string of numbers "<T0> <T1> ... <T7> ", where TC is number
+	# of TC that priority X is mapped to.
+	dcb -j ets show dev $swp |
+		jq -r '[.prio_tc | .[] | tostring + " "] | add'
+}
+
+get_buf_size()
+{
+	local idx=$1; shift
+
+	dcb -j buffer show dev $swp | jq ".buffer_size[$idx]"
+}
+
+get_tot_size()
+{
+	dcb -j buffer show dev $swp | jq '.total_size'
+}
+
+check_prio_pg()
+{
+	local expect=$1; shift
+
+	local current=$(get_prio_pg)
+	test "$current" = "$expect"
+	check_err $? "prio2buffer is '$current', expected '$expect'"
+}
+
+check_prio_pfc()
+{
+	local expect=$1; shift
+
+	local current=$(get_prio_pfc)
+	test "$current" = "$expect"
+	check_err $? "prio PFC is '$current', expected '$expect'"
+}
+
+check_prio_tc()
+{
+	local expect=$1; shift
+
+	local current=$(get_prio_tc)
+	test "$current" = "$expect"
+	check_err $? "prio_tc is '$current', expected '$expect'"
+}
+
+__check_buf_size()
+{
+	local idx=$1; shift
+	local expr=$1; shift
+	local what=$1; shift
+
+	local current=$(get_buf_size $idx)
+	((current $expr))
+	check_err $? "${what}buffer $idx size is '$current', expected '$expr'"
+	echo $current
+}
+
+check_buf_size()
+{
+	__check_buf_size "$@" > /dev/null
+}
+
+test_defaults()
+{
+	RET=0
+
+	check_prio_pg "0 0 0 0 0 0 0 0 "
+	check_prio_tc "0 0 0 0 0 0 0 0 "
+	check_prio_pfc "0 0 0 0 0 0 0 0 "
+
+	log_test "Default headroom configuration"
+}
+
+test_dcb_ets()
+{
+	RET=0
+
+	dcb ets set dev $swp prio-tc 0:0 1:2 2:4 3:6 4:1 5:3 6:5 7:7
+
+	check_prio_pg "0 2 4 6 1 3 5 7 "
+	check_prio_tc "0 2 4 6 1 3 5 7 "
+	check_prio_pfc "0 0 0 0 0 0 0 0 "
+
+	dcb ets set dev $swp prio-tc all:0
+
+	check_prio_pg "0 0 0 0 0 0 0 0 "
+	check_prio_tc "0 0 0 0 0 0 0 0 "
+
+	dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6 2>/dev/null
+	check_fail $? "prio2buffer accepted in DCB mode"
+
+	log_test "Configuring headroom through ETS"
+}
+
+test_mtu()
+{
+	local what=$1; shift
+	local buf0size_2
+	local buf0size
+
+	RET=0
+	buf0size=$(__check_buf_size 0 "> 0")
+
+	mtu_set $swp 3000
+	buf0size_2=$(__check_buf_size 0 "> $buf0size" "MTU 3000: ")
+	mtu_restore $swp
+
+	mtu_set $swp 6000
+	check_buf_size 0 "> $buf0size_2" "MTU 6000: "
+	mtu_restore $swp
+
+	check_buf_size 0 "== $buf0size"
+
+	log_test "${what}MTU impacts buffer size"
+}
+
+test_tc_mtu()
+{
+	# In TC mode, MTU still impacts the threshold below which a buffer is
+	# not permitted to go.
+
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+	test_mtu "TC: "
+	tc qdisc delete dev $swp root
+}
+
+test_pfc()
+{
+	RET=0
+
+	dcb ets set dev $swp prio-tc all:0 5:1 6:2 7:3
+
+	local buf0size=$(get_buf_size 0)
+	local buf1size=$(get_buf_size 1)
+	local buf2size=$(get_buf_size 2)
+	local buf3size=$(get_buf_size 3)
+	check_buf_size 0 "> 0"
+	check_buf_size 1 "> 0"
+	check_buf_size 2 "> 0"
+	check_buf_size 3 "> 0"
+	check_buf_size 4 "== 0"
+	check_buf_size 5 "== 0"
+	check_buf_size 6 "== 0"
+	check_buf_size 7 "== 0"
+
+	log_test "Buffer size sans PFC"
+
+	RET=0
+
+	dcb pfc set dev $swp prio-pfc all:off 5:on 6:on 7:on delay 0
+
+	check_prio_pg "0 0 0 0 0 1 2 3 "
+	check_prio_pfc "0 0 0 0 0 1 1 1 "
+	check_buf_size 0 "== $buf0size"
+	check_buf_size 1 "> $buf1size"
+	check_buf_size 2 "> $buf2size"
+	check_buf_size 3 "> $buf3size"
+
+	local buf1size=$(get_buf_size 1)
+	check_buf_size 2 "== $buf1size"
+	check_buf_size 3 "== $buf1size"
+
+	log_test "PFC: Cable length 0"
+
+	RET=0
+
+	dcb pfc set dev $swp delay 1000
+
+	check_buf_size 0 "== $buf0size"
+	check_buf_size 1 "> $buf1size"
+	check_buf_size 2 "> $buf1size"
+	check_buf_size 3 "> $buf1size"
+
+	log_test "PFC: Cable length 1000"
+
+	RET=0
+
+	dcb pfc set dev $swp prio-pfc all:off delay 0
+	dcb ets set dev $swp prio-tc all:0
+
+	check_prio_pg "0 0 0 0 0 0 0 0 "
+	check_prio_tc "0 0 0 0 0 0 0 0 "
+	check_buf_size 0 "> 0"
+	check_buf_size 1 "== 0"
+	check_buf_size 2 "== 0"
+	check_buf_size 3 "== 0"
+	check_buf_size 4 "== 0"
+	check_buf_size 5 "== 0"
+	check_buf_size 6 "== 0"
+	check_buf_size 7 "== 0"
+
+	log_test "PFC: Restore defaults"
+}
+
+test_tc_priomap()
+{
+	RET=0
+
+	dcb ets set dev $swp prio-tc 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+	check_prio_pg "0 1 2 3 4 5 6 7 "
+
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+	check_prio_pg "0 0 0 0 0 0 0 0 "
+
+	dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6
+	check_prio_pg "1 3 5 7 0 2 4 6 "
+
+	tc qdisc delete dev $swp root
+	check_prio_pg "0 1 2 3 4 5 6 7 "
+
+	# Clean up.
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+	dcb buffer set dev $swp prio-buffer all:0
+	tc qdisc delete dev $swp root
+	dcb ets set dev $swp prio-tc all:0
+
+	log_test "TC: priomap"
+}
+
+test_tc_sizes()
+{
+	local cell_size=$(devlink_cell_size_get)
+	local size=$((cell_size * 1000))
+
+	RET=0
+
+	dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
+	check_fail $? "buffer_size should fail before qdisc is added"
+
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+
+	dcb buffer set dev $swp buffer-size all:0 0:$size
+	check_err $? "buffer_size should pass after qdisc is added"
+	check_buf_size 0 "== $size" "set size: "
+
+	mtu_set $swp 6000
+	check_buf_size 0 "== $size" "set MTU: "
+	mtu_restore $swp
+
+	dcb buffer set dev $swp buffer-size all:0
+
+	# After replacing the qdisc for the same kind, buffer_size still has to
+	# work.
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1M
+
+	dcb buffer set dev $swp buffer-size all:0 0:$size
+	check_buf_size 0 "== $size" "post replace, set size: "
+
+	dcb buffer set dev $swp buffer-size all:0
+
+	# Likewise after replacing for a different kind.
+	tc qdisc replace dev $swp root handle 2: prio bands 8
+
+	dcb buffer set dev $swp buffer-size all:0 0:$size
+	check_buf_size 0 "== $size" "post replace different kind, set size: "
+
+	tc qdisc delete dev $swp root
+
+	dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
+	check_fail $? "buffer_size should fail after qdisc is deleted"
+
+	log_test "TC: buffer size"
+}
+
+test_int_buf()
+{
+	local what=$1; shift
+
+	RET=0
+
+	local buf0size=$(get_buf_size 0)
+	local tot_size=$(get_tot_size)
+
+	# Size of internal buffer and buffer 9.
+	local dsize=$((tot_size - buf0size))
+
+	tc qdisc add dev $swp clsact
+	tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp
+
+	local buf0size_2=$(get_buf_size 0)
+	local tot_size_2=$(get_tot_size)
+	local dsize_2=$((tot_size_2 - buf0size_2))
+
+	# Egress SPAN should have added to the "invisible" buffer configuration.
+	((dsize_2 > dsize))
+	check_err $? "Invisible buffers account for '$dsize_2', expected '> $dsize'"
+
+	mtu_set $swp 3000
+
+	local buf0size_3=$(get_buf_size 0)
+	local tot_size_3=$(get_tot_size)
+	local dsize_3=$((tot_size_3 - buf0size_3))
+
+	# MTU change might change buffer 0, which will show at total, but the
+	# hidden buffers should stay the same size.
+	((dsize_3 == dsize_2))
+	check_err $? "MTU change: Invisible buffers account for '$dsize_3', expected '== $dsize_2'"
+
+	mtu_restore $swp
+	tc qdisc del dev $swp clsact
+
+	# After SPAN removal, hidden buffers should be back to the original sizes.
+	local buf0size_4=$(get_buf_size 0)
+	local tot_size_4=$(get_tot_size)
+	local dsize_4=$((tot_size_4 - buf0size_4))
+	((dsize_4 == dsize))
+	check_err $? "SPAN removed: Invisible buffers account for '$dsize_4', expected '== $dsize'"
+
+	log_test "${what}internal buffer size"
+}
+
+test_tc_int_buf()
+{
+	local cell_size=$(devlink_cell_size_get)
+	local size=$((cell_size * 1000))
+
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+	test_int_buf "TC: "
+
+	dcb buffer set dev $swp buffer-size all:0 0:$size
+	test_int_buf "TC+buffsize: "
+
+	dcb buffer set dev $swp buffer-size all:0
+	tc qdisc delete dev $swp root
+}
+
+bail_on_lldpad "configure DCB" "configure Qdiscs"
+
+trap cleanup EXIT
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
new file mode 100644
index 000000000000..5ad092b9bf10
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+
+check_rate()
+{
+	local rate=$1; shift
+	local min=$1; shift
+	local what=$1; shift
+
+	if ((rate > min)); then
+		return 0
+	fi
+
+	echo "$what $(humanize $ir) < $(humanize $min)" > /dev/stderr
+	return 1
+}
+
+measure_rate()
+{
+	local sw_in=$1; shift   # Where the traffic ingresses the switch
+	local host_in=$1; shift # Where it ingresses another host
+	local counter=$1; shift # Counter to use for measurement
+	local what=$1; shift
+
+	local interval=10
+	local i
+	local ret=0
+
+	# Dips in performance might cause momentary ingress rate to drop below
+	# 1Gbps. That wouldn't saturate egress and MC would thus get through,
+	# seemingly winning bandwidth on account of UC. Demand at least 2Gbps
+	# average ingress rate to somewhat mitigate this.
+	local min_ingress=2147483648
+
+	for i in {5..0}; do
+		local t0=$(ethtool_stats_get $host_in $counter)
+		local u0=$(ethtool_stats_get $sw_in $counter)
+		sleep $interval
+		local t1=$(ethtool_stats_get $host_in $counter)
+		local u1=$(ethtool_stats_get $sw_in $counter)
+
+		local ir=$(rate $u0 $u1 $interval)
+		local er=$(rate $t0 $t1 $interval)
+
+		if check_rate $ir $min_ingress "$what ingress rate"; then
+			break
+		fi
+
+		# Fail the test if we can't get the throughput.
+		if ((i == 0)); then
+			ret=1
+		fi
+	done
+
+	echo $ir $er
+	return $ret
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh
new file mode 100755
index 000000000000..a4a25637fe2a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh
@@ -0,0 +1,243 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test sends many small packets (size is less than cell size) through the
+# switch. A shaper is used in $swp2, so the traffic is limited there. Packets
+# are queued till they will be sent.
+#
+# The idea is to verify that the switch can handle at least 85% of maximum
+# supported descrpitors by hardware. Then, we verify that the driver configures
+# firmware to allow infinite size of egress descriptor pool, and does not use a
+# lower limitation. Increase the size of the relevant pools such that the pool's
+# size does not limit the traffic.
+
+# +-----------------------+
+# | H1                    |
+# |   + $h1.111           |
+# |   | 192.0.2.33/28     |
+# |   |                   |
+# |   + $h1               |
+# +---|-------------------+
+#     |
+# +---|-----------------------------+
+# |   + $swp1                       |
+# |   | iPOOL1                      |
+# |   |                             |
+# | +-|------------------------+    |
+# | | + $swp1.111              |    |
+# | |                          |    |
+# | | BR1                      |    |
+# | |                          |    |
+# | | + $swp2.111              |    |
+# | +-|------------------------+    |
+# |   |                             |
+# |   + $swp2                       |
+# |   | ePOOL6                      |
+# |   | 1mbit                       |
+# +---+-----------------------------+
+#     |
+# +---|-------------------+
+# |   + $h2            H2 |
+# |   |                   |
+# |   + $h2.111           |
+# |     192.0.2.34/28     |
+# +-----------------------+
+#
+
+ALL_TESTS="
+	ping_ipv4
+	max_descriptors
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
+
+MAX_POOL_SIZE=$(devlink_pool_size_get)
+SHAPER_RATE=1mbit
+
+# The current TBF qdisc interface does not allow us to configure the shaper to
+# flat zero. The ASIC shaper is guaranteed to work with a granularity of
+# 200Mbps. On Spectrum-2, writing a value close to zero instead of zero works
+# well, but the performance on Spectrum-1 is unpredictable. Thus, do not run the
+# test on Spectrum-1.
+mlxsw_only_on_spectrum 2+ || exit
+
+h1_create()
+{
+	adf_simple_if_init $h1
+
+	vlan_create $h1 111 v$h1 192.0.2.33/28
+	defer vlan_destroy $h1 111
+	ip link set dev $h1.111 type vlan egress-qos-map 0:1
+}
+
+h2_create()
+{
+	adf_simple_if_init $h2
+
+	vlan_create $h2 111 v$h2 192.0.2.34/28
+	defer vlan_destroy $h2 111
+}
+
+switch_create()
+{
+	# pools
+	# -----
+	# devlink_pool_size_thtype_restore needs to be done first so that we can
+	# reset the various limits to values that are only valid for the
+	# original static / dynamic setting.
+
+	devlink_pool_size_thtype_save 1
+	devlink_pool_size_thtype_set 1 dynamic $MAX_POOL_SIZE
+	defer_prio devlink_pool_size_thtype_restore 1
+
+	devlink_pool_size_thtype_save 6
+	devlink_pool_size_thtype_set 6 static $MAX_POOL_SIZE
+	defer_prio devlink_pool_size_thtype_restore 6
+
+	# $swp1
+	# -----
+
+	ip link set dev $swp1 up
+	defer ip link set dev $swp1 down
+
+	vlan_create $swp1 111
+	defer vlan_destroy $swp1 111
+	ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_save $swp1 1
+	devlink_port_pool_th_set $swp1 1 16
+	defer devlink_tc_bind_pool_th_restore $swp1 1 ingress
+
+	devlink_tc_bind_pool_th_save $swp1 1 ingress
+	devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16
+	defer devlink_port_pool_th_restore $swp1 1
+
+	tc qdisc replace dev $swp1 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	defer tc qdisc del dev $swp1 root
+
+	dcb buffer set dev $swp1 prio-buffer all:0 1:1
+	defer dcb buffer set dev $swp1 prio-buffer all:0
+
+	# $swp2
+	# -----
+
+	ip link set dev $swp2 up
+	defer ip link set dev $swp2 down
+
+	vlan_create $swp2 111
+	defer vlan_destroy $swp2 111
+	ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_save $swp2 6
+	devlink_port_pool_th_set $swp2 6 $MAX_POOL_SIZE
+	defer devlink_tc_bind_pool_th_restore $swp2 1 egress
+
+	devlink_tc_bind_pool_th_save $swp2 1 egress
+	devlink_tc_bind_pool_th_set $swp2 1 egress 6 $MAX_POOL_SIZE
+	defer devlink_port_pool_th_restore $swp2 6
+
+	tc qdisc replace dev $swp2 root handle 1: tbf rate $SHAPER_RATE \
+		burst 128K limit 500M
+	defer tc qdisc del dev $swp2 root
+
+	tc qdisc replace dev $swp2 parent 1:1 handle 11: \
+		ets bands 8 strict 8 priomap 7 6
+	defer tc qdisc del dev $swp2 parent 1:1 handle 11:
+
+	# bridge
+	# ------
+
+	ip link add name br1 type bridge vlan_filtering 0
+	defer ip link del dev br1
+
+	ip link set dev $swp1.111 master br1
+	defer ip link set dev $swp1.111 nomaster
+
+	ip link set dev br1 up
+	defer ip link set dev br1 down
+
+	ip link set dev $swp2.111 master br1
+	defer ip link set dev $swp2.111 nomaster
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h2mac=$(mac_get $h2)
+
+	adf_vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.34 " h1->h2"
+}
+
+percentage_used()
+{
+	local num_packets=$1; shift
+	local max_packets=$1; shift
+
+	bc <<< "
+	    scale=2
+	    100 * $num_packets / $max_packets
+	"
+}
+
+max_descriptors()
+{
+	local cell_size=$(devlink_cell_size_get)
+	local exp_perc_used=85
+	local max_descriptors
+	local pktsize=30
+
+	RET=0
+
+	max_descriptors=$(mlxsw_max_descriptors_get) || exit 1
+
+	local d0=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+
+	log_info "Send many small packets, packet size = $pktsize bytes"
+	start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac
+	defer stop_traffic $!
+
+	# Sleep to wait for congestion.
+	sleep 5
+
+	local d1=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+	((d1 == d0))
+	check_err $? "Drops seen on egress port: $d0 -> $d1 ($((d1 - d0)))"
+
+	# Check how many packets the switch can handle, the limitation is
+	# maximum descriptors.
+	local pkts_bytes=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1)
+	local pkts_num=$((pkts_bytes / cell_size))
+	local perc_used=$(percentage_used $pkts_num $max_descriptors)
+
+	check_err $(bc <<< "$perc_used < $exp_perc_used") \
+		"Expected > $exp_perc_used% of descriptors, handle $perc_used%"
+
+	log_test "Maximum descriptors usage. The percentage used is $perc_used%"
+}
+
+trap cleanup EXIT
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
new file mode 100755
index 000000000000..d8f8ae8533cd
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -0,0 +1,330 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A test for switch behavior under MC overload. An issue in Spectrum chips
+# causes throughput of UC traffic to drop severely when a switch is under heavy
+# MC load. This issue can be overcome by putting the switch to MC-aware mode.
+# This test verifies that UC performance stays intact even as the switch is
+# under MC flood, and therefore that the MC-aware mode is enabled and correctly
+# configured.
+#
+# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
+# at full speed. That makes it impossible to use iperf3 to simply measure the
+# throughput, because many packets (that reach $h3) don't get to the kernel at
+# all even in UDP mode (the situation is even worse in TCP mode, where one can't
+# hope to see more than a couple Mbps).
+#
+# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
+# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
+# each gets a different priority and we can use per-prio ethtool counters to
+# measure the throughput. In order to avoid prioritizing unicast traffic, prio
+# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
+# thus TC 0).
+#
+# Mausezahn can't actually saturate the links unless it's using large frames.
+# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
+# multicast traffic uses 8K frames.
+#
+# +---------------------------+            +----------------------------------+
+# | H1                        |            |                               H2 |
+# |                           |            |  unicast --> + $h2.111           |
+# |                 multicast |            |  traffic     | 192.0.2.129/28    |
+# |                 traffic   |            |              | e-qos-map 0:1     |
+# |           $h1 + <-----    |            |              |                   |
+# | 192.0.2.65/28 |           |            |              + $h2               |
+# +---------------|-----------+            +--------------|-------------------+
+#                 |                                       |
+# +---------------|---------------------------------------|-------------------+
+# |         $swp1 +                                       + $swp2             |
+# |        >1Gbps |                                       | >1Gbps            |
+# | +-------------|------+                     +----------|----------------+  |
+# | |     $swp1.1 +      |                     |          + $swp2.111      |  |
+# | |                BR1 |             SW      | BR111                     |  |
+# | |     $swp3.1 +      |                     |          + $swp3.111      |  |
+# | +-------------|------+                     +----------|----------------+  |
+# |               \_______________________________________/                   |
+# |                                    |                                      |
+# |                                    + $swp3                                |
+# |                                    | 1Gbps bottleneck                     |
+# |                                    | prio qdisc: {0..7} -> 7              |
+# +------------------------------------|--------------------------------------+
+#                                      |
+#                                   +--|-----------------+
+#                                   |  + $h3          H3 |
+#                                   |  | 192.0.2.66/28   |
+#                                   |  |                 |
+#                                   |  + $h3.111         |
+#                                   |    192.0.2.130/28  |
+#                                   +--------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_mc_aware
+	test_uc_aware
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+h1_create()
+{
+	adf_simple_if_init $h1 192.0.2.65/28
+
+	mtu_set $h1 10000
+	defer mtu_restore $h1
+}
+
+h2_create()
+{
+	adf_simple_if_init $h2
+
+	mtu_set $h2 10000
+	defer mtu_restore $h2
+
+	vlan_create $h2 111 v$h2 192.0.2.129/28
+	defer vlan_destroy $h2 111
+	ip link set dev $h2.111 type vlan egress-qos-map 0:1
+}
+
+h3_create()
+{
+	adf_simple_if_init $h3 192.0.2.66/28
+
+	mtu_set $h3 10000
+	defer mtu_restore $h3
+
+	vlan_create $h3 111 v$h3 192.0.2.130/28
+	defer vlan_destroy $h3 111
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	defer ip link set dev $swp1 down
+
+	mtu_set $swp1 10000
+	defer mtu_restore $swp1
+
+	ip link set dev $swp2 up
+	defer ip link set dev $swp2 down
+
+	mtu_set $swp2 10000
+	defer mtu_restore $swp2
+
+	ip link set dev $swp3 up
+	defer ip link set dev $swp3 down
+
+	mtu_set $swp3 10000
+	defer mtu_restore $swp3
+
+	vlan_create $swp2 111
+	defer vlan_destroy $swp2 111
+
+	vlan_create $swp3 111
+	defer vlan_destroy $swp3 111
+
+	tc qdisc replace dev $swp3 root handle 3: tbf rate 1gbit \
+		burst 128K limit 1G
+	defer tc qdisc del dev $swp3 root handle 3:
+
+	tc qdisc replace dev $swp3 parent 3:3 handle 33: \
+		prio bands 8 priomap 7 7 7 7 7 7 7 7
+	defer tc qdisc del dev $swp3 parent 3:3 handle 33:
+
+	ip link add name br1 type bridge vlan_filtering 0
+	defer ip link del dev br1
+	ip link set dev br1 addrgenmode none
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	defer ip link set dev $swp1 nomaster
+
+	ip link set dev $swp3 master br1
+	defer ip link set dev $swp3 nomaster
+
+	ip link add name br111 type bridge vlan_filtering 0
+	defer ip link del dev br111
+	ip link set dev br111 addrgenmode none
+	ip link set dev br111 up
+
+	ip link set dev $swp2.111 master br111
+	defer ip link set dev $swp2.111 nomaster
+
+	ip link set dev $swp3.111 master br111
+	defer ip link set dev $swp3.111 nomaster
+
+	# Make sure that ingress quotas are smaller than egress so that there is
+	# room for both streams of traffic to be admitted to shared buffer.
+	devlink_port_pool_th_save $swp1 0
+	devlink_port_pool_th_set $swp1 0 5
+	defer devlink_port_pool_th_restore $swp1 0
+
+	devlink_tc_bind_pool_th_save $swp1 0 ingress
+	devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5
+	defer devlink_tc_bind_pool_th_restore $swp1 0 ingress
+
+	devlink_port_pool_th_save $swp2 0
+	devlink_port_pool_th_set $swp2 0 5
+	defer devlink_port_pool_th_restore $swp2 0
+
+	devlink_tc_bind_pool_th_save $swp2 1 ingress
+	devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5
+	defer devlink_tc_bind_pool_th_restore $swp2 1 ingress
+
+	devlink_port_pool_th_save $swp3 4
+	devlink_port_pool_th_set $swp3 4 12
+	defer devlink_port_pool_th_restore $swp3 4
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	h3mac=$(mac_get $h3)
+
+	adf_vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+ping_ipv4()
+{
+	ping_test $h2 192.0.2.130
+}
+
+__run_uc_measure_rate()
+{
+	local what=$1; shift
+	local -a uc_rate
+
+	start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
+	defer stop_traffic $!
+
+	uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "$what"))
+	check_err $? "Could not get high enough $what ingress rate"
+
+	echo ${uc_rate[@]}
+}
+
+run_uc_measure_rate()
+{
+	in_defer_scope __run_uc_measure_rate "$@"
+}
+
+test_mc_aware()
+{
+	RET=0
+
+	local -a uc_rate=($(run_uc_measure_rate "UC-only"))
+	local ucth1=${uc_rate[1]}
+
+	start_traffic $h1 192.0.2.65 bc bc
+	defer stop_traffic $!
+
+	local d0=$(date +%s)
+	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
+	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+
+	local -a uc_rate_2=($(run_uc_measure_rate "UC+MC"))
+	local ucth2=${uc_rate_2[1]}
+
+	local d1=$(date +%s)
+	local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
+	local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+
+	local deg=$(bc <<< "
+			scale=2
+			ret = 100 * ($ucth1 - $ucth2) / $ucth1
+			if (ret > 0) { ret } else { 0 }
+		    ")
+
+	# Minimum shaper of 200Mbps on MC TCs should cause about 20% of
+	# degradation on 1Gbps link.
+	check_err $(bc <<< "$deg < 15") "Minimum shaper not in effect"
+	check_err $(bc <<< "$deg > 25") "MC traffic degrades UC performance too much"
+
+	local interval=$((d1 - d0))
+	local mc_ir=$(rate $u0 $u1 $interval)
+	local mc_er=$(rate $t0 $t1 $interval)
+
+	log_test "UC performance under MC overload"
+
+	echo "UC-only throughput  $(humanize $ucth1)"
+	echo "UC+MC throughput    $(humanize $ucth2)"
+	echo "Degradation         $deg %"
+	echo
+	echo "Full report:"
+	echo "  UC only:"
+	echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
+	echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
+	echo "  UC+MC:"
+	echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
+	echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
+	echo "    ingress MC throughput $(humanize $mc_ir)"
+	echo "    egress MC throughput  $(humanize $mc_er)"
+	echo
+}
+
+test_uc_aware()
+{
+	RET=0
+
+	start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
+	defer stop_traffic $!
+
+	local d0=$(date +%s)
+	local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
+	local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+	sleep 1
+
+	local attempts=50
+	local passes=0
+	local i
+
+	for ((i = 0; i < attempts; ++i)); do
+		if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 1; then
+			((passes++))
+		fi
+
+		sleep 0.1
+	done
+
+	local d1=$(date +%s)
+	local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
+	local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+
+	local interval=$((d1 - d0))
+	local uc_ir=$(rate $u0 $u1 $interval)
+	local uc_er=$(rate $t0 $t1 $interval)
+
+	((attempts == passes))
+	check_err $?
+
+	log_test "MC performance under UC overload"
+	echo "    ingress UC throughput $(humanize ${uc_ir})"
+	echo "    egress UC throughput  $(humanize ${uc_er})"
+	echo "    sent $attempts BC ARPs, got $passes responses"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
new file mode 100755
index 000000000000..0f0f4f05807c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -0,0 +1,417 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority
+# of 1. This stream is consistently prioritized as priority 1, is put to PG
+# buffer 1, and scheduled at TC 1.
+#
+# - the stream first ingresses through $swp1, where it is forwarded to $swp3
+#
+# - then it ingresses through $swp4. Here it is put to a lossless buffer and put
+#   to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is
+#   shaped, and thus the PFC pool eventually fills, therefore the headroom
+#   fills, and $swp3 is paused.
+#
+# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at
+#   a pool ("overflow pool"). The overflow pool needs to be large enough to
+#   contain the whole burst.
+#
+# - eventually the PFC pool gets some traffic out, headroom therefore gets some
+#   traffic to the pool, and $swp3 is unpaused again. This way the traffic is
+#   gradually forwarded from the overflow pool, through the PFC pool, out of
+#   $swp2, and eventually to $h2.
+#
+# - if PFC works, all lossless flow packets that ingress through $swp1 should
+#   also be seen ingressing $h2. If it doesn't, there will be drops due to
+#   discrepancy between the speeds of $swp1 and $h2.
+#
+# - it should all play out relatively quickly, so that SLL and HLL will not
+#   cause drops.
+#
+# +-----------------------+
+# | H1                    |
+# |   + $h1.111           |
+# |   | 192.0.2.33/28     |
+# |   |                   |
+# |   + $h1               |
+# +---|-------------------+  +--------------------+
+#     |                      |                    |
+# +---|----------------------|--------------------|---------------------------+
+# |   + $swp1          $swp3 +                    + $swp4                     |
+# |   | iPOOL1        iPOOL0 |                    | iPOOL2                    |
+# |   | ePOOL4        ePOOL5 |                    | ePOOL4                    |
+# |   |        PFC:enabled=1 |                    | PFC:enabled=1             |
+# | +-|----------------------|-+                +-|------------------------+  |
+# | | + $swp1.111  $swp3.111 + |                | + $swp4.111              |  |
+# | |                          |                |                          |  |
+# | | BR1                      |                | BR2                      |  |
+# | |                          |                |                          |  |
+# | |                          |                |         + $swp2.111      |  |
+# | +--------------------------+                +---------|----------------+  |
+# |                                                       |                   |
+# | iPOOL0: 500KB dynamic                                 |                   |
+# | iPOOL1: 10MB static                                   |                   |
+# | iPOOL2: 1MB static                                    + $swp2             |
+# | ePOOL4: 500KB dynamic                                 | iPOOL0            |
+# | ePOOL5: 10MB static                                   | ePOOL6            |
+# | ePOOL6: "infinite" static                             | 200Mbps shaper    |
+# +-------------------------------------------------------|-------------------+
+#                                                         |
+#                                                     +---|-------------------+
+#                                                     |   + $h2            H2 |
+#                                                     |   |                   |
+#                                                     |   + $h2.111           |
+#                                                     |     192.0.2.34/28     |
+#                                                     +-----------------------+
+#
+# iPOOL0+ePOOL4 is a helper pool for control traffic etc.
+# iPOOL1+ePOOL5 are overflow pools.
+# iPOOL2+ePOOL6 are PFC pools.
+
+ALL_TESTS="
+	ping_ipv4
+	test_qos_pfc
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+_1KB=1000
+_100KB=$((100 * _1KB))
+_500KB=$((500 * _1KB))
+_1MB=$((1000 * _1KB))
+_10MB=$((10 * _1MB))
+
+h1_create()
+{
+	simple_if_init $h1
+	mtu_set $h1 10000
+
+	vlan_create $h1 111 v$h1 192.0.2.33/28
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 111
+
+	mtu_restore $h1
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	mtu_set $h2 10000
+
+	vlan_create $h2 111 v$h2 192.0.2.34/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 111
+
+	mtu_restore $h2
+	simple_if_fini $h2
+}
+
+switch_create()
+{
+	local lanes_swp4
+	local pg1_size
+
+	# pools
+	# -----
+
+	devlink_pool_size_thtype_save 0
+	devlink_pool_size_thtype_save 4
+	devlink_pool_size_thtype_save 1
+	devlink_pool_size_thtype_save 5
+	devlink_pool_size_thtype_save 2
+	devlink_pool_size_thtype_save 6
+
+	devlink_port_pool_th_save $swp1 1
+	devlink_port_pool_th_save $swp2 6
+	devlink_port_pool_th_save $swp3 5
+	devlink_port_pool_th_save $swp4 2
+
+	devlink_tc_bind_pool_th_save $swp1 1 ingress
+	devlink_tc_bind_pool_th_save $swp2 1 egress
+	devlink_tc_bind_pool_th_save $swp3 1 egress
+	devlink_tc_bind_pool_th_save $swp4 1 ingress
+
+	# Control traffic pools. Just reduce the size. Keep them dynamic so that
+	# we don't need to change all the uninteresting quotas.
+	devlink_pool_size_thtype_set 0 dynamic $_500KB
+	devlink_pool_size_thtype_set 4 dynamic $_500KB
+
+	# Overflow pools.
+	devlink_pool_size_thtype_set 1 static $_10MB
+	devlink_pool_size_thtype_set 5 static $_10MB
+
+	# PFC pools. As per the writ, the size of egress PFC pool should be
+	# infinice, but actually it just needs to be large enough to not matter
+	# in practice, so reuse the 10MB limit.
+	devlink_pool_size_thtype_set 2 static $_1MB
+	devlink_pool_size_thtype_set 6 static $_10MB
+
+	# $swp1
+	# -----
+
+	ip link set dev $swp1 up
+	mtu_set $swp1 10000
+	vlan_create $swp1 111
+	ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp1 1 $_10MB
+	devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB
+
+	# Configure qdisc so that we can configure PG and therefore pool
+	# assignment.
+	tc qdisc replace dev $swp1 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	dcb buffer set dev $swp1 prio-buffer all:0 1:1
+
+	# $swp2
+	# -----
+
+	ip link set dev $swp2 up
+	mtu_set $swp2 10000
+	vlan_create $swp2 111
+	ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp2 6 $_10MB
+	devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB
+
+	# prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped.
+	tc qdisc replace dev $swp2 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	tc qdisc replace dev $swp2 parent 1:7 handle 17: \
+	   tbf rate 200Mbit burst 131072 limit 1M
+
+	# $swp3
+	# -----
+
+	ip link set dev $swp3 up
+	mtu_set $swp3 10000
+	vlan_create $swp3 111
+	ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp3 5 $_10MB
+	devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB
+
+	# prio 0->TC0 (band 7), 1->TC1 (band 6)
+	tc qdisc replace dev $swp3 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+
+	# Need to enable PFC so that PAUSE takes effect. Therefore need to put
+	# the lossless prio into a buffer of its own. Don't bother with buffer
+	# sizes though, there is not going to be any pressure in the "backward"
+	# direction.
+	dcb buffer set dev $swp3 prio-buffer all:0 1:1
+	dcb pfc set dev $swp3 prio-pfc all:off 1:on
+
+	# $swp4
+	# -----
+
+	ip link set dev $swp4 up
+	mtu_set $swp4 10000
+	vlan_create $swp4 111
+	ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp4 2 $_1MB
+	devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB
+
+	# Configure qdisc so that we can hand-tune headroom.
+	tc qdisc replace dev $swp4 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	dcb buffer set dev $swp4 prio-buffer all:0 1:1
+	dcb pfc set dev $swp4 prio-pfc all:off 1:on
+	# PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
+	# is (-2*MTU) about 80K of delay provision.
+	pg1_size=$_100KB
+
+	setup_wait_dev_with_timeout $swp4
+
+	lanes_swp4=$(ethtool $swp4 | grep 'Lanes:')
+	lanes_swp4=${lanes_swp4#*"Lanes: "}
+
+	# 8-lane ports use two buffers among which the configured buffer
+	# is split, so double the size to get twice (20K + 80K).
+	if [[ $lanes_swp4 -eq 8 ]]; then
+		pg1_size=$((pg1_size * 2))
+	fi
+
+	dcb buffer set dev $swp4 buffer-size all:0 1:$pg1_size
+
+	# bridges
+	# -------
+
+	ip link add name br1 type bridge vlan_filtering 0
+	ip link set dev $swp1.111 master br1
+	ip link set dev $swp3.111 master br1
+	ip link set dev br1 up
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev $swp2.111 master br2
+	ip link set dev $swp4.111 master br2
+	ip link set dev br2 up
+}
+
+switch_destroy()
+{
+	# Do this first so that we can reset the limits to values that are only
+	# valid for the original static / dynamic setting.
+	devlink_pool_size_thtype_restore 6
+	devlink_pool_size_thtype_restore 5
+	devlink_pool_size_thtype_restore 4
+	devlink_pool_size_thtype_restore 2
+	devlink_pool_size_thtype_restore 1
+	devlink_pool_size_thtype_restore 0
+
+	# bridges
+	# -------
+
+	ip link set dev br2 down
+	ip link set dev $swp4.111 nomaster
+	ip link set dev $swp2.111 nomaster
+	ip link del dev br2
+
+	ip link set dev br1 down
+	ip link set dev $swp3.111 nomaster
+	ip link set dev $swp1.111 nomaster
+	ip link del dev br1
+
+	# $swp4
+	# -----
+
+	dcb buffer set dev $swp4 buffer-size all:0
+	dcb pfc set dev $swp4 prio-pfc all:off
+	dcb buffer set dev $swp4 prio-buffer all:0
+	tc qdisc del dev $swp4 root
+
+	devlink_tc_bind_pool_th_restore $swp4 1 ingress
+	devlink_port_pool_th_restore $swp4 2
+
+	vlan_destroy $swp4 111
+	mtu_restore $swp4
+	ip link set dev $swp4 down
+
+	# $swp3
+	# -----
+
+	dcb pfc set dev $swp3 prio-pfc all:off
+	dcb buffer set dev $swp3 prio-buffer all:0
+	tc qdisc del dev $swp3 root
+
+	devlink_tc_bind_pool_th_restore $swp3 1 egress
+	devlink_port_pool_th_restore $swp3 5
+
+	vlan_destroy $swp3 111
+	mtu_restore $swp3
+	ip link set dev $swp3 down
+
+	# $swp2
+	# -----
+
+	tc qdisc del dev $swp2 parent 1:7
+	tc qdisc del dev $swp2 root
+
+	devlink_tc_bind_pool_th_restore $swp2 1 egress
+	devlink_port_pool_th_restore $swp2 6
+
+	vlan_destroy $swp2 111
+	mtu_restore $swp2
+	ip link set dev $swp2 down
+
+	# $swp1
+	# -----
+
+	dcb buffer set dev $swp1 prio-buffer all:0
+	tc qdisc del dev $swp1 root
+
+	devlink_tc_bind_pool_th_restore $swp1 1 ingress
+	devlink_port_pool_th_restore $swp1 1
+
+	vlan_destroy $swp1 111
+	mtu_restore $swp1
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	swp4=${NETIFS[p6]}
+
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.34
+}
+
+test_qos_pfc()
+{
+	RET=0
+
+	# 10M pool, each packet is 8K of payload + headers
+	local pkts=$((_10MB / 8050))
+	local size=$((pkts * 8050))
+	local in0=$(ethtool_stats_get $swp1 rx_octets_prio_1)
+	local out0=$(ethtool_stats_get $swp2 tx_octets_prio_1)
+
+	$MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \
+		-a own -b $h2mac -c $pkts -t udp -q
+	sleep 2
+
+	local in1=$(ethtool_stats_get $swp1 rx_octets_prio_1)
+	local out1=$(ethtool_stats_get $swp2 tx_octets_prio_1)
+
+	local din=$((in1 - in0))
+	local dout=$((out1 - out0))
+
+	local pct_in=$((din * 100 / size))
+
+	((pct_in > 95 && pct_in < 105))
+	check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%"
+
+	((dout == din))
+	check_err $? "$((din - dout)) bytes out of $din ingressed got lost"
+
+	log_test "PFC"
+}
+
+bail_on_lldpad "configure DCB" "configure Qdiscs"
+
+trap cleanup EXIT
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh
new file mode 100755
index 000000000000..4a11bf1d514a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	bridge_rif_add
+	bridge_rif_nomaster
+	bridge_rif_remaster
+	bridge_rif_nomaster_addr
+	bridge_rif_nomaster_port
+	bridge_rif_remaster_port
+"
+
+REQUIRE_TEAMD="yes"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	team_create lag1 lacp
+	ip link set dev lag1 addrgenmode none
+	ip link set dev lag1 address $(mac_get $swp1)
+
+	team_create lag2 lacp
+	ip link set dev lag2 addrgenmode none
+	ip link set dev lag2 address $(mac_get $swp2)
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 addrgenmode none
+	ip link set dev br1 address $(mac_get lag1)
+	ip link set dev br1 up
+
+	ip link set dev lag1 master br1
+
+	ip link set dev $swp1 master lag1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master lag2
+	ip link set dev $swp2 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp2 down
+
+	ip link set dev $swp1 nomaster
+	ip link set dev $swp1 down
+
+	ip link del dev lag2
+	ip link set dev lag1 nomaster
+	ip link del dev lag1
+
+	ip link del dev br1
+}
+
+bridge_rif_add()
+{
+	RET=0
+
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	__addr_add_del br1 add 192.0.2.2/28
+	sleep 1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0 + 1))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	log_test "Add RIF for bridge on address addition"
+}
+
+bridge_rif_nomaster()
+{
+	RET=0
+
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	ip link set dev lag1 nomaster
+	sleep 1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0 - 1))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	log_test "Drop RIF for bridge on LAG deslavement"
+}
+
+bridge_rif_remaster()
+{
+	RET=0
+
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	ip link set dev lag1 master br1
+	sleep 1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0 + 1))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	log_test "Add RIF for bridge on LAG reenslavement"
+}
+
+bridge_rif_nomaster_addr()
+{
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+	# Adding an address while the LAG is enslaved shouldn't generate a RIF.
+	__addr_add_del lag1 add 192.0.2.65/28
+	sleep 1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	# Removing the LAG from the bridge should drop RIF for the bridge (as
+	# tested in bridge_rif_lag_nomaster), but since the LAG now has an
+	# address, it should gain a RIF.
+	ip link set dev lag1 nomaster
+	sleep 1
+	local rifs_occ_t2=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0))
+
+	((expected_rifs == rifs_occ_t2))
+	check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used"
+
+	log_test "Add RIF for LAG on deslavement from bridge"
+
+	__addr_add_del lag1 del 192.0.2.65/28
+	ip link set dev lag1 master br1
+	sleep 1
+}
+
+bridge_rif_nomaster_port()
+{
+	RET=0
+
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	ip link set dev $swp1 nomaster
+	sleep 1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0 - 1))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	log_test "Drop RIF for bridge on deslavement of port from LAG"
+}
+
+bridge_rif_remaster_port()
+{
+	RET=0
+
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	ip link set dev $swp1 down
+	ip link set dev $swp1 master lag1
+	ip link set dev $swp1 up
+	setup_wait_dev $swp1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0 + 1))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	log_test "Add RIF for bridge on reenslavement of port to LAG"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh
new file mode 100644
index 000000000000..a43a9926e690
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: GPL-2.0
+
+RIF_COUNTER_NUM_NETIFS=2
+
+rif_counter_addr4()
+{
+	local i=$1; shift
+	local p=$1; shift
+
+	printf 192.0.%d.%d $((i / 64)) $(((4 * i % 256) + p))
+}
+
+rif_counter_addr4pfx()
+{
+	rif_counter_addr4 $@
+	printf /30
+}
+
+rif_counter_h1_create()
+{
+	simple_if_init $h1
+}
+
+rif_counter_h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+rif_counter_h2_create()
+{
+	simple_if_init $h2
+}
+
+rif_counter_h2_destroy()
+{
+	simple_if_fini $h2
+}
+
+rif_counter_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	vrf_prepare
+
+	rif_counter_h1_create
+	rif_counter_h2_create
+}
+
+rif_counter_cleanup()
+{
+	local count=$1; shift
+
+	pre_cleanup
+
+	for ((i = 1; i <= count; i++)); do
+		vlan_destroy $h2 $i
+	done
+
+	rif_counter_h2_destroy
+	rif_counter_h1_destroy
+
+	vrf_cleanup
+
+	if [[ -v RIF_COUNTER_BATCH_FILE ]]; then
+		rm -f $RIF_COUNTER_BATCH_FILE
+	fi
+}
+
+
+rif_counter_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	RIF_COUNTER_BATCH_FILE="$(mktemp)"
+
+	for ((i = 1; i <= count; i++)); do
+		vlan_create $h2 $i v$h2 $(rif_counter_addr4pfx $i 2)
+	done
+	for ((i = 1; i <= count; i++)); do
+		cat >> $RIF_COUNTER_BATCH_FILE <<-EOF
+			stats set dev $h2.$i l3_stats on
+		EOF
+	done
+
+	ip -b $RIF_COUNTER_BATCH_FILE
+	check_err_fail $should_fail $? "RIF counter enablement"
+}
+
+rif_counter_traffic_test()
+{
+	local count=$1; shift
+	local i;
+
+	for ((i = count; i > 0; i /= 2)); do
+		$MZ $h1 -Q $i -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \
+		    -A $(rif_counter_addr4 $i 1) \
+		    -B $(rif_counter_addr4 $i 2) \
+		    -q -t udp sp=54321,dp=12345
+	done
+	for ((i = count; i > 0; i /= 2)); do
+		busywait "$TC_HIT_TIMEOUT" until_counter_is "== 1" \
+			 hw_stats_get l3_stats $h2.$i rx packets > /dev/null
+		check_err $? "Traffic not seen at RIF $h2.$i"
+	done
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh
new file mode 100755
index 000000000000..b8bbe94f4736
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	lag_rif_add
+	lag_rif_nomaster
+	lag_rif_remaster
+	lag_rif_nomaster_addr
+"
+
+REQUIRE_TEAMD="yes"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	team_create lag1 lacp
+	ip link set dev lag1 addrgenmode none
+	ip link set dev lag1 address $(mac_get $swp1)
+
+	team_create lag2 lacp
+	ip link set dev lag2 addrgenmode none
+	ip link set dev lag2 address $(mac_get $swp2)
+
+	ip link set dev $swp1 master lag1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master lag2
+	ip link set dev $swp2 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp2 down
+
+	ip link set dev $swp1 nomaster
+	ip link set dev $swp1 down
+
+	ip link del dev lag2
+	ip link del dev lag1
+}
+
+lag_rif_add()
+{
+	RET=0
+
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	__addr_add_del lag1 add 192.0.2.2/28
+	sleep 1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0 + 1))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	log_test "Add RIF for LAG on address addition"
+}
+
+lag_rif_nomaster()
+{
+	RET=0
+
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	ip link set dev $swp1 nomaster
+	sleep 1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0 - 1))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	log_test "Drop RIF for LAG on port deslavement"
+}
+
+lag_rif_remaster()
+{
+	RET=0
+
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	ip link set dev $swp1 down
+	ip link set dev $swp1 master lag1
+	ip link set dev $swp1 up
+	setup_wait_dev $swp1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0 + 1))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	log_test "Add RIF for LAG on port reenslavement"
+}
+
+lag_rif_nomaster_addr()
+{
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+	# Adding an address while the port is LAG'd shouldn't generate a RIF.
+	__addr_add_del $swp1 add 192.0.2.65/28
+	sleep 1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	# Removing the port from LAG should drop RIF for the LAG (as tested in
+	# lag_rif_nomaster), but since the port now has an address, it should
+	# gain a RIF.
+	ip link set dev $swp1 nomaster
+	sleep 1
+	local rifs_occ_t2=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0))
+
+	((expected_rifs == rifs_occ_t2))
+	check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used"
+
+	__addr_add_del $swp1 del 192.0.2.65/28
+	log_test "Add RIF for port on deslavement from LAG"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh
new file mode 100755
index 000000000000..d1a9d379eaf3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	lag_rif_add
+	lag_rif_nomaster
+	lag_rif_remaster
+	lag_rif_nomaster_addr
+"
+
+REQUIRE_TEAMD="yes"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	team_create lag1 lacp
+	ip link set dev lag1 addrgenmode none
+	ip link set dev lag1 address $(mac_get $swp1)
+
+	team_create lag2 lacp
+	ip link set dev lag2 addrgenmode none
+	ip link set dev lag2 address $(mac_get $swp2)
+
+	ip link set dev $swp1 master lag1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master lag2
+	ip link set dev $swp2 up
+
+	vlan_create lag1 100
+	ip link set dev lag1.100 addrgenmode none
+
+	vlan_create lag1 200
+	ip link set dev lag1.200 addrgenmode none
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link del dev lag1.200
+	ip link del dev lag1.100
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp2 down
+
+	ip link set dev $swp1 nomaster
+	ip link set dev $swp1 down
+
+	ip link del dev lag2
+	ip link del dev lag1
+}
+
+lag_rif_add()
+{
+	RET=0
+
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	__addr_add_del lag1.100 add 192.0.2.2/28
+	__addr_add_del lag1.200 add 192.0.2.18/28
+	sleep 1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0 + 2))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	log_test "Add RIFs for LAG VLANs on address addition"
+}
+
+lag_rif_nomaster()
+{
+	RET=0
+
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	ip link set dev $swp1 nomaster
+	sleep 1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0 - 2))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	log_test "Drop RIFs for LAG VLANs on port deslavement"
+}
+
+lag_rif_remaster()
+{
+	RET=0
+
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	ip link set dev $swp1 down
+	ip link set dev $swp1 master lag1
+	ip link set dev $swp1 up
+	setup_wait_dev $swp1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0 + 2))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	log_test "Add RIFs for LAG VLANs on port reenslavement"
+}
+
+lag_rif_nomaster_addr()
+{
+	local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+	# Adding an address while the port is LAG'd shouldn't generate a RIF.
+	__addr_add_del $swp1 add 192.0.2.65/28
+	sleep 1
+	local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0))
+
+	((expected_rifs == rifs_occ_t1))
+	check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	# Removing the port from LAG should drop two RIFs for the LAG VLANs (as
+	# tested in lag_rif_nomaster), but since the port now has an address, it
+	# should gain a RIF.
+	ip link set dev $swp1 nomaster
+	sleep 1
+	local rifs_occ_t2=$(devlink_resource_occ_get rifs)
+	local expected_rifs=$((rifs_occ_t0 - 1))
+
+	((expected_rifs == rifs_occ_t2))
+	check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used"
+
+	__addr_add_del $swp1 del 192.0.2.65/28
+	log_test "Add RIF for port on deslavement from LAG"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh
new file mode 100644
index 000000000000..71e7681f15f6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for RIF MAC profiles resource. The test adds VLAN netdevices according to
+# the maximum number of RIF MAC profiles, sets each of them with a random
+# MAC address, and checks that eventually the number of occupied RIF MAC
+# profiles equals the maximum number of RIF MAC profiles.
+
+
+RIF_MAC_PROFILE_NUM_NETIFS=2
+
+rif_mac_profiles_create()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+	local batch_file="$(mktemp)"
+
+	for ((i = 1; i <= count; i++)); do
+		vlan=$(( i*10 ))
+		m=$(( i*11 ))
+
+		cat >> $batch_file <<-EOF
+			link add link $h1 name $h1.$vlan \
+				address 00:$m:$m:$m:$m:$m type vlan id $vlan
+			address add 192.0.$m.1/24 dev $h1.$vlan
+		EOF
+	done
+
+	ip -b $batch_file &> /dev/null
+	check_err_fail $should_fail $? "RIF creation"
+
+	rm -f $batch_file
+}
+
+rif_mac_profile_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	rif_mac_profiles_create $count $should_fail
+
+	occ=$(devlink -j resource show $DEVLINK_DEV \
+	      | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]')
+
+	[[ $occ -eq $count ]]
+	check_err_fail $should_fail $? "Attempt to use $count profiles (actual result $occ)"
+}
+
+rif_mac_profile_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	# Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses
+	# being generated and RIFs being created.
+	sysctl_set net.ipv6.conf.$h1.disable_ipv6 1
+	sysctl_set net.ipv6.conf.$h2.disable_ipv6 1
+
+	ip link set $h1 up
+	ip link set $h2 up
+}
+
+rif_mac_profile_cleanup()
+{
+	pre_cleanup
+
+	ip link set $h2 down
+	ip link set $h1 down
+
+	sysctl_restore net.ipv6.conf.$h2.disable_ipv6
+	sysctl_restore net.ipv6.conf.$h1.disable_ipv6
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh
new file mode 100755
index 000000000000..c18340cee55d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh
@@ -0,0 +1,213 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	mac_profile_test
+"
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+	ip route add 198.51.100.0/24 vrf v$h1 nexthop via 192.0.2.2
+
+	tc qdisc add dev $h1 ingress
+}
+
+h1_destroy()
+{
+	tc qdisc del dev $h1 ingress
+
+	ip route del 198.51.100.0/24 vrf v$h1
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/24
+	ip route add 192.0.2.0/24 vrf v$h2 nexthop via 198.51.100.2
+
+	tc qdisc add dev $h2 ingress
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 ingress
+
+	ip route del 192.0.2.0/24 vrf v$h2
+	simple_if_fini $h2 198.51.100.1/24
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	tc qdisc add dev $rp1 clsact
+	tc qdisc add dev $rp2 clsact
+	ip address add 192.0.2.2/24 dev $rp1
+	ip address add 198.51.100.2/24 dev $rp2
+}
+
+router_destroy()
+{
+	ip address del 198.51.100.2/24 dev $rp2
+	ip address del 192.0.2.2/24 dev $rp1
+	tc qdisc del dev $rp2 clsact
+	tc qdisc del dev $rp1 clsact
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+h1_to_h2()
+{
+	local test_name=$@; shift
+	local smac=$(mac_get $rp2)
+
+	RET=0
+
+	# Replace neighbour to avoid first packet being forwarded in software
+	ip neigh replace dev $rp2 198.51.100.1 lladdr $(mac_get $h2)
+
+	# Add a filter to ensure that packets are forwarded in hardware. Cannot
+	# match on source MAC because it is not set in eACL after routing
+	tc filter add dev $rp2 egress proto ip pref 1 handle 101 \
+		flower skip_sw ip_proto udp src_port 12345 dst_port 54321 \
+		action pass
+
+	# Add a filter to ensure that packets are received with the correct
+	# source MAC
+	tc filter add dev $h2 ingress proto ip pref 1 handle 101 \
+		flower skip_sw src_mac $smac ip_proto udp src_port 12345 \
+		dst_port 54321 action pass
+
+	$MZ $h1 -a own -b $(mac_get $rp1) -t udp "sp=12345,dp=54321" \
+		-A 192.0.2.1 -B 198.51.100.1 -c 10 -p 100 -d 1msec -q
+
+	tc_check_packets "dev $rp2 egress" 101 10
+	check_err $? "packets not forwarded in hardware"
+
+	tc_check_packets "dev $h2 ingress" 101 10
+	check_err $? "packets not forwarded with correct source mac"
+
+	log_test "h1->h2: $test_name"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+	ip neigh del dev $rp2 198.51.100.1 lladdr $(mac_get $h2)
+}
+
+h2_to_h1()
+{
+	local test_name=$@; shift
+	local rp1_mac=$(mac_get $rp1)
+
+	RET=0
+
+	ip neigh replace dev $rp1 192.0.2.1 lladdr $(mac_get $h1)
+
+	tc filter add dev $rp1 egress proto ip pref 1 handle 101 \
+		flower skip_sw ip_proto udp src_port 54321 dst_port 12345 \
+		action pass
+
+	tc filter add dev $h1 ingress proto ip pref 1 handle 101 \
+		flower skip_sw src_mac $rp1_mac ip_proto udp src_port 54321 \
+		dst_port 12345 action pass
+
+	$MZ $h2 -a own -b $(mac_get $rp2) -t udp "sp=54321,dp=12345" \
+		-A 198.51.100.1 -B 192.0.2.1 -c 10 -p 100 -d 1msec -q
+
+	tc_check_packets "dev $rp1 egress" 101 10
+	check_err $? "packets not forwarded in hardware"
+
+	tc_check_packets "dev $h1 ingress" 101 10
+	check_err $? "packets not forwarded with correct source mac"
+
+	log_test "h2->h1: $test_name"
+
+	tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $rp1 egress protocol ip pref 1 handle 101 flower
+	ip neigh del dev $rp1 192.0.2.1 lladdr $(mac_get $h1)
+}
+
+smac_test()
+{
+	local test_name=$@; shift
+
+	# Test that packets forwarded to $h2 via $rp2 are forwarded with the
+	# current source MAC of $rp2
+	h1_to_h2 $test_name
+
+	# Test that packets forwarded to $h1 via $rp1 are forwarded with the
+	# current source MAC of $rp1. This MAC is never changed during the test,
+	# but given the shared nature of MAC profile, the point is to see that
+	# changes to the MAC of $rp2 do not affect that of $rp1
+	h2_to_h1 $test_name
+}
+
+mac_profile_test()
+{
+	local rp2_mac=$(mac_get $rp2)
+
+	# Test behavior when the RIF backing $rp2 is transitioned to use
+	# a new MAC profile
+	ip link set dev $rp2 addr 00:11:22:33:44:55
+	smac_test "new mac profile"
+
+	# Test behavior when the MAC profile used by the RIF is edited
+	ip link set dev $rp2 address 00:22:22:22:22:22
+	smac_test "edit mac profile"
+
+	# Restore original MAC
+	ip link set dev $rp2 addr $rp2_mac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+mac_profiles=$(devlink_resource_size_get rif_mac_profiles)
+if [[ $mac_profiles -ne 1 ]]; then
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh
new file mode 100755
index 000000000000..026a126f584d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	rif_mac_profile_edit_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	# Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses
+	# being generated and RIFs being created
+	sysctl_set net.ipv6.conf.$h1.disable_ipv6 1
+	sysctl_set net.ipv6.conf.$h2.disable_ipv6 1
+
+	ip link set $h1 up
+	ip link set $h2 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set $h2 down
+	ip link set $h1 down
+
+	sysctl_restore net.ipv6.conf.$h2.disable_ipv6
+	sysctl_restore net.ipv6.conf.$h1.disable_ipv6
+
+	# Reload in order to clean all the RIFs and RIF MAC profiles created
+	devlink_reload
+}
+
+create_max_rif_mac_profiles()
+{
+	local count=$1; shift
+	local batch_file="$(mktemp)"
+
+	for ((i = 1; i <= count; i++)); do
+		vlan=$(( i*10 ))
+		m=$(( i*11 ))
+
+		cat >> $batch_file <<-EOF
+			link add link $h1 name $h1.$vlan \
+				address 00:$m:$m:$m:$m:$m type vlan id $vlan
+			address add 192.0.$m.1/24 dev $h1.$vlan
+		EOF
+	done
+
+	ip -b $batch_file &> /dev/null
+	rm -f $batch_file
+}
+
+rif_mac_profile_replacement_test()
+{
+	local h1_10_mac=$(mac_get $h1.10)
+
+	RET=0
+
+	ip link set $h1.10 address 00:12:34:56:78:99
+	check_err $?
+
+	log_test "RIF MAC profile replacement"
+
+	ip link set $h1.10 address $h1_10_mac
+}
+
+rif_mac_profile_consolidation_test()
+{
+	local count=$1; shift
+	local h1_20_mac
+
+	RET=0
+
+	if [[ $count -eq 1 ]]; then
+		return
+	fi
+
+	h1_20_mac=$(mac_get $h1.20)
+
+	# Set the MAC of $h1.20 to that of $h1.10 and confirm that they are
+	# using the same MAC profile.
+	ip link set $h1.20 address 00:11:11:11:11:11
+	check_err $?
+
+	occ=$(devlink -j resource show $DEVLINK_DEV \
+	      | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]')
+
+	[[ $occ -eq $((count - 1)) ]]
+	check_err $? "MAC profile occupancy did not decrease"
+
+	log_test "RIF MAC profile consolidation"
+
+	ip link set $h1.20 address $h1_20_mac
+}
+
+rif_mac_profile_shared_replacement_test()
+{
+	local count=$1; shift
+	local i=$((count + 1))
+	local vlan=$(( i*10 ))
+	local m=11
+
+	RET=0
+
+	# Create a VLAN netdevice that has the same MAC as the first one.
+	ip link add link $h1 name $h1.$vlan address 00:$m:$m:$m:$m:$m \
+		type vlan id $vlan
+	ip address add 192.0.$m.1/24 dev $h1.$vlan
+
+	# MAC replacement should fail because all the MAC profiles are in use
+	# and the profile is shared between multiple RIFs
+	m=$(( i*11 ))
+	ip link set $h1.$vlan address 00:$m:$m:$m:$m:$m &> /dev/null
+	check_fail $?
+
+	log_test "RIF MAC profile shared replacement"
+
+	ip link del dev $h1.$vlan
+}
+
+rif_mac_profile_edit_test()
+{
+	local count=$(devlink_resource_size_get rif_mac_profiles)
+
+	create_max_rif_mac_profiles $count
+
+	rif_mac_profile_replacement_test
+	rif_mac_profile_consolidation_test $count
+	rif_mac_profile_shared_replacement_test $count
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh
new file mode 100755
index 000000000000..6ce317cfaf9b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test enslavement to LAG with a clean slate.
+# See $lib_dir/router_bridge_lag.sh for further details.
+
+ALL_TESTS="
+	config_devlink_reload
+	config_enslave_h1
+	config_enslave_h2
+	config_enslave_h3
+	config_enslave_h4
+	config_enslave_swp1
+	config_enslave_swp2
+	config_enslave_swp3
+	config_enslave_swp4
+	config_wait
+	ping_ipv4
+	ping_ipv6
+"
+
+config_devlink_reload()
+{
+	log_info "Devlink reload"
+	devlink_reload
+}
+
+config_enslave_h1()
+{
+	config_enslave $h1 lag1
+}
+
+config_enslave_h2()
+{
+	config_enslave $h2 lag4
+}
+
+config_enslave_h3()
+{
+	config_enslave $h3 lag4
+}
+
+config_enslave_h4()
+{
+	config_enslave $h4 lag1
+}
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+EXTRA_SOURCE="source $lib_dir/devlink_lib.sh"
+source $lib_dir/router_bridge_lag.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
new file mode 100644
index 000000000000..683759d29199
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ROUTER_NUM_NETIFS=4
+: ${TIMEOUT:=20000} # ms
+
+router_h1_create()
+{
+	simple_if_init $h1 192.0.1.1/24
+}
+
+router_h1_destroy()
+{
+	simple_if_fini $h1 192.0.1.1/24
+}
+
+router_h2_create()
+{
+	simple_if_init $h2 192.0.2.1/24
+	tc qdisc add dev $h2 handle ffff: ingress
+}
+
+router_h2_destroy()
+{
+	tc qdisc del dev $h2 handle ffff: ingress
+	simple_if_fini $h2 192.0.2.1/24
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	ip address add 192.0.1.2/24 dev $rp1
+	ip address add 192.0.2.2/24 dev $rp2
+}
+
+router_destroy()
+{
+	ip address del 192.0.2.2/24 dev $rp2
+	ip address del 192.0.1.2/24 dev $rp1
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+router_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h1mac=$(mac_get $h1)
+	rp1mac=$(mac_get $rp1)
+
+	vrf_prepare
+
+	router_h1_create
+	router_h2_create
+
+	router_create
+}
+
+wait_for_routes()
+{
+	local t0=$1; shift
+	local route_count=$1; shift
+
+	local t1=$(ip route | grep 'offload' | grep -v 'offload_failed' | wc -l)
+	local delta=$((t1 - t0))
+	echo $delta
+	[[ $delta -ge $route_count ]]
+}
+
+router_routes_create()
+{
+	local route_count=$1
+	local count=0
+
+	ROUTE_FILE="$(mktemp)"
+
+	for i in {0..255}
+	do
+		for j in {0..255}
+		do
+			for k in {0..255}
+			do
+				if [[ $count -eq $route_count ]]; then
+					break 3
+				fi
+
+				echo route add 193.${i}.${j}.${k}/32 dev $rp2 \
+					>> $ROUTE_FILE
+				((count++))
+			done
+		done
+	done
+
+	ip -b $ROUTE_FILE &> /dev/null
+}
+
+router_routes_destroy()
+{
+	if [[ -v ROUTE_FILE ]]; then
+		rm -f $ROUTE_FILE
+	fi
+}
+
+router_test()
+{
+	local route_count=$1
+	local should_fail=$2
+	local delta
+
+	RET=0
+
+	local t0=$(ip route | grep -o 'offload' | wc -l)
+	router_routes_create $route_count
+	delta=$(busywait "$TIMEOUT" wait_for_routes $t0 $route_count)
+
+	check_err_fail $should_fail $? "Offload routes: Expected $route_count, got $delta."
+	if [[ $RET -ne 0 ]] || [[ $should_fail -eq 1 ]]; then
+		return
+	fi
+
+	router_routes_destroy
+}
+
+router_cleanup()
+{
+	pre_cleanup
+
+	router_routes_destroy
+	router_destroy
+
+	router_h2_destroy
+	router_h1_destroy
+
+	vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
new file mode 100755
index 000000000000..45a569618424
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -0,0 +1,935 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test various interface configuration scenarios. Observe that configurations
+# deemed valid by mlxsw succeed, invalid configurations fail and that no traces
+# are produced. To prevent the test from passing in case traces are produced,
+# the user can set the 'kernel.panic_on_warn' and 'kernel.panic_on_oops'
+# sysctls in its environment.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	rif_vrf_set_addr_test
+	rif_non_inherit_bridge_addr_test
+	vlan_interface_deletion_test
+	bridge_deletion_test
+	bridge_vlan_flags_test
+	vlan_1_test
+	duplicate_vlans_test
+	vlan_rif_refcount_test
+	subport_rif_refcount_test
+	subport_rif_lag_join_test
+	vlan_dev_deletion_test
+	lag_unlink_slaves_test
+	lag_dev_deletion_test
+	vlan_interface_uppers_test
+	bridge_extern_learn_test
+	neigh_offload_test
+	nexthop_offload_test
+	nexthop_obj_invalid_test
+	nexthop_obj_offload_test
+	nexthop_obj_group_offload_test
+	nexthop_obj_bucket_offload_test
+	nexthop_obj_blackhole_offload_test
+	nexthop_obj_route_offload_test
+	bridge_locked_port_test
+	devlink_reload_test
+"
+NUM_NETIFS=2
+: ${TIMEOUT:=20000} # ms
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+rif_vrf_set_addr_test()
+{
+	# Test that it is possible to set an IP address on a VRF upper despite
+	# its random MAC address.
+	RET=0
+
+	ip link add name vrf-test type vrf table 10
+	ip link set dev $swp1 master vrf-test
+
+	ip -4 address add 192.0.2.1/24 dev vrf-test
+	check_err $? "failed to set IPv4 address on VRF"
+	ip -6 address add 2001:db8:1::1/64 dev vrf-test
+	check_err $? "failed to set IPv6 address on VRF"
+
+	log_test "RIF - setting IP address on VRF"
+
+	ip link del dev vrf-test
+}
+
+rif_non_inherit_bridge_addr_test()
+{
+	local swp2_mac=$(mac_get $swp2)
+
+	RET=0
+
+	# Create first RIF
+	ip addr add dev $swp1 192.0.2.1/28
+	check_err $?
+
+	# Create a FID RIF
+	ip link add name br1 up type bridge vlan_filtering 0
+	ip link set dev br1 addr $swp2_mac
+	ip link set dev $swp2 master br1
+	ip addr add dev br1 192.0.2.17/28
+	check_err $?
+
+	# Prepare a device with a low MAC address
+	ip link add name d up type dummy
+	ip link set dev d addr 00:11:22:33:44:55
+
+	# Attach the device to br1. Since the bridge address was set, it should
+	# work.
+	ip link set dev d master br1 &>/dev/null
+	check_err $? "Could not attach a device with low MAC to a bridge with RIF"
+
+	# Port MAC address change should be allowed for a bridge with set MAC.
+	ip link set dev $swp2 addr 00:11:22:33:44:55
+	check_err $? "Changing swp2's MAC address not permitted"
+
+	log_test "RIF - attach port with bad MAC to bridge with set MAC"
+
+	ip link set dev $swp2 addr $swp2_mac
+	ip link del dev d
+	ip link del dev br1
+	ip addr del dev $swp1 192.0.2.1/28
+}
+
+vlan_interface_deletion_test()
+{
+	# Test that when a VLAN interface is deleted, its associated router
+	# interface (RIF) is correctly deleted and not leaked. See commit
+	# c360867ec46a ("mlxsw: spectrum: Delete RIF when VLAN device is
+	# removed") for more details
+	RET=0
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev $swp1 master br0
+
+	ip link add link br0 name br0.10 type vlan id 10
+	ip -6 address add 2001:db8:1::1/64 dev br0.10
+	ip link del dev br0.10
+
+	# If we leaked the previous RIF, then this should produce a trace
+	ip link add link br0 name br0.20 type vlan id 20
+	ip -6 address add 2001:db8:1::1/64 dev br0.20
+	ip link del dev br0.20
+
+	log_test "vlan interface deletion"
+
+	ip link del dev br0
+}
+
+bridge_deletion_test()
+{
+	# Test that when a bridge with VLAN interfaces is deleted, we correctly
+	# delete the associated RIFs. See commit 602b74eda813 ("mlxsw:
+	# spectrum_switchdev: Do not leak RIFs when removing bridge") for more
+	# details
+	RET=0
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev $swp1 master br0
+	ip -6 address add 2001:db8::1/64 dev br0
+
+	ip link add link br0 name br0.10 type vlan id 10
+	ip -6 address add 2001:db8:1::1/64 dev br0.10
+
+	ip link add link br0 name br0.20 type vlan id 20
+	ip -6 address add 2001:db8:2::1/64 dev br0.20
+
+	ip link del dev br0
+
+	# If we leaked previous RIFs, then this should produce a trace
+	ip -6 address add 2001:db8:1::1/64 dev $swp1
+	ip -6 address del 2001:db8:1::1/64 dev $swp1
+
+	log_test "bridge deletion"
+}
+
+bridge_vlan_flags_test()
+{
+	# Test that when bridge VLAN flags are toggled, we do not take
+	# unnecessary references on related structs. See commit 9e25826ffc94
+	# ("mlxsw: spectrum_switchdev: Fix port_vlan refcounting") for more
+	# details
+	RET=0
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev $swp1 master br0
+
+	bridge vlan add vid 10 dev $swp1 pvid untagged
+	bridge vlan add vid 10 dev $swp1 untagged
+	bridge vlan add vid 10 dev $swp1 pvid
+	bridge vlan add vid 10 dev $swp1
+	ip link del dev br0
+
+	# If we did not handle references correctly, then this should produce a
+	# trace
+	devlink_reload
+
+	log_test "bridge vlan flags"
+}
+
+vlan_1_test()
+{
+	# Test that VLAN 1 can be configured over mlxsw ports. In the past it
+	# was used internally for untagged traffic. See commit 47bf9df2e820
+	# ("mlxsw: spectrum: Forbid creation of VLAN 1 over port/LAG") for more
+	# details
+	RET=0
+
+	ip link add link $swp1 name $swp1.1 type vlan id 1
+	check_err $? "did not manage to create vlan 1 when should"
+
+	log_test "vlan 1"
+
+	ip link del dev $swp1.1
+}
+
+duplicate_vlans_test()
+{
+	# Test that on a given port a VLAN is only used once. Either as VLAN
+	# in a VLAN-aware bridge or as a VLAN device
+	RET=0
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev $swp1 master br0
+	bridge vlan add vid 10 dev $swp1
+
+	ip link add link $swp1 name $swp1.10 type vlan id 10 &> /dev/null
+	check_fail $? "managed to create vlan device when should not"
+
+	bridge vlan del vid 10 dev $swp1
+	ip link add link $swp1 name $swp1.10 type vlan id 10
+	check_err $? "did not manage to create vlan device when should"
+	bridge vlan add vid 10 dev $swp1 &> /dev/null
+	check_fail $? "managed to add bridge vlan when should not"
+
+	log_test "duplicate vlans"
+
+	ip link del dev $swp1.10
+	ip link del dev br0
+}
+
+vlan_rif_refcount_test()
+{
+	# Test that RIFs representing VLAN interfaces are not affected from
+	# ports member in the VLAN. We use the offload indication on routes
+	# configured on the RIF to understand if it was created / destroyed
+	RET=0
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev $swp1 master br0
+
+	ip link set dev $swp1 up
+	ip link set dev br0 up
+
+	ip link add link br0 name br0.10 up type vlan id 10
+	ip -6 address add 2001:db8:1::1/64 dev br0.10
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
+	check_err $? "vlan rif was not created before adding port to vlan"
+
+	bridge vlan add vid 10 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
+	check_err $? "vlan rif was destroyed after adding port to vlan"
+
+	bridge vlan del vid 10 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
+	check_err $? "vlan rif was destroyed after removing port from vlan"
+
+	ip link set dev $swp1 nomaster
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
+	check_err $? "vlan rif was not destroyed after unlinking port from bridge"
+
+	log_test "vlan rif refcount"
+
+	ip link del dev br0.10
+	ip link set dev $swp1 down
+	ip link del dev br0
+}
+
+subport_rif_refcount_test()
+{
+	# Test that RIFs representing upper devices of physical ports are
+	# reference counted correctly and destroyed when should. We use the
+	# offload indication on routes configured on the RIF to understand if
+	# it was created / destroyed
+	RET=0
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link set dev $swp1 master bond1
+	ip link set dev $swp2 master bond1
+
+	ip link set dev bond1 up
+	ip link add link bond1 name bond1.10 up type vlan id 10
+	ip -6 address add 2001:db8:1::1/64 dev bond1
+	ip -6 address add 2001:db8:2::1/64 dev bond1.10
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif was not created on lag device"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
+	check_err $? "subport rif was not created on vlan device"
+
+	ip link set dev $swp1 nomaster
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was destroyed when should not"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
+	check_err $? "subport rif of vlan device was destroyed when should not"
+
+	ip link set dev $swp2 nomaster
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was not destroyed when should"
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
+	check_err $? "subport rif of vlan device was not destroyed when should"
+
+	log_test "subport rif refcount"
+
+	ip link del dev bond1.10
+	ip link del dev bond1
+}
+
+subport_rif_lag_join_test()
+{
+	# Test that the reference count of a RIF configured for a LAG is
+	# incremented / decremented when ports join / leave the LAG. We use the
+	# offload indication on routes configured on the RIF to understand if
+	# it was created / destroyed
+	RET=0
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link set dev $swp1 master bond1
+	ip link set dev $swp2 master bond1
+
+	ip link set dev bond1 up
+	ip -6 address add 2001:db8:1::1/64 dev bond1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif was not created on lag device"
+
+	ip link set dev $swp1 nomaster
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was destroyed after removing one port"
+
+	ip link set dev $swp1 master bond1
+	ip link set dev $swp2 nomaster
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was destroyed after re-adding a port and removing another"
+
+	ip link set dev $swp1 nomaster
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was not destroyed when should"
+
+	log_test "subport rif lag join"
+
+	ip link del dev bond1
+}
+
+vlan_dev_deletion_test()
+{
+	# Test that VLAN devices are correctly deleted / unlinked when enslaved
+	# to bridge
+	RET=0
+
+	ip link add name br10 type bridge
+	ip link add name br20 type bridge
+	ip link add name br30 type bridge
+	ip link add link $swp1 name $swp1.10 type vlan id 10
+	ip link add link $swp1 name $swp1.20 type vlan id 20
+	ip link add link $swp1 name $swp1.30 type vlan id 30
+	ip link set dev $swp1.10 master br10
+	ip link set dev $swp1.20 master br20
+	ip link set dev $swp1.30 master br30
+
+	# If we did not handle the situation correctly, then these operations
+	# might produce a trace
+	ip link set dev $swp1.30 nomaster
+	ip link del dev $swp1.20
+	# Deletion via ioctl uses different code paths from netlink
+	vconfig rem $swp1.10 &> /dev/null
+
+	log_test "vlan device deletion"
+
+	ip link del dev $swp1.30
+	ip link del dev br30
+	ip link del dev br20
+	ip link del dev br10
+}
+
+lag_create()
+{
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link set dev $swp1 master bond1
+	ip link set dev $swp2 master bond1
+
+	ip link add link bond1 name bond1.10 type vlan id 10
+	ip link add link bond1 name bond1.20 type vlan id 20
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev bond1 master br0
+
+	ip link add name br10 type bridge
+	ip link set dev bond1.10 master br10
+
+	ip link add name br20 type bridge
+	ip link set dev bond1.20 master br20
+}
+
+lag_unlink_slaves_test()
+{
+	# Test that ports are correctly unlinked from their LAG master, when
+	# the LAG and its VLAN uppers are enslaved to bridges
+	RET=0
+
+	lag_create
+
+	ip link set dev $swp1 nomaster
+	check_err $? "lag slave $swp1 was not unlinked from master"
+	ip link set dev $swp2 nomaster
+	check_err $? "lag slave $swp2 was not unlinked from master"
+
+	# Try to configure corresponding VLANs as router interfaces
+	ip -6 address add 2001:db8:1::1/64 dev $swp1
+	check_err $? "failed to configure ip address on $swp1"
+
+	ip link add link $swp1 name $swp1.10 type vlan id 10
+	ip -6 address add 2001:db8:10::1/64 dev $swp1.10
+	check_err $? "failed to configure ip address on $swp1.10"
+
+	ip link add link $swp1 name $swp1.20 type vlan id 20
+	ip -6 address add 2001:db8:20::1/64 dev $swp1.20
+	check_err $? "failed to configure ip address on $swp1.20"
+
+	log_test "lag slaves unlinking"
+
+	ip link del dev $swp1.20
+	ip link del dev $swp1.10
+	ip address flush dev $swp1
+
+	ip link del dev br20
+	ip link del dev br10
+	ip link del dev br0
+	ip link del dev bond1
+}
+
+lag_dev_deletion_test()
+{
+	# Test that LAG device is correctly deleted, when the LAG and its VLAN
+	# uppers are enslaved to bridges
+	RET=0
+
+	lag_create
+
+	ip link del dev bond1
+
+	log_test "lag device deletion"
+
+	ip link del dev br20
+	ip link del dev br10
+	ip link del dev br0
+}
+
+vlan_interface_uppers_test()
+{
+	# Test that uppers of a VLAN interface are correctly sanitized
+	RET=0
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev $swp1 master br0
+
+	ip link add link br0 name br0.10 type vlan id 10
+
+	ip -6 address add 2001:db8:1::1/64 dev br0.10
+	ip link add link br0.10 name macvlan0 type macvlan mode private
+	check_err $? "did not manage to create a macvlan when should"
+
+	ip link del dev macvlan0
+
+	ip link add name vrf-test type vrf table 10
+	ip link set dev br0.10 master vrf-test
+	check_err $? "did not manage to enslave vlan interface to vrf"
+	ip link del dev vrf-test
+
+	ip link add name br-test type bridge
+	ip link set dev br0.10 master br-test &> /dev/null
+	check_fail $? "managed to enslave vlan interface to bridge when should not"
+	ip link del dev br-test
+
+	log_test "vlan interface uppers"
+
+	ip link del dev br0
+}
+
+bridge_extern_learn_test()
+{
+	# Test that externally learned entries added from user space are
+	# marked as offloaded
+	RET=0
+
+	ip link add name br0 type bridge
+	ip link set dev $swp1 master br0
+
+	bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn
+
+	busywait "$TIMEOUT" wait_for_offload \
+		bridge fdb show brport $swp1 de:ad:be:ef:13:37
+	check_err $? "fdb entry not marked as offloaded when should"
+
+	log_test "externally learned fdb entry"
+
+	ip link del dev br0
+}
+
+neigh_offload_test()
+{
+	# Test that IPv4 and IPv6 neighbour entries are marked as offloaded
+	RET=0
+
+	ip -4 address add 192.0.2.1/24 dev $swp1
+	ip -6 address add 2001:db8:1::1/64 dev $swp1
+
+	ip -4 neigh add 192.0.2.2 lladdr de:ad:be:ef:13:37 nud perm dev $swp1
+	ip -6 neigh add 2001:db8:1::2 lladdr de:ad:be:ef:13:37 nud perm \
+		dev $swp1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -4 neigh show dev $swp1 192.0.2.2
+	check_err $? "ipv4 neigh entry not marked as offloaded when should"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 neigh show dev $swp1 2001:db8:1::2
+	check_err $? "ipv6 neigh entry not marked as offloaded when should"
+
+	log_test "neighbour offload indication"
+
+	ip -6 neigh del 2001:db8:1::2 dev $swp1
+	ip -4 neigh del 192.0.2.2 dev $swp1
+	ip -6 address del 2001:db8:1::1/64 dev $swp1
+	ip -4 address del 192.0.2.1/24 dev $swp1
+}
+
+nexthop_offload_test()
+{
+	# Test that IPv4 and IPv6 nexthops are marked as offloaded
+	RET=0
+
+	sysctl_set net.ipv6.conf.$swp2.keep_addr_on_down 1
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2 192.0.2.2/24 2001:db8:1::2/64
+	setup_wait
+
+	ip -4 route add 198.51.100.0/24 vrf v$swp1 \
+		nexthop via 192.0.2.2 dev $swp1
+	ip -6 route add 2001:db8:2::/64 vrf v$swp1 \
+		nexthop via 2001:db8:1::2 dev $swp1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -4 route show 198.51.100.0/24 vrf v$swp1
+	check_err $? "ipv4 nexthop not marked as offloaded when should"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route show 2001:db8:2::/64 vrf v$swp1
+	check_err $? "ipv6 nexthop not marked as offloaded when should"
+
+	ip link set dev $swp2 down
+	sleep 1
+
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -4 route show 198.51.100.0/24 vrf v$swp1
+	check_err $? "ipv4 nexthop marked as offloaded when should not"
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route show 2001:db8:2::/64 vrf v$swp1
+	check_err $? "ipv6 nexthop marked as offloaded when should not"
+
+	ip link set dev $swp2 up
+	setup_wait
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -4 route show 198.51.100.0/24 vrf v$swp1
+	check_err $? "ipv4 nexthop not marked as offloaded after neigh add"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route show 2001:db8:2::/64 vrf v$swp1
+	check_err $? "ipv6 nexthop not marked as offloaded after neigh add"
+
+	log_test "nexthop offload indication"
+
+	ip -6 route del 2001:db8:2::/64 vrf v$swp1
+	ip -4 route del 198.51.100.0/24 vrf v$swp1
+
+	simple_if_fini $swp2 192.0.2.2/24 2001:db8:1::2/64
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+	sysctl_restore net.ipv6.conf.$swp2.keep_addr_on_down
+}
+
+nexthop_obj_invalid_test()
+{
+	# Test that invalid nexthop object configurations are rejected
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2 192.0.2.2/24 2001:db8:1::2/64
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.3 fdb
+	check_fail $? "managed to configure an FDB nexthop when should not"
+
+	ip nexthop add id 1 encap mpls 200/300 via 192.0.2.3 dev $swp1
+	check_fail $? "managed to configure a nexthop with MPLS encap when should not"
+
+	ip nexthop add id 1 dev $swp1
+	ip nexthop add id 2 dev $swp1
+	ip nexthop add id 3 via 192.0.2.3 dev $swp1
+	ip nexthop add id 10 group 1/2
+	check_fail $? "managed to configure a nexthop group with device-only nexthops when should not"
+
+	ip nexthop add id 10 group 3 type resilient buckets 7
+	check_fail $? "managed to configure a too small resilient nexthop group when should not"
+
+	ip nexthop add id 10 group 3 type resilient buckets 129
+	check_fail $? "managed to configure a resilient nexthop group with invalid number of buckets when should not"
+
+	ip nexthop add id 10 group 1/2 type resilient buckets 32
+	check_fail $? "managed to configure a resilient nexthop group with device-only nexthops when should not"
+
+	ip nexthop add id 10 group 3 type resilient buckets 32
+	check_err $? "failed to configure a valid resilient nexthop group"
+	ip nexthop replace id 3 dev $swp1
+	check_fail $? "managed to populate a nexthop bucket with a device-only nexthop when should not"
+
+	log_test "nexthop objects - invalid configurations"
+
+	ip nexthop del id 10
+	ip nexthop del id 3
+	ip nexthop del id 2
+	ip nexthop del id 1
+
+	simple_if_fini $swp2 192.0.2.2/24 2001:db8:1::2/64
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_offload_test()
+{
+	# Test offload indication of nexthop objects
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.2 dev $swp1
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
+		dev $swp1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop not marked as offloaded when should"
+
+	ip neigh replace 192.0.2.2 nud failed dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop marked as offloaded after setting neigh to failed state"
+
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
+		dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop not marked as offloaded after neigh replace"
+
+	ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop marked as offloaded after replacing to use an invalid address"
+
+	ip nexthop replace id 1 via 192.0.2.2 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop not marked as offloaded after replacing to use a valid address"
+
+	log_test "nexthop objects offload indication"
+
+	ip neigh del 192.0.2.2 dev $swp1
+	ip nexthop del id 1
+
+	simple_if_fini $swp2
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_group_offload_test()
+{
+	# Test offload indication of nexthop group objects
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.2 dev $swp1
+	ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
+	ip nexthop add id 10 group 1/2
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
+		dev $swp1
+	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \
+		dev $swp1
+	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \
+		dev $swp1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "IPv4 nexthop not marked as offloaded when should"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 2
+	check_err $? "IPv6 nexthop not marked as offloaded when should"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 10
+	check_err $? "nexthop group not marked as offloaded when should"
+
+	# Invalidate nexthop id 1
+	ip neigh replace 192.0.2.2 nud failed dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip nexthop show id 10
+	check_fail $? "nexthop group not marked as offloaded with one valid nexthop"
+
+	# Invalidate nexthop id 2
+	ip neigh replace 2001:db8:1::2 nud failed dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip nexthop show id 10
+	check_err $? "nexthop group marked as offloaded when should not"
+
+	# Revalidate nexthop id 1
+	ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 10
+	check_err $? "nexthop group not marked as offloaded after revalidating nexthop"
+
+	log_test "nexthop group objects offload indication"
+
+	ip neigh del 2001:db8:1::2 dev $swp1
+	ip neigh del 192.0.2.3 dev $swp1
+	ip neigh del 192.0.2.2 dev $swp1
+	ip nexthop del id 10
+	ip nexthop del id 2
+	ip nexthop del id 1
+
+	simple_if_fini $swp2
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_bucket_offload_test()
+{
+	# Test offload indication of nexthop buckets
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.2 dev $swp1
+	ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
+	ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
+		dev $swp1
+	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \
+		dev $swp1
+	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \
+		dev $swp1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop bucket show nhid 1
+	check_err $? "IPv4 nexthop buckets not marked as offloaded when should"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop bucket show nhid 2
+	check_err $? "IPv6 nexthop buckets not marked as offloaded when should"
+
+	# Invalidate nexthop id 1
+	ip neigh replace 192.0.2.2 nud failed dev $swp1
+	busywait "$TIMEOUT" wait_for_trap \
+		ip nexthop bucket show nhid 1
+	check_err $? "IPv4 nexthop buckets not marked with trap when should"
+
+	# Invalidate nexthop id 2
+	ip neigh replace 2001:db8:1::2 nud failed dev $swp1
+	busywait "$TIMEOUT" wait_for_trap \
+		ip nexthop bucket show nhid 2
+	check_err $? "IPv6 nexthop buckets not marked with trap when should"
+
+	# Revalidate nexthop id 1 by changing its configuration
+	ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop bucket show nhid 1
+	check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop"
+
+	# Revalidate nexthop id 2 by changing its neighbour
+	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \
+		dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop bucket show nhid 2
+	check_err $? "nexthop bucket not marked as offloaded after revalidating neighbour"
+
+	log_test "nexthop bucket offload indication"
+
+	ip neigh del 2001:db8:1::2 dev $swp1
+	ip neigh del 192.0.2.3 dev $swp1
+	ip neigh del 192.0.2.2 dev $swp1
+	ip nexthop del id 10
+	ip nexthop del id 2
+	ip nexthop del id 1
+
+	simple_if_fini $swp2
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_blackhole_offload_test()
+{
+	# Test offload indication of blackhole nexthop objects
+	RET=0
+
+	ip nexthop add id 1 blackhole
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "Blackhole nexthop not marked as offloaded when should"
+
+	ip nexthop add id 10 group 1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 10
+	check_err $? "Nexthop group not marked as offloaded when should"
+
+	log_test "blackhole nexthop objects offload indication"
+
+	ip nexthop del id 10
+	ip nexthop del id 1
+}
+
+nexthop_obj_route_offload_test()
+{
+	# Test offload indication of routes using nexthop objects
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.2 dev $swp1
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
+		dev $swp1
+	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \
+		dev $swp1
+
+	ip route replace 198.51.100.0/24 nhid 1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show 198.51.100.0/24
+	check_err $? "route not marked as offloaded when using valid nexthop"
+
+	ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show 198.51.100.0/24
+	check_err $? "route not marked as offloaded after replacing valid nexthop with a valid one"
+
+	ip nexthop replace id 1 via 192.0.2.4 dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show 198.51.100.0/24
+	check_err $? "route marked as offloaded after replacing valid nexthop with an invalid one"
+
+	ip nexthop replace id 1 via 192.0.2.2 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show 198.51.100.0/24
+	check_err $? "route not marked as offloaded after replacing invalid nexthop with a valid one"
+
+	log_test "routes using nexthop objects offload indication"
+
+	ip route del 198.51.100.0/24
+	ip neigh del 192.0.2.3 dev $swp1
+	ip neigh del 192.0.2.2 dev $swp1
+	ip nexthop del id 1
+
+	simple_if_fini $swp2
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+bridge_locked_port_test()
+{
+	RET=0
+
+	ip link add name br1 up type bridge vlan_filtering 0
+
+	ip link add link $swp1 name $swp1.10 type vlan id 10
+	ip link set dev $swp1.10 master br1
+
+	bridge link set dev $swp1.10 locked on
+	check_fail $? "managed to set locked flag on a VLAN upper"
+
+	ip link set dev $swp1.10 nomaster
+	ip link set dev $swp1 master br1
+
+	bridge link set dev $swp1 locked on
+	check_fail $? "managed to set locked flag on a bridge port that has a VLAN upper"
+
+	ip link del dev $swp1.10
+	bridge link set dev $swp1 locked on
+
+	ip link add link $swp1 name $swp1.10 type vlan id 10
+	check_fail $? "managed to configure a VLAN upper on a locked port"
+
+	log_test "bridge locked port"
+
+	ip link del dev $swp1.10 &> /dev/null
+	ip link del dev br1
+}
+
+devlink_reload_test()
+{
+	# Test that after executing all the above configuration tests, a
+	# devlink reload can be performed without errors
+	RET=0
+
+	devlink_reload
+
+	log_test "devlink reload - last test"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
new file mode 100755
index 000000000000..4aaceb6b2b60
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A driver for the ETS selftest that implements testing in offloaded datapath.
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/sch_ets_core.sh
+source $lib_dir/devlink_lib.sh
+
+ALL_TESTS="
+	ping_ipv4
+	priomap_mode
+	ets_test_strict
+	ets_test_mixed
+	ets_test_dwrr
+"
+
+PARENT="parent 3:3"
+
+switch_create()
+{
+	# Create a bottleneck so that the DWRR process can kick in.
+	tc qdisc replace dev $swp2 root handle 3: tbf rate 1gbit \
+		burst 128K limit 1G
+	defer tc qdisc del dev $swp2 root handle 3:
+
+	ets_switch_create
+
+	# Set the ingress quota high and use the three egress TCs to limit the
+	# amount of traffic that is admitted to the shared buffers. This makes
+	# sure that there is always enough traffic of all types to select from
+	# for the DWRR process.
+	devlink_port_pool_th_save $swp1 0
+	devlink_port_pool_th_set $swp1 0 12
+	defer devlink_port_pool_th_restore $swp1 0
+
+	devlink_tc_bind_pool_th_save $swp1 0 ingress
+	devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12
+	defer devlink_tc_bind_pool_th_restore $swp1 0 ingress
+
+	devlink_port_pool_th_save $swp2 4
+	devlink_port_pool_th_set $swp2 4 12
+	defer devlink_port_pool_th_restore $swp2 4
+
+	devlink_tc_bind_pool_th_save $swp2 7 egress
+	devlink_tc_bind_pool_th_set $swp2 7 egress 4 5
+	defer devlink_tc_bind_pool_th_restore $swp2 7 egress
+
+	devlink_tc_bind_pool_th_save $swp2 6 egress
+	devlink_tc_bind_pool_th_set $swp2 6 egress 4 5
+	defer devlink_tc_bind_pool_th_restore $swp2 6 egress
+
+	devlink_tc_bind_pool_th_save $swp2 5 egress
+	devlink_tc_bind_pool_th_set $swp2 5 egress 4 5
+	defer devlink_tc_bind_pool_th_restore $swp2 5 egress
+
+	# Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet
+	# priorities at $swp1 based on their 802.1p headers. ingress-qos-map is
+	# not offloaded by mlxsw as of this writing, but the mapping used is
+	# 1:1, which is the mapping currently hard-coded by the driver.
+}
+
+# Callback from sch_ets_tests.sh
+collect_stats()
+{
+	local -a streams=("$@")
+	local stream
+
+	# Wait for qdisc counter update so that we don't get it mid-way through.
+	busywait_for_counter 1000 +1 \
+		qdisc_parent_stats_get $swp2 10:$((${streams[0]} + 1)) .bytes \
+		> /dev/null
+
+	for stream in ${streams[@]}; do
+		qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes
+	done
+}
+
+bail_on_lldpad "configure DCB" "configure Qdiscs"
+ets_run
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh
new file mode 100755
index 000000000000..071a33d10c20
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh
@@ -0,0 +1,290 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test qdisc offload indication
+
+
+ALL_TESTS="
+	test_root
+	test_port_tbf
+	test_etsprio
+	test_etsprio_port_tbf
+"
+NUM_NETIFS=1
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/lib.sh
+
+check_not_offloaded()
+{
+	local handle=$1; shift
+	local h
+	local offloaded
+
+	h=$(qdisc_stats_get $h1 "$handle" .handle)
+	[[ $h == '"'$handle'"' ]]
+	check_err $? "Qdisc with handle $handle does not exist"
+
+	offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded)
+	[[ $offloaded == true ]]
+	check_fail $? "Qdisc with handle $handle offloaded, but should not be"
+}
+
+check_all_offloaded()
+{
+	local handle=$1; shift
+
+	if [[ ! -z $handle ]]; then
+		local offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded)
+		[[ $offloaded == true ]]
+		check_err $? "Qdisc with handle $handle not offloaded"
+	fi
+
+	local unoffloaded=$(tc q sh dev $h1 invisible |
+				grep -v offloaded |
+				sed s/root/parent\ root/ |
+				cut -d' ' -f 5)
+	[[ -z $unoffloaded ]]
+	check_err $? "Qdiscs with following parents not offloaded: $unoffloaded"
+
+	pre_cleanup
+}
+
+with_ets()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle \
+	   ets bands 8 priomap 7 6 5 4 3 2 1 0
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_prio()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle \
+	   prio bands 8 priomap 7 6 5 4 3 2 1 0
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_red()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle \
+	   red limit 1000000 min 200000 max 300000 probability 0.5 avpkt 1500
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_tbf()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle \
+	   tbf rate 400Mbit burst 128K limit 1M
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_pfifo()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle pfifo limit 100K
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_bfifo()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle bfifo limit 100K
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_drr()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle drr
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_qdiscs()
+{
+	local handle=$1; shift
+	local parent=$1; shift
+	local kind=$1; shift
+	local next_handle=$((handle * 2))
+	local locus;
+
+	if [[ $kind == "--" ]]; then
+		local cmd=$1; shift
+		$cmd $(printf %x: $parent) "$@"
+	else
+		if ((parent == 0)); then
+			locus=root
+		else
+			locus=$(printf "parent %x:1" $parent)
+		fi
+
+		with_$kind $(printf %x: $handle) "$locus" \
+			with_qdiscs $next_handle $handle "$@"
+	fi
+}
+
+get_name()
+{
+	local parent=$1; shift
+	local name=$(echo "" "${@^^}" | tr ' ' -)
+
+	if ((parent != 0)); then
+		kind=$(qdisc_stats_get $h1 $parent: .kind)
+		kind=${kind%\"}
+		kind=${kind#\"}
+		name="-${kind^^}$name"
+	fi
+
+	echo root$name
+}
+
+do_test_offloaded()
+{
+	local handle=$1; shift
+	local parent=$1; shift
+
+	RET=0
+	with_qdiscs $handle $parent "$@" -- check_all_offloaded
+	log_test $(get_name $parent "$@")" offloaded"
+}
+
+do_test_nooffload()
+{
+	local handle=$1; shift
+	local parent=$1; shift
+
+	local name=$(echo "${@^^}" | tr ' ' -)
+	local kind
+
+	RET=0
+	with_qdiscs $handle $parent "$@" -- check_not_offloaded
+	log_test $(get_name $parent "$@")" not offloaded"
+}
+
+do_test_combinations()
+{
+	local handle=$1; shift
+	local parent=$1; shift
+
+	local cont
+	local leaf
+	local fifo
+
+	for cont in "" ets prio; do
+		for leaf in "" red tbf "red tbf" "tbf red"; do
+			for fifo in "" pfifo bfifo; do
+				if [[ -z "$cont$leaf$fifo" ]]; then
+					continue
+				fi
+				do_test_offloaded $handle $parent \
+						  $cont $leaf $fifo
+			done
+		done
+	done
+
+	for cont in ets prio; do
+		for leaf in red tbf; do
+			do_test_nooffload $handle $parent $cont red tbf $leaf
+			do_test_nooffload $handle $parent $cont tbf red $leaf
+		done
+		for leaf in "red red" "tbf tbf"; do
+			do_test_nooffload $handle $parent $cont $leaf
+		done
+	done
+
+	do_test_nooffload $handle $parent drr
+}
+
+test_root()
+{
+	do_test_combinations 1 0
+}
+
+test_port_tbf()
+{
+	with_tbf 1: root \
+		do_test_combinations 8 1
+}
+
+do_test_etsprio()
+{
+	local parent=$1; shift
+	local tbfpfx=$1; shift
+	local cont
+
+	for cont in ets prio; do
+		RET=0
+		with_$cont 8: "$parent" \
+			with_red 11: "parent 8:1" \
+			with_red 12: "parent 8:2" \
+			with_tbf 13: "parent 8:3" \
+			with_tbf 14: "parent 8:4" \
+			check_all_offloaded
+		log_test "root$tbfpfx-ETS-{RED,TBF} offloaded"
+
+		RET=0
+		with_$cont 8: "$parent" \
+			with_red 81: "parent 8:1" \
+				with_tbf 811: "parent 81:1" \
+			with_tbf 84: "parent 8:4" \
+				with_red 841: "parent 84:1" \
+			check_all_offloaded
+		log_test "root$tbfpfx-ETS-{RED-TBF,TBF-RED} offloaded"
+
+		RET=0
+		with_$cont 8: "$parent" \
+			with_red 81: "parent 8:1" \
+				with_tbf 811: "parent 81:1" \
+					with_bfifo 8111: "parent 811:1" \
+			with_tbf 82: "parent 8:2" \
+				with_red 821: "parent 82:1" \
+					with_bfifo 8211: "parent 821:1" \
+			check_all_offloaded
+		log_test "root$tbfpfx-ETS-{RED-TBF-bFIFO,TBF-RED-bFIFO} offloaded"
+	done
+}
+
+test_etsprio()
+{
+	do_test_etsprio root ""
+}
+
+test_etsprio_port_tbf()
+{
+	with_tbf 1: root \
+		do_test_etsprio "parent 1:1" "-TBF"
+}
+
+cleanup()
+{
+	tc qdisc del dev $h1 root &>/dev/null
+}
+
+trap cleanup EXIT
+h1=${NETIFS[p1]}
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
new file mode 100644
index 000000000000..47d2ffcf366e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -0,0 +1,761 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends a >1Gbps stream of traffic from H1, to the switch, which
+# forwards it to a 1Gbps port. This 1Gbps stream is then looped back to the
+# switch and forwarded to the port under test $swp3, which is also 1Gbps.
+#
+# This way, $swp3 should be 100% filled with traffic without any of it spilling
+# to the backlog. Any extra packets sent should almost 1:1 go to backlog. That
+# is what H2 is used for--it sends the extra traffic to create backlog.
+#
+# A RED Qdisc is installed on $swp3. The configuration is such that the minimum
+# and maximum size are 1 byte apart, so there is a very clear border under which
+# no marking or dropping takes place, and above which everything is marked or
+# dropped.
+#
+# The test uses the buffer build-up behavior to test the installed RED.
+#
+# In order to test WRED, $swp3 actually contains RED under PRIO, with two
+# different configurations. Traffic is prioritized using 802.1p and relies on
+# the implicit mlxsw configuration, where packet priority is taken 1:1 from the
+# 802.1p marking.
+#
+# +--------------------------+                     +--------------------------+
+# | H1                       |                     | H2                       |
+# |     + $h1.10             |                     |     + $h2.10             |
+# |     | 192.0.2.1/28       |                     |     | 192.0.2.2/28       |
+# |     |                    |                     |     |                    |
+# |     |         $h1.11 +   |                     |     |         $h2.11 +   |
+# |     |  192.0.2.17/28 |   |                     |     |  192.0.2.18/28 |   |
+# |     |                |   |                     |     |                |   |
+# |     \______    ______/   |                     |     \______    ______/   |
+# |            \ /           |                     |            \ /           |
+# |             + $h1        |                     |             + $h2        |
+# +-------------|------------+                     +-------------|------------+
+#               | >1Gbps                                         |
+# +-------------|------------------------------------------------|------------+
+# | SW          + $swp1                                          + $swp2      |
+# |     _______/ \___________                        ___________/ \_______    |
+# |    /                     \                      /                     \   |
+# |  +-|-----------------+   |                    +-|-----------------+   |   |
+# |  | + $swp1.10        |   |                    | + $swp2.10        |   |   |
+# |  |                   |   |        .-------------+ $swp5.10        |   |   |
+# |  |     BR1_10        |   |        |           |                   |   |   |
+# |  |                   |   |        |           |     BR2_10        |   |   |
+# |  | + $swp2.10        |   |        |           |                   |   |   |
+# |  +-|-----------------+   |        |           | + $swp3.10        |   |   |
+# |    |                     |        |           +-|-----------------+   |   |
+# |    |   +-----------------|-+      |             |   +-----------------|-+ |
+# |    |   |        $swp1.11 + |      |             |   |        $swp2.11 + | |
+# |    |   |                   |      | .-----------------+ $swp5.11        | |
+# |    |   |      BR1_11       |      | |           |   |                   | |
+# |    |   |                   |      | |           |   |      BR2_11       | |
+# |    |   |        $swp2.11 + |      | |           |   |                   | |
+# |    |   +-----------------|-+      | |           |   |        $swp3.11 + | |
+# |    |                     |        | |           |   +-----------------|-+ |
+# |    \_______   ___________/        | |           \___________   _______/   |
+# |            \ /                    \ /                       \ /           |
+# |             + $swp4                + $swp5                   + $swp3      |
+# +-------------|----------------------|-------------------------|------------+
+#               |                      |                         | 1Gbps
+#               \________1Gbps_________/                         |
+#                                   +----------------------------|------------+
+#                                   | H3                         + $h3        |
+#                                   |      _____________________/ \_______    |
+#                                   |     /                               \   |
+#                                   |     |                               |   |
+#                                   |     + $h3.10                 $h3.11 +   |
+#                                   |       192.0.2.3/28    192.0.2.19/28     |
+#                                   +-----------------------------------------+
+
+NUM_NETIFS=8
+CHECK_TC="yes"
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
+
+stop_traffic_sleep()
+{
+	local pid=$1; shift
+
+	# Issuing a kill still leaves a bunch of packets lingering in the
+	# buffers. This traffic then arrives at the point where a follow-up test
+	# is already running, and can confuse the test. Therefore sleep after
+	# stopping traffic to flush any leftover packets.
+	stop_traffic "$pid"
+	sleep 1
+}
+
+ipaddr()
+{
+	local host=$1; shift
+	local vlan=$1; shift
+
+	echo 192.0.2.$((16 * (vlan - 10) + host))
+}
+
+host_create()
+{
+	local dev=$1; shift
+	local host=$1; shift
+
+	adf_simple_if_init $dev
+
+	mtu_set $dev 10000
+	defer mtu_restore $dev
+
+	vlan_create $dev 10 v$dev $(ipaddr $host 10)/28
+	defer vlan_destroy $dev 10
+	ip link set dev $dev.10 type vlan egress 0:0
+
+	vlan_create $dev 11 v$dev $(ipaddr $host 11)/28
+	defer vlan_destroy $dev 11
+	ip link set dev $dev.11 type vlan egress 0:1
+}
+
+h1_create()
+{
+	host_create $h1 1
+}
+
+h2_create()
+{
+	host_create $h2 2
+
+	tc qdisc add dev $h2 clsact
+	defer tc qdisc del dev $h2 clsact
+
+	# Some of the tests in this suite use multicast traffic. As this traffic
+	# enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus
+	# e.g. traffic ingressing through $swp2 is flooded to $swp3 (the
+	# intended destination) and $swp5 (which is intended as ingress for
+	# another stream of traffic).
+	#
+	# This is generally not a problem, but if the $swp5 throughput is lower
+	# than $swp2 throughput, there will be a build-up at $swp5. That may
+	# cause packets to fail to queue up at $swp3 due to shared buffer
+	# quotas, and the test to spuriously fail.
+	#
+	# Prevent this by adding a shaper which limits the traffic in $h2 to
+	# 1Gbps.
+
+	tc qdisc replace dev $h2 root handle 10: tbf rate 200mbit \
+		burst 128K limit 1G
+	defer tc qdisc del dev $h2 root handle 10:
+}
+
+h3_create()
+{
+	host_create $h3 3
+}
+
+switch_create()
+{
+	local intf
+	local vlan
+
+	ip link add dev br1_10 type bridge
+	defer ip link del dev br1_10
+
+	ip link add dev br1_11 type bridge
+	defer ip link del dev br1_11
+
+	ip link add dev br2_10 type bridge
+	defer ip link del dev br2_10
+
+	ip link add dev br2_11 type bridge
+	defer ip link del dev br2_11
+
+	for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do
+		ip link set dev $intf up
+		defer ip link set dev $intf down
+
+		mtu_set $intf 10000
+		defer mtu_restore $intf
+	done
+
+	for intf in $swp1 $swp4; do
+		for vlan in 10 11; do
+			vlan_create $intf $vlan
+			defer vlan_destroy $intf $vlan
+
+			ip link set dev $intf.$vlan master br1_$vlan
+			defer ip link set dev $intf.$vlan nomaster
+
+			ip link set dev $intf.$vlan up
+			defer ip link set dev $intf.$vlan up
+		done
+	done
+
+	for intf in $swp2 $swp3 $swp5; do
+		for vlan in 10 11; do
+			vlan_create $intf $vlan
+			defer vlan_destroy $intf $vlan
+
+			ip link set dev $intf.$vlan master br2_$vlan
+			defer ip link set dev $intf.$vlan nomaster
+
+			ip link set dev $intf.$vlan up
+			defer ip link set dev $intf.$vlan up
+		done
+	done
+
+	ip link set dev $swp4.10 type vlan egress 0:0
+	ip link set dev $swp4.11 type vlan egress 0:1
+	for intf in $swp1 $swp2 $swp5; do
+		for vlan in 10 11; do
+			ip link set dev $intf.$vlan type vlan ingress 0:0 1:1
+		done
+	done
+
+	for intf in $swp3 $swp4; do
+		tc qdisc replace dev $intf root handle 1: tbf rate 200mbit \
+			burst 128K limit 1G
+		defer tc qdisc del dev $intf root handle 1:
+	done
+
+	ip link set dev br1_10 up
+	defer ip link set dev br1_10 down
+
+	ip link set dev br1_11 up
+	defer ip link set dev br1_11 down
+
+	ip link set dev br2_10 up
+	defer ip link set dev br2_10 down
+
+	ip link set dev br2_11 up
+	defer ip link set dev br2_11 down
+
+	local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
+	devlink_port_pool_th_save $swp3 8
+	devlink_port_pool_th_set $swp3 8 $size
+	defer devlink_port_pool_th_restore $swp3 8
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	swp4=${NETIFS[p7]}
+	swp5=${NETIFS[p8]}
+
+	h3_mac=$(mac_get $h3)
+
+	adf_vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+ping_ipv4()
+{
+	ping_test $h1.10 $(ipaddr 3 10) " from host 1, vlan 10"
+	ping_test $h1.11 $(ipaddr 3 11) " from host 1, vlan 11"
+	ping_test $h2.10 $(ipaddr 3 10) " from host 2, vlan 10"
+	ping_test $h2.11 $(ipaddr 3 11) " from host 2, vlan 11"
+}
+
+get_tc()
+{
+	local vlan=$1; shift
+
+	echo $((vlan - 10))
+}
+
+get_qdisc_handle()
+{
+	local vlan=$1; shift
+
+	local tc=$(get_tc $vlan)
+	local band=$((8 - tc))
+
+	# Handle is 107: for TC1, 108: for TC0.
+	echo "10$band:"
+}
+
+get_qdisc_backlog()
+{
+	local vlan=$1; shift
+
+	qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .backlog
+}
+
+get_mc_transmit_queue()
+{
+	local vlan=$1; shift
+
+	local tc=$(($(get_tc $vlan) + 8))
+	ethtool_stats_get $swp3 tc_transmit_queue_tc_$tc
+}
+
+get_nmarked()
+{
+	local vlan=$1; shift
+
+	ethtool_stats_get $swp3 ecn_marked
+}
+
+get_qdisc_nmarked()
+{
+	local vlan=$1; shift
+
+	busywait_for_counter 1100 +1 \
+		qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .marked
+}
+
+get_qdisc_npackets()
+{
+	local vlan=$1; shift
+
+	busywait_for_counter 1100 +1 \
+		qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets
+}
+
+send_packets()
+{
+	local vlan=$1; shift
+	local proto=$1; shift
+	local pkts=$1; shift
+
+	$MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
+	    -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
+	    -t $proto -q -c $pkts "$@"
+}
+
+# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
+# success. After 10 failed attempts it bails out and returns 1. It dumps the
+# backlog size to stdout.
+build_backlog()
+{
+	local vlan=$1; shift
+	local size=$1; shift
+	local proto=$1; shift
+
+	local tc=$((vlan - 10))
+	local band=$((8 - tc))
+	local cur=-1
+	local i=0
+
+	while :; do
+		sleep 1
+		local cur=$(busywait 1100 until_counter_is "> $cur" \
+					    get_qdisc_backlog $vlan)
+		local diff=$((size - cur))
+		local pkts=$(((diff + 7999) / 8000))
+
+		if ((cur >= size)); then
+			echo $cur
+			return 0
+		elif ((i++ > 10)); then
+			echo $cur
+			return 1
+		fi
+
+		send_packets $vlan $proto $pkts "$@"
+	done
+}
+
+check_marking()
+{
+	local get_nmarked=$1; shift
+	local vlan=$1; shift
+	local cond=$1; shift
+
+	local npackets_0=$(get_qdisc_npackets $vlan)
+	local nmarked_0=$($get_nmarked $vlan)
+	sleep 5
+	local npackets_1=$(get_qdisc_npackets $vlan)
+	local nmarked_1=$($get_nmarked $vlan)
+
+	local nmarked_d=$((nmarked_1 - nmarked_0))
+	local npackets_d=$((npackets_1 - npackets_0))
+	local pct=$((100 * nmarked_d / npackets_d))
+
+	echo $pct
+	((pct $cond))
+}
+
+ecn_test_common()
+{
+	local name=$1; shift
+	local get_nmarked=$1; shift
+	local vlan=$1; shift
+	local limit=$1; shift
+	local backlog
+	local pct
+
+	# Build the below-the-limit backlog using UDP. We could use TCP just
+	# fine, but this way we get a proof that UDP is accepted when queue
+	# length is below the limit. The main stream is using TCP, and if the
+	# limit is misconfigured, we would see this traffic being ECN marked.
+	RET=0
+	backlog=$(build_backlog $vlan $((2 * limit / 3)) udp)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_marking "$get_nmarked" $vlan "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+	log_test "TC $((vlan - 10)): $name backlog < limit"
+
+	# Now push TCP, because non-TCP traffic would be early-dropped after the
+	# backlog crosses the limit, and we want to make sure that the backlog
+	# is above the limit.
+	RET=0
+	backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_marking "$get_nmarked" $vlan ">= 95")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
+	log_test "TC $((vlan - 10)): $name backlog > limit"
+}
+
+__do_ecn_test()
+{
+	local get_nmarked=$1; shift
+	local vlan=$1; shift
+	local limit=$1; shift
+	local name=${1-ECN}; shift
+
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+			  $h3_mac tos=0x01
+	defer stop_traffic_sleep $!
+	sleep 1
+
+	ecn_test_common "$name" "$get_nmarked" $vlan $limit
+
+	# Up there we saw that UDP gets accepted when backlog is below the
+	# limit. Now that it is above, it should all get dropped, and backlog
+	# building should fail.
+	RET=0
+	build_backlog $vlan $((2 * limit)) udp >/dev/null
+	check_fail $? "UDP traffic went into backlog instead of being early-dropped"
+	log_test "TC $((vlan - 10)): $name backlog > limit: UDP early-dropped"
+}
+
+do_ecn_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+
+	in_defer_scope \
+		__do_ecn_test get_nmarked "$vlan" "$limit"
+}
+
+do_ecn_test_perband()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+
+	mlxsw_only_on_spectrum 3+ || return
+	in_defer_scope \
+		__do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN"
+}
+
+__do_ecn_nodrop_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local name="ECN nodrop"
+
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+			  $h3_mac tos=0x01
+	defer stop_traffic_sleep $!
+	sleep 1
+
+	ecn_test_common "$name" get_nmarked $vlan $limit
+
+	# Up there we saw that UDP gets accepted when backlog is below the
+	# limit. Now that it is above, in nodrop mode, make sure it goes to
+	# backlog as well.
+	RET=0
+	build_backlog $vlan $((2 * limit)) udp >/dev/null
+	check_err $? "UDP traffic was early-dropped instead of getting into backlog"
+	log_test "TC $((vlan - 10)): $name backlog > limit: UDP not dropped"
+}
+
+do_ecn_nodrop_test()
+{
+	in_defer_scope \
+		__do_ecn_nodrop_test "$@"
+}
+
+__do_red_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local backlog
+	local pct
+
+	# Use ECN-capable TCP to verify there's no marking even though the queue
+	# is above limit.
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+			  $h3_mac tos=0x01
+	defer stop_traffic_sleep $!
+
+	# Pushing below the queue limit should work.
+	RET=0
+	backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_marking get_nmarked $vlan "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+	log_test "TC $((vlan - 10)): RED backlog < limit"
+
+	# Pushing above should not.
+	RET=0
+	backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
+	check_fail $? "Traffic went into backlog instead of being early-dropped"
+	pct=$(check_marking get_nmarked $vlan "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+	backlog=$(get_qdisc_backlog $vlan)
+	local diff=$((limit - backlog))
+	pct=$((100 * diff / limit))
+	((-15 <= pct && pct <= 15))
+	check_err $? "backlog $backlog / $limit expected <= 15% distance"
+	log_test "TC $((vlan - 10)): RED backlog > limit"
+}
+
+do_red_test()
+{
+	in_defer_scope \
+		__do_red_test "$@"
+}
+
+__do_mc_backlog_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local backlog
+	local pct
+
+	RET=0
+
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc
+	defer stop_traffic_sleep $!
+
+	start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc
+	defer stop_traffic_sleep $!
+
+	qbl=$(busywait 5000 until_counter_is ">= 500000" \
+		       get_qdisc_backlog $vlan)
+	check_err $? "Could not build MC backlog"
+
+	# Verify that we actually see the backlog on BUM TC. Do a busywait as
+	# well, performance blips might cause false fail.
+	local ebl
+	ebl=$(busywait 5000 until_counter_is ">= 500000" \
+		       get_mc_transmit_queue $vlan)
+	check_err $? "MC backlog reported by qdisc not visible in ethtool"
+
+	log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
+}
+
+do_mc_backlog_test()
+{
+	in_defer_scope \
+		__do_mc_backlog_test "$@"
+}
+
+__do_mark_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local subtest=$1; shift
+	local fetch_counter=$1; shift
+	local should_fail=$1; shift
+	local base
+
+	mlxsw_only_on_spectrum 2+ || return
+
+	RET=0
+
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+			  $h3_mac tos=0x01
+	defer stop_traffic_sleep $!
+
+	# Create a bit of a backlog and observe no mirroring due to marks.
+	qevent_rule_install_$subtest
+
+	build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01 >/dev/null
+
+	base=$($fetch_counter)
+	count=$(busywait 1100 until_counter_is ">= $((base + 1))" \
+		$fetch_counter)
+	check_fail $? "Spurious packets ($base -> $count) observed without buffer pressure"
+
+	# Above limit, everything should be mirrored, we should see lots of
+	# packets.
+	build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01 >/dev/null
+	busywait_for_counter 1100 +2500 \
+		 $fetch_counter > /dev/null
+	check_err_fail "$should_fail" $? "ECN-marked packets $subtest'd"
+
+	# When the rule is uninstalled, there should be no mirroring.
+	qevent_rule_uninstall_$subtest
+	busywait_for_counter 1100 +10 \
+		 $fetch_counter > /dev/null
+	check_fail $? "Spurious packets observed after uninstall"
+
+	if ((should_fail)); then
+		log_test "TC $((vlan - 10)): marked packets not $subtest'd"
+	else
+		log_test "TC $((vlan - 10)): marked packets $subtest'd"
+	fi
+}
+
+do_mark_test()
+{
+	in_defer_scope \
+		__do_mark_test "$@"
+}
+
+__do_drop_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local trigger=$1; shift
+	local subtest=$1; shift
+	local fetch_counter=$1; shift
+	local base
+	local now
+
+	mlxsw_only_on_spectrum 2+ || return
+
+	RET=0
+
+	start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) $h3_mac
+	defer stop_traffic_sleep $!
+
+	# Create a bit of a backlog and observe no mirroring due to drops.
+	qevent_rule_install_$subtest
+	base=$($fetch_counter)
+
+	build_backlog $vlan $((2 * limit / 3)) udp >/dev/null
+
+	busywait 1100 until_counter_is ">= $((base + 1))" $fetch_counter >/dev/null
+	check_fail $? "Spurious packets observed without buffer pressure"
+
+	# Push to the queue until it's at the limit. The configured limit is
+	# rounded by the qdisc and then by the driver, so this is the best we
+	# can do to get to the real limit of the system.
+	build_backlog $vlan $((3 * limit / 2)) udp >/dev/null
+
+	base=$($fetch_counter)
+	send_packets $vlan udp 100
+
+	now=$(busywait 1100 until_counter_is ">= $((base + 95))" $fetch_counter)
+	check_err $? "${trigger}ped packets not observed: 100 expected, $((now - base)) seen"
+
+	# When no extra traffic is injected, there should be no mirroring.
+	busywait 1100 until_counter_is ">= $((base + 110))" \
+		 $fetch_counter >/dev/null
+	check_fail $? "Spurious packets observed"
+
+	# When the rule is uninstalled, there should be no mirroring.
+	qevent_rule_uninstall_$subtest
+	send_packets $vlan udp 100
+	now=$(busywait 1100 until_counter_is ">= $((base + 110))" \
+		       $fetch_counter)
+	check_fail $? "$((now - base)) spurious packets observed after uninstall"
+
+	log_test "TC $((vlan - 10)): ${trigger}ped packets $subtest'd"
+}
+
+do_drop_test()
+{
+	in_defer_scope \
+		__do_drop_test "$@"
+}
+
+qevent_rule_install_mirror()
+{
+	tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+	   action mirred egress mirror dev $swp2 hw_stats disabled
+}
+
+qevent_rule_uninstall_mirror()
+{
+	tc filter del block 10 pref 1234 handle 102 matchall
+}
+
+qevent_counter_fetch_mirror()
+{
+	tc_rule_handle_stats_get "dev $h2 ingress" 101
+}
+
+do_drop_mirror_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local qevent_name=$1; shift
+
+	tc filter add dev $h2 ingress pref 1 handle 101 prot ip \
+	   flower skip_sw ip_proto udp \
+	   action drop
+
+	do_drop_test "$vlan" "$limit" "$qevent_name" mirror \
+		     qevent_counter_fetch_mirror
+
+	tc filter del dev $h2 ingress pref 1 handle 101 flower
+}
+
+do_mark_mirror_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+
+	tc filter add dev $h2 ingress pref 1 handle 101 prot ip \
+	   flower skip_sw ip_proto tcp \
+	   action drop
+
+	do_mark_test "$vlan" "$limit" mirror \
+		     qevent_counter_fetch_mirror \
+		     $(: should_fail=)0
+
+	tc filter del dev $h2 ingress pref 1 handle 101 flower
+}
+
+qevent_rule_install_trap()
+{
+	tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+	   action trap hw_stats disabled
+}
+
+qevent_rule_uninstall_trap()
+{
+	tc filter del block 10 pref 1234 handle 102 matchall
+}
+
+qevent_counter_fetch_trap()
+{
+	local trap_name=$1; shift
+
+	devlink_trap_rx_packets_get "$trap_name"
+}
+
+do_drop_trap_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local trap_name=$1; shift
+
+	do_drop_test "$vlan" "$limit" "$trap_name" trap \
+		     "qevent_counter_fetch_trap $trap_name"
+}
+
+qevent_rule_install_trap_fwd()
+{
+	tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+	   action trap_fwd hw_stats disabled
+}
+
+qevent_rule_uninstall_trap_fwd()
+{
+	tc filter del block 10 pref 1234 handle 102 matchall
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
new file mode 100755
index 000000000000..8902a115d9cd
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ecn_test
+	ecn_test_perband
+	ecn_nodrop_test
+	red_test
+	mc_backlog_test
+	red_mirror_test
+	red_trap_test
+	ecn_mirror_test
+"
+: ${QDISC:=ets}
+source sch_red_core.sh
+
+# do_ecn_test first build 2/3 of the requested backlog and expects no marking,
+# and then builds 3/2 of it and does expect marking. The values of $BACKLOG1 and
+# $BACKLOG2 are far enough not to overlap, so that we can assume that if we do
+# see (do not see) marking, it is actually due to the configuration of that one
+# TC, and not due to configuration of the other TC leaking over.
+BACKLOG1=400000
+BACKLOG2=1000000
+
+install_root_qdisc()
+{
+	tc qdisc add dev $swp3 parent 1: handle 10: $QDISC \
+	   bands 8 priomap 7 6 5 4 3 2 1 0
+}
+
+install_qdisc_tc0()
+{
+	local -a args=("$@")
+
+	tc qdisc add dev $swp3 parent 10:8 handle 108: red \
+	   limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \
+	   probability 1.0 avpkt 8000 burst 51 "${args[@]}"
+}
+
+install_qdisc_tc1()
+{
+	local -a args=("$@")
+
+	tc qdisc add dev $swp3 parent 10:7 handle 107: red \
+	   limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \
+	   probability 1.0 avpkt 8000 burst 126 "${args[@]}"
+}
+
+install_qdisc()
+{
+	install_root_qdisc
+	install_qdisc_tc0 "$@"
+	install_qdisc_tc1 "$@"
+	sleep 1
+}
+
+uninstall_qdisc_tc0()
+{
+	tc qdisc del dev $swp3 parent 10:8
+}
+
+uninstall_qdisc_tc1()
+{
+	tc qdisc del dev $swp3 parent 10:7
+}
+
+uninstall_root_qdisc()
+{
+	tc qdisc del dev $swp3 parent 1:
+}
+
+uninstall_qdisc()
+{
+	uninstall_qdisc_tc0
+	uninstall_qdisc_tc1
+	uninstall_root_qdisc
+}
+
+ecn_test()
+{
+	install_qdisc ecn
+	defer uninstall_qdisc
+
+	do_ecn_test 10 $BACKLOG1
+	do_ecn_test 11 $BACKLOG2
+}
+
+ecn_test_perband()
+{
+	install_qdisc ecn
+	defer uninstall_qdisc
+
+	do_ecn_test_perband 10 $BACKLOG1
+	do_ecn_test_perband 11 $BACKLOG2
+}
+
+ecn_nodrop_test()
+{
+	install_qdisc ecn nodrop
+	defer uninstall_qdisc
+
+	do_ecn_nodrop_test 10 $BACKLOG1
+	do_ecn_nodrop_test 11 $BACKLOG2
+}
+
+red_test()
+{
+	install_qdisc
+	defer uninstall_qdisc
+
+	# Make sure that we get the non-zero value if there is any.
+	local cur=$(busywait 1100 until_counter_is "> 0" \
+			    qdisc_stats_get $swp3 10: .backlog)
+	(( cur == 0 ))
+	check_err $? "backlog of $cur observed on non-busy qdisc"
+	log_test "$QDISC backlog properly cleaned"
+
+	do_red_test 10 $BACKLOG1
+	do_red_test 11 $BACKLOG2
+}
+
+mc_backlog_test()
+{
+	install_qdisc
+	defer uninstall_qdisc
+
+	# Note that the backlog numbers here do not correspond to RED
+	# configuration, but are arbitrary.
+	do_mc_backlog_test 10 $BACKLOG1
+	do_mc_backlog_test 11 $BACKLOG2
+}
+
+red_mirror_test()
+{
+	install_qdisc qevent early_drop block 10
+	defer uninstall_qdisc
+
+	do_drop_mirror_test 10 $BACKLOG1 early_drop
+	do_drop_mirror_test 11 $BACKLOG2 early_drop
+}
+
+red_trap_test()
+{
+	install_qdisc qevent early_drop block 10
+	defer uninstall_qdisc
+
+	do_drop_trap_test 10 $BACKLOG1 early_drop
+	do_drop_trap_test 11 $BACKLOG2 early_drop
+}
+
+ecn_mirror_test()
+{
+	install_qdisc ecn qevent mark block 10
+	defer uninstall_qdisc
+
+	do_mark_mirror_test 10 $BACKLOG1
+	do_mark_mirror_test 11 $BACKLOG2
+}
+
+bail_on_lldpad "configure DCB" "configure Qdiscs"
+
+trap cleanup EXIT
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh
new file mode 100755
index 000000000000..76820a0e9a1b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+QDISC=prio
+source sch_red_ets.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
new file mode 100755
index 000000000000..e9043771787b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ecn_test
+	ecn_test_perband
+	ecn_nodrop_test
+	red_test
+	mc_backlog_test
+	red_mirror_test
+"
+source sch_red_core.sh
+
+BACKLOG=300000
+
+install_qdisc()
+{
+	local -a args=("$@")
+
+	tc qdisc add dev $swp3 parent 1: handle 108: red \
+	   limit 1000000 min $BACKLOG max $((BACKLOG + 1)) \
+	   probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+	sleep 1
+}
+
+uninstall_qdisc()
+{
+	tc qdisc del dev $swp3 parent 1:
+}
+
+ecn_test()
+{
+	install_qdisc ecn
+	defer uninstall_qdisc
+
+	do_ecn_test 10 $BACKLOG
+}
+
+ecn_test_perband()
+{
+	install_qdisc ecn
+	defer uninstall_qdisc
+
+	do_ecn_test_perband 10 $BACKLOG
+}
+
+ecn_nodrop_test()
+{
+	install_qdisc ecn nodrop
+	defer uninstall_qdisc
+
+	do_ecn_nodrop_test 10 $BACKLOG
+}
+
+red_test()
+{
+	install_qdisc
+	defer uninstall_qdisc
+
+	do_red_test 10 $BACKLOG
+}
+
+mc_backlog_test()
+{
+	install_qdisc
+	defer uninstall_qdisc
+
+	# Note that the backlog value here does not correspond to RED
+	# configuration, but is arbitrary.
+	do_mc_backlog_test 10 $BACKLOG
+}
+
+red_mirror_test()
+{
+	install_qdisc qevent early_drop block 10
+	defer uninstall_qdisc
+
+	do_drop_mirror_test 10 $BACKLOG
+}
+
+bail_on_lldpad "configure DCB" "configure Qdiscs"
+
+trap cleanup EXIT
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
new file mode 100755
index 000000000000..ecc3664376b3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+sch_tbf_pre_hook()
+{
+	bail_on_lldpad "configure DCB" "configure Qdiscs"
+}
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+TCFLAGS=skip_sw
+source $lib_dir/sch_tbf_ets.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
new file mode 100755
index 000000000000..2e0a4efb1703
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+sch_tbf_pre_hook()
+{
+	bail_on_lldpad "configure DCB" "configure Qdiscs"
+}
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+TCFLAGS=skip_sw
+source $lib_dir/sch_tbf_prio.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
new file mode 100755
index 000000000000..6679a338dfc4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+sch_tbf_pre_hook()
+{
+	bail_on_lldpad "configure DCB" "configure Qdiscs"
+}
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+TCFLAGS=skip_sw
+source $lib_dir/sch_tbf_root.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
new file mode 100755
index 000000000000..c068e6c2a580
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -0,0 +1,243 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	port_pool_test
+	port_tc_ip_test
+	port_tc_arp_test
+"
+
+NUM_NETIFS=2
+source ../../../net/forwarding/lib.sh
+source ../../../net/forwarding/devlink_lib.sh
+source mlxsw_lib.sh
+
+SB_POOL_ING=0
+SB_POOL_EGR_CPU=10
+
+SB_ITC_CPU_IP=2
+SB_ITC_CPU_ARP=2
+SB_ITC=0
+
+h1_create()
+{
+	simple_if_init $h1 192.0.1.1/24
+	tc qdisc add dev $h1 clsact
+
+	# Add egress filter on $h1 that will guarantee that the packet sent,
+	# will be the only packet being passed to the device.
+	tc filter add dev $h1 egress pref 2 handle 102 matchall action drop
+}
+
+h1_destroy()
+{
+	tc filter del dev $h1 egress pref 2 handle 102 matchall action drop
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1 192.0.1.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.1.2/24
+	tc qdisc add dev $h2 clsact
+
+	# Add egress filter on $h2 that will guarantee that the packet sent,
+	# will be the only packet being passed to the device.
+	tc filter add dev $h2 egress pref 1 handle 101 matchall action drop
+}
+
+h2_destroy()
+{
+	tc filter del dev $h2 egress pref 1 handle 101 matchall action drop
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.1.2/24
+}
+
+sb_occ_pool_check()
+{
+	local dl_port=$1; shift
+	local pool=$1; shift
+	local exp_max_occ=$1
+	local max_occ
+	local err=0
+
+	max_occ=$(devlink sb -j occupancy show $dl_port \
+		  | jq -e ".[][][\"pool\"][\"$pool\"][\"max\"]")
+
+	if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+		err=1
+	fi
+
+	echo $max_occ
+	return $err
+}
+
+sb_occ_itc_check()
+{
+	local dl_port=$1; shift
+	local itc=$1; shift
+	local exp_max_occ=$1
+	local max_occ
+	local err=0
+
+	max_occ=$(devlink sb -j occupancy show $dl_port \
+		  | jq -e ".[][][\"itc\"][\"$itc\"][\"max\"]")
+
+	if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+		err=1
+	fi
+
+	echo $max_occ
+	return $err
+}
+
+sb_occ_etc_check()
+{
+	local dl_port=$1; shift
+	local etc=$1; shift
+	local exp_max_occ=$1; shift
+	local max_occ
+	local err=0
+
+	max_occ=$(devlink sb -j occupancy show $dl_port \
+		  | jq -e ".[][][\"etc\"][\"$etc\"][\"max\"]")
+
+	if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+		err=1
+	fi
+
+	echo $max_occ
+	return $err
+}
+
+port_pool_test()
+{
+	local exp_max_occ=$(devlink_cell_size_get)
+	local max_occ
+
+	tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \
+		src_mac $h1mac dst_mac $h2mac \
+		src_ip 192.0.1.1 dst_ip 192.0.1.2 \
+		action pass
+
+	devlink sb occupancy clearmax $DEVLINK_DEV
+
+	$MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+		-t ip -q
+
+	devlink sb occupancy snapshot $DEVLINK_DEV
+
+	RET=0
+	max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ)
+	check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h2) ingress pool"
+
+	RET=0
+	max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ)
+	check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "CPU port's egress pool"
+
+	tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \
+		src_mac $h1mac dst_mac $h2mac \
+		src_ip 192.0.1.1 dst_ip 192.0.1.2 \
+		action pass
+}
+
+port_tc_ip_test()
+{
+	local exp_max_occ=$(devlink_cell_size_get)
+	local max_occ
+
+	tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \
+		src_mac $h1mac dst_mac $h2mac \
+		src_ip 192.0.1.1 dst_ip 192.0.1.2 \
+		action pass
+
+	devlink sb occupancy clearmax $DEVLINK_DEV
+
+	$MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+		-t ip -q
+
+	devlink sb occupancy snapshot $DEVLINK_DEV
+
+	RET=0
+	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h2) ingress TC - IP packet"
+
+	RET=0
+	max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ)
+	check_err $? "Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "CPU port's egress TC - IP packet"
+
+	tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \
+		src_mac $h1mac dst_mac $h2mac \
+		src_ip 192.0.1.1 dst_ip 192.0.1.2 \
+		action pass
+}
+
+port_tc_arp_test()
+{
+	local exp_max_occ=$(devlink_cell_size_get)
+	local max_occ
+
+	tc filter add dev $h1 egress protocol arp pref 1 handle 101 flower \
+		src_mac $h1mac action pass
+
+	devlink sb occupancy clearmax $DEVLINK_DEV
+
+	$MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q
+
+	devlink sb occupancy snapshot $DEVLINK_DEV
+
+	RET=0
+	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h2) ingress TC - ARP packet"
+
+	RET=0
+	max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ)
+	check_err $? "Expected egress TC($SB_ITC_IP2ME) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "CPU port's egress TC - ARP packet"
+
+	tc filter del dev $h1 egress protocol arp pref 1 handle 101 flower \
+		src_mac $h1mac action pass
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	dl_port1=$(devlink_port_by_netdev $h1)
+	dl_port2=$(devlink_port_by_netdev $h2)
+
+	cpu_dl_port=$(devlink_cpu_port_get)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
new file mode 100755
index 000000000000..2223337eed0c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
@@ -0,0 +1,416 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import json as j
+import random
+
+
+class SkipTest(Exception):
+    pass
+
+
+class RandomValuePicker:
+    """
+    Class for storing shared buffer configuration. Can handle 3 different
+    objects, pool, tcbind and portpool. Provide an interface to get random
+    values for a specific object type as the follow:
+      1. Pool:
+         - random size
+
+      2. TcBind:
+         - random pool number
+         - random threshold
+
+      3. PortPool:
+         - random threshold
+    """
+    def __init__(self, pools):
+        self._pools = []
+        for pool in pools:
+            self._pools.append(pool)
+
+    def _cell_size(self):
+        return self._pools[0]["cell_size"]
+
+    def _get_static_size(self, th):
+        # For threshold of 16, this works out to be about 12MB on Spectrum-1,
+        # and about 17MB on Spectrum-2.
+        return th * 8000 * self._cell_size()
+
+    def _get_size(self):
+        return self._get_static_size(16)
+
+    def _get_thtype(self):
+        return "static"
+
+    def _get_th(self, pool):
+        # Threshold value could be any integer between 3 to 16
+        th = random.randint(3, 16)
+        if pool["thtype"] == "dynamic":
+            return th
+        else:
+            return self._get_static_size(th)
+
+    def _get_pool(self, direction):
+        ing_pools = []
+        egr_pools = []
+        for pool in self._pools:
+            if pool["type"] == "ingress":
+                ing_pools.append(pool)
+            else:
+                egr_pools.append(pool)
+        if direction == "ingress":
+            arr = ing_pools
+        else:
+            arr = egr_pools
+        return arr[random.randint(0, len(arr) - 1)]
+
+    def get_value(self, objid):
+        if isinstance(objid, Pool):
+            if objid["pool"] in [4, 8, 9, 10]:
+                # The threshold type of pools 4, 8, 9 and 10 cannot be changed
+                raise SkipTest()
+            else:
+                return (self._get_size(), self._get_thtype())
+        if isinstance(objid, TcBind):
+            if objid["tc"] >= 8:
+                # Multicast TCs cannot be changed
+                raise SkipTest()
+            else:
+                pool = self._get_pool(objid["type"])
+                th = self._get_th(pool)
+                pool_n = pool["pool"]
+                return (pool_n, th)
+        if isinstance(objid, PortPool):
+            pool_n = objid["pool"]
+            pool = self._pools[pool_n]
+            assert pool["pool"] == pool_n
+            th = self._get_th(pool)
+            return (th,)
+
+
+class RecordValuePickerException(Exception):
+    pass
+
+
+class RecordValuePicker:
+    """
+    Class for storing shared buffer configuration. Can handle 2 different
+    objects, pool and tcbind. Provide an interface to get the stored values per
+    object type.
+    """
+    def __init__(self, objlist):
+        self._recs = []
+        for item in objlist:
+            self._recs.append({"objid": item, "value": item.var_tuple()})
+
+    def get_value(self, objid):
+        if isinstance(objid, Pool) and objid["pool"] in [4, 8, 9, 10]:
+            # The threshold type of pools 4, 8, 9 and 10 cannot be changed
+            raise SkipTest()
+        if isinstance(objid, TcBind) and objid["tc"] >= 8:
+            # Multicast TCs cannot be changed
+            raise SkipTest()
+        for rec in self._recs:
+            if rec["objid"].weak_eq(objid):
+                return rec["value"]
+        raise RecordValuePickerException()
+
+
+def run_cmd(cmd, json=False):
+    out = subprocess.check_output(cmd, shell=True)
+    if json:
+        return j.loads(out)
+    return out
+
+
+def run_json_cmd(cmd):
+    return run_cmd(cmd, json=True)
+
+
+def log_test(test_name, err_msg=None):
+    if err_msg:
+        print("\t%s" % err_msg)
+        print("TEST: %-80s  [FAIL]" % test_name)
+    else:
+        print("TEST: %-80s  [ OK ]" % test_name)
+
+
+class CommonItem(dict):
+    varitems = []
+
+    def var_tuple(self):
+        ret = []
+        self.varitems.sort()
+        for key in self.varitems:
+            ret.append(self[key])
+        return tuple(ret)
+
+    def weak_eq(self, other):
+        for key in self:
+            if key in self.varitems:
+                continue
+            if self[key] != other[key]:
+                return False
+        return True
+
+
+class CommonList(list):
+    def get_by(self, by_obj):
+        for item in self:
+            if item.weak_eq(by_obj):
+                return item
+        return None
+
+    def del_by(self, by_obj):
+        for item in self:
+            if item.weak_eq(by_obj):
+                self.remove(item)
+
+
+class Pool(CommonItem):
+    varitems = ["size", "thtype"]
+
+    def dl_set(self, dlname, size, thtype):
+        run_cmd("devlink sb pool set {} sb {} pool {} size {} thtype {}".format(dlname, self["sb"],
+                                                                                self["pool"],
+                                                                                size, thtype))
+
+
+class PoolList(CommonList):
+    pass
+
+
+def get_pools(dlname, direction=None):
+    d = run_json_cmd("devlink sb pool show -j")
+    pools = PoolList()
+    for pooldict in d["pool"][dlname]:
+        if not direction or direction == pooldict["type"]:
+            pools.append(Pool(pooldict))
+    return pools
+
+
+def do_check_pools(dlname, pools, vp):
+    for pool in pools:
+        pre_pools = get_pools(dlname)
+        try:
+            (size, thtype) = vp.get_value(pool)
+        except SkipTest:
+            continue
+        pool.dl_set(dlname, size, thtype)
+        post_pools = get_pools(dlname)
+        pool = post_pools.get_by(pool)
+
+        err_msg = None
+        if pool["size"] != size:
+            err_msg = "Incorrect pool size (got {}, expected {})".format(pool["size"], size)
+        if pool["thtype"] != thtype:
+            err_msg = "Incorrect pool threshold type (got {}, expected {})".format(pool["thtype"], thtype)
+
+        pre_pools.del_by(pool)
+        post_pools.del_by(pool)
+        if pre_pools != post_pools:
+            err_msg = "Other pool setup changed as well"
+        log_test("pool {} of sb {} set verification".format(pool["pool"],
+                                                            pool["sb"]), err_msg)
+
+
+def check_pools(dlname, pools):
+    # Save defaults
+    record_vp = RecordValuePicker(pools)
+
+    # For each pool, set random size and static threshold type
+    do_check_pools(dlname, pools, RandomValuePicker(pools))
+
+    # Restore defaults
+    do_check_pools(dlname, pools, record_vp)
+
+
+class TcBind(CommonItem):
+    varitems = ["pool", "threshold"]
+
+    def __init__(self, port, d):
+        super(TcBind, self).__init__(d)
+        self["dlportname"] = port.name
+
+    def dl_set(self, pool, th):
+        run_cmd("devlink sb tc bind set {} sb {} tc {} type {} pool {} th {}".format(self["dlportname"],
+                                                                                     self["sb"],
+                                                                                     self["tc"],
+                                                                                     self["type"],
+                                                                                     pool, th))
+
+
+class TcBindList(CommonList):
+    pass
+
+
+def get_tcbinds(ports, verify_existence=False):
+    d = run_json_cmd("devlink sb tc bind show -j -n")
+    tcbinds = TcBindList()
+    for port in ports:
+        err_msg = None
+        if port.name not in d["tc_bind"] or len(d["tc_bind"][port.name]) == 0:
+            err_msg = "No tc bind for port"
+        else:
+            for tcbinddict in d["tc_bind"][port.name]:
+                tcbinds.append(TcBind(port, tcbinddict))
+        if verify_existence:
+            log_test("tc bind existence for port {} verification".format(port.name), err_msg)
+    return tcbinds
+
+
+def do_check_tcbind(ports, tcbinds, vp):
+    for tcbind in tcbinds:
+        pre_tcbinds = get_tcbinds(ports)
+        try:
+            (pool, th) = vp.get_value(tcbind)
+        except SkipTest:
+            continue
+        tcbind.dl_set(pool, th)
+        post_tcbinds = get_tcbinds(ports)
+        tcbind = post_tcbinds.get_by(tcbind)
+
+        err_msg = None
+        if tcbind["pool"] != pool:
+            err_msg = "Incorrect pool (got {}, expected {})".format(tcbind["pool"], pool)
+        if tcbind["threshold"] != th:
+            err_msg = "Incorrect threshold (got {}, expected {})".format(tcbind["threshold"], th)
+
+        pre_tcbinds.del_by(tcbind)
+        post_tcbinds.del_by(tcbind)
+        if pre_tcbinds != post_tcbinds:
+            err_msg = "Other tc bind setup changed as well"
+        log_test("tc bind {}-{} of sb {} set verification".format(tcbind["dlportname"],
+                                                                  tcbind["tc"],
+                                                                  tcbind["sb"]), err_msg)
+
+
+def check_tcbind(dlname, ports, pools):
+    tcbinds = get_tcbinds(ports, verify_existence=True)
+
+    # Save defaults
+    record_vp = RecordValuePicker(tcbinds)
+
+    # Bind each port and unicast TC (TCs < 8) to a random pool and a random
+    # threshold
+    do_check_tcbind(ports, tcbinds, RandomValuePicker(pools))
+
+    # Restore defaults
+    do_check_tcbind(ports, tcbinds, record_vp)
+
+
+class PortPool(CommonItem):
+    varitems = ["threshold"]
+
+    def __init__(self, port, d):
+        super(PortPool, self).__init__(d)
+        self["dlportname"] = port.name
+
+    def dl_set(self, th):
+        run_cmd("devlink sb port pool set {} sb {} pool {} th {}".format(self["dlportname"],
+                                                                         self["sb"],
+                                                                         self["pool"], th))
+
+
+class PortPoolList(CommonList):
+    pass
+
+
+def get_portpools(ports, verify_existence=False):
+    d = run_json_cmd("devlink sb port pool -j -n")
+    portpools = PortPoolList()
+    for port in ports:
+        err_msg = None
+        if port.name not in d["port_pool"] or len(d["port_pool"][port.name]) == 0:
+            err_msg = "No port pool for port"
+        else:
+            for portpooldict in d["port_pool"][port.name]:
+                portpools.append(PortPool(port, portpooldict))
+        if verify_existence:
+            log_test("port pool existence for port {} verification".format(port.name), err_msg)
+    return portpools
+
+
+def do_check_portpool(ports, portpools, vp):
+    for portpool in portpools:
+        pre_portpools = get_portpools(ports)
+        (th,) = vp.get_value(portpool)
+        portpool.dl_set(th)
+        post_portpools = get_portpools(ports)
+        portpool = post_portpools.get_by(portpool)
+
+        err_msg = None
+        if portpool["threshold"] != th:
+            err_msg = "Incorrect threshold (got {}, expected {})".format(portpool["threshold"], th)
+
+        pre_portpools.del_by(portpool)
+        post_portpools.del_by(portpool)
+        if pre_portpools != post_portpools:
+            err_msg = "Other port pool setup changed as well"
+        log_test("port pool {}-{} of sb {} set verification".format(portpool["dlportname"],
+                                                                    portpool["pool"],
+                                                                    portpool["sb"]), err_msg)
+
+
+def check_portpool(dlname, ports, pools):
+    portpools = get_portpools(ports, verify_existence=True)
+
+    # Save defaults
+    record_vp = RecordValuePicker(portpools)
+
+    # For each port pool, set a random threshold
+    do_check_portpool(ports, portpools, RandomValuePicker(pools))
+
+    # Restore defaults
+    do_check_portpool(ports, portpools, record_vp)
+
+
+class Port:
+    def __init__(self, name):
+        self.name = name
+
+
+class PortList(list):
+    pass
+
+
+def get_ports(dlname):
+    d = run_json_cmd("devlink port show -j")
+    ports = PortList()
+    for name in d["port"]:
+        if name.find(dlname) == 0 and d["port"][name]["flavour"] == "physical":
+            ports.append(Port(name))
+    return ports
+
+
+def get_device():
+    devices_info = run_json_cmd("devlink -j dev info")["info"]
+    for d in devices_info:
+        if "mlxsw_spectrum" in devices_info[d]["driver"]:
+            return d
+    return None
+
+
+class UnavailableDevlinkNameException(Exception):
+    pass
+
+
+def test_sb_configuration():
+    # Use static seed
+    random.seed(0)
+
+    dlname = get_device()
+    if not dlname:
+        raise UnavailableDevlinkNameException()
+
+    ports = get_ports(dlname)
+    pools = get_pools(dlname)
+
+    check_pools(dlname, pools)
+    check_tcbind(dlname, ports, pools)
+    check_portpool(dlname, ports, pools)
+
+
+test_sb_configuration()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh
new file mode 100644
index 000000000000..f7c168decd1e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../mirror_gre_scale.sh
+
+mirror_gre_get_target()
+{
+	local should_fail=$1; shift
+	local target
+
+	target=$(devlink_resource_size_get span_agents)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh
new file mode 120000
index 000000000000..bd670d9dc4e5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh
@@ -0,0 +1 @@
+../spectrum/port_range_scale.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh
new file mode 100644
index 000000000000..0b71dfbbb447
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../port_scale.sh
+
+port_get_target()
+{
+	local should_fail=$1
+	local target
+
+	target=$(devlink_resource_size_get physical_ports)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
new file mode 100755
index 000000000000..d7505b933aef
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+source ../mlxsw_lib.sh
+
+mlxsw_only_on_spectrum 2+ || exit 1
+
+current_test=""
+
+cleanup()
+{
+	pre_cleanup
+	if [ ! -z $current_test ]; then
+		${current_test}_cleanup
+	fi
+	# Need to reload in order to avoid router abort.
+	devlink_reload
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+	router
+	tc_flower
+	mirror_gre
+	tc_police
+	port
+	rif_mac_profile
+	rif_counter
+	port_range
+"
+
+for current_test in ${TESTS:-$ALL_TESTS}; do
+	RET_FIN=0
+	source ${current_test}_scale.sh
+
+	num_netifs_var=${current_test^^}_NUM_NETIFS
+	num_netifs=${!num_netifs_var:-$NUM_NETIFS}
+
+	for should_fail in 0 1; do
+		RET=0
+		target=$(${current_test}_get_target "$should_fail")
+		if ((target == 0)); then
+			continue
+		fi
+
+		${current_test}_setup_prepare
+		setup_wait_n $num_netifs
+		# Update target in case occupancy of a certain resource changed
+		# following the test setup.
+		target=$(${current_test}_get_target "$should_fail")
+		${current_test}_test "$target" "$should_fail"
+		if [[ "$should_fail" -eq 0 ]]; then
+			log_test "'$current_test' $target"
+
+			if ((!RET)); then
+				tt=${current_test}_traffic_test
+				if [[ $(type -t $tt) == "function" ]]; then
+					$tt "$target"
+					log_test "'$current_test' $target traffic test"
+				fi
+			fi
+		else
+			log_test "'$current_test' overflow $target"
+		fi
+		${current_test}_cleanup $target
+		devlink_reload
+		RET_FIN=$(( RET_FIN || RET ))
+	done
+done
+current_test=""
+
+exit "$RET_FIN"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh
new file mode 120000
index 000000000000..1f5752e8ffc0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh
@@ -0,0 +1 @@
+../spectrum/rif_counter_scale.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh
new file mode 100644
index 000000000000..303d7cbe3c45
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../rif_mac_profile_scale.sh
+
+rif_mac_profile_get_target()
+{
+	local should_fail=$1
+	local target
+
+	target=$(devlink_resource_size_get rif_mac_profiles)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh
new file mode 100644
index 000000000000..1897e163e3ab
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../router_scale.sh
+
+router_get_target()
+{
+	local should_fail=$1
+	local target
+
+	target=$(devlink_resource_size_get kvd)
+
+	if [[ $should_fail -eq 0 ]]; then
+		target=$((target * 85 / 100))
+	else
+		target=$((target + 1))
+	fi
+
+	echo $target
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
new file mode 100755
index 000000000000..4994bea5daf8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -0,0 +1,1176 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the A-TCAM and C-TCAM operation in Spectrum-2.
+# It tries to exercise as many code paths in the eRP state machine as
+# possible.
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="single_mask_test identical_filters_test two_masks_test \
+	multiple_masks_test ctcam_edge_cases_test delta_simple_test \
+	delta_two_masks_one_key_test delta_simple_rehash_test \
+	bloom_simple_test bloom_complex_test bloom_delta_test \
+	max_erp_entries_test max_group_size_test collision_test"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+tp_record()
+{
+	local tracepoint=$1
+	local cmd=$2
+
+	perf record -q -e $tracepoint $cmd
+	return $?
+}
+
+tp_record_all()
+{
+	local tracepoint=$1
+	local seconds=$2
+
+	perf record -a -q -e $tracepoint sleep $seconds
+	return $?
+}
+
+__tp_hit_count()
+{
+	local tracepoint=$1
+
+	local perf_output=`perf script -F trace:event,trace`
+	return `echo $perf_output | grep "$tracepoint:" | wc -l`
+}
+
+tp_check_hits()
+{
+	local tracepoint=$1
+	local count=$2
+
+	__tp_hit_count $tracepoint
+	if [[ "$?" -ne "$count" ]]; then
+		return 1
+	fi
+	return 0
+}
+
+tp_check_hits_any()
+{
+	local tracepoint=$1
+
+	__tp_hit_count $tracepoint
+	if [[ "$?" -eq "0" ]]; then
+		return 1
+	fi
+	return 0
+}
+
+single_mask_test()
+{
+	# When only a single mask is required, the device uses the master
+	# mask and not the eRP table. Verify that under this mode the right
+	# filter is matched
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Single filter - did not match"
+
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 198.51.100.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 2
+	check_err $? "Two filters - did not match highest priority"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Two filters - did not match lowest priority"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Single filter - did not match after delete"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "single mask test ($tcflags)"
+}
+
+identical_filters_test()
+{
+	# When two filters that only differ in their priority are used,
+	# one needs to be inserted into the C-TCAM. This test verifies
+	# that filters are correctly spilled to C-TCAM and that the right
+	# filter is matched
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match A-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match C-TCAM filter after A-TCAM delete"
+
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Did not match C-TCAM filter after A-TCAM add"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match A-TCAM filter after C-TCAM delete"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+	log_test "identical filters test ($tcflags)"
+}
+
+two_masks_test()
+{
+	# When more than one mask is required, the eRP table is used. This
+	# test verifies that the eRP table is correctly allocated and used
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.0.0/8 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Two filters - did not match highest priority"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Single filter - did not match"
+
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Two filters - did not match highest priority after add"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "two masks test ($tcflags)"
+}
+
+multiple_masks_test()
+{
+	# The number of masks in a region is limited. Once the maximum
+	# number of masks has been reached filters that require new
+	# masks are spilled to the C-TCAM. This test verifies that
+	# spillage is performed correctly and that the right filter is
+	# matched
+
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	local index
+
+	RET=0
+
+	NUM_MASKS=32
+	NUM_ERPS=16
+	BASE_INDEX=100
+
+	for i in $(eval echo {1..$NUM_MASKS}); do
+		index=$((BASE_INDEX - i))
+
+		if ((i > NUM_ERPS)); then
+			exp_hits=1
+			err_msg="$i filters - C-TCAM spill did not happen when it was expected"
+		else
+			exp_hits=0
+			err_msg="$i filters - C-TCAM spill happened when it should not"
+		fi
+
+		tp_record "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" \
+			"tc filter add dev $h2 ingress protocol ip pref $index \
+				handle $index \
+				flower $tcflags \
+				dst_ip 192.0.2.2/${i} src_ip 192.0.2.1/${i} \
+				action drop"
+		tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" \
+				$exp_hits
+		check_err $? "$err_msg"
+
+		$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \
+			-B 192.0.2.2 -t ip -q
+
+		tc_check_packets "dev $h2 ingress" $index 1
+		check_err $? "$i filters - did not match highest priority (add)"
+	done
+
+	for i in $(eval echo {$NUM_MASKS..1}); do
+		index=$((BASE_INDEX - i))
+
+		$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \
+			-B 192.0.2.2 -t ip -q
+
+		tc_check_packets "dev $h2 ingress" $index 2
+		check_err $? "$i filters - did not match highest priority (del)"
+
+		tc filter del dev $h2 ingress protocol ip pref $index \
+			handle $index flower
+	done
+
+	log_test "multiple masks test ($tcflags)"
+}
+
+ctcam_two_atcam_masks_test()
+{
+	RET=0
+
+	# First case: C-TCAM is disabled when there are two A-TCAM masks.
+	# We push a filter into the C-TCAM by using two identical filters
+	# as in identical_filters_test()
+
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into C-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.0.0/16 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match A-TCAM filter"
+
+	# Delete both A-TCAM and C-TCAM filters and make sure the remaining
+	# A-TCAM filter still works
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match A-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+	log_test "ctcam with two atcam masks test ($tcflags)"
+}
+
+ctcam_one_atcam_mask_test()
+{
+	RET=0
+
+	# Second case: C-TCAM is disabled when there is one A-TCAM mask.
+	# The test is similar to identical_filters_test()
+
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into C-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match C-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match A-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "ctcam with one atcam mask test ($tcflags)"
+}
+
+ctcam_no_atcam_masks_test()
+{
+	RET=0
+
+	# Third case: C-TCAM is disabled when there are no A-TCAM masks
+	# This test exercises the code path that transitions the eRP table
+	# to its initial state after deleting the last C-TCAM mask
+
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into C-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "ctcam with no atcam masks test ($tcflags)"
+}
+
+ctcam_edge_cases_test()
+{
+	# When the C-TCAM is disabled after deleting the last C-TCAM
+	# mask, we want to make sure the eRP state machine is put in
+	# the correct state
+
+	ctcam_two_atcam_masks_test
+	ctcam_one_atcam_mask_test
+	ctcam_no_atcam_masks_test
+}
+
+delta_simple_test()
+{
+	# The first filter will create eRP, the second filter will fit into
+	# the first eRP with delta. Remove the first rule then and check that
+        # the eRP stays (referenced by the second filter).
+
+	RET=0
+
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \
+		   pref 1 handle 101 flower $tcflags dst_ip 192.0.0.0/24 \
+		   action drop"
+	tp_check_hits "objagg:objagg_obj_root_create" 1
+	check_err $? "eRP was not created"
+
+	tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \
+		   pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \
+		   action drop"
+	tp_check_hits "objagg:objagg_obj_root_create" 0
+	check_err $? "eRP was incorrectly created"
+	tp_check_hits "objagg:objagg_obj_parent_assign" 1
+	check_err $? "delta was not created"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \
+		   pref 1 handle 101 flower"
+	tp_check_hits "objagg:objagg_obj_root_destroy" 0
+	check_err $? "eRP was incorrectly destroyed"
+	tp_check_hits "objagg:objagg_obj_parent_unassign" 0
+	check_err $? "delta was incorrectly destroyed"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Did not match on correct filter after the first was removed"
+
+	tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \
+		   pref 2 handle 102 flower"
+	tp_check_hits "objagg:objagg_obj_parent_unassign" 1
+	check_err $? "delta was not destroyed"
+	tp_check_hits "objagg:objagg_obj_root_destroy" 1
+	check_err $? "eRP was not destroyed"
+
+	log_test "delta simple test ($tcflags)"
+}
+
+delta_two_masks_one_key_test()
+{
+	# If 2 keys are the same and only differ in mask in a way that
+	# they belong under the same ERP (second is delta of the first),
+	# there should be C-TCAM spill.
+
+	RET=0
+
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \
+		   pref 1 handle 101 flower $tcflags dst_ip 192.0.2.0/24 \
+		   action drop"
+	tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 0
+	check_err $? "incorrect C-TCAM spill while inserting the first rule"
+
+	tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \
+		   pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \
+		   action drop"
+	tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 1
+	check_err $? "C-TCAM spill did not happen while inserting the second rule"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "delta two masks one key test ($tcflags)"
+}
+
+delta_simple_rehash_test()
+{
+	RET=0
+
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 0
+	check_err $? "Failed to set ACL region rehash interval"
+
+	tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_fail $? "Rehash trace was hit even when rehash should be disabled"
+
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 3000
+	check_err $? "Failed to set ACL region rehash interval"
+
+	sleep 1
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.1.0/25 action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.3.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tp_record_all mlxsw:* 3
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_err $? "Rehash trace was not hit"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
+	check_err $? "Migrate trace was not hit"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end
+	check_err $? "Migrate end trace was not hit"
+	tp_record_all mlxsw:* 3
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_err $? "Rehash trace was not hit"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
+	check_fail $? "Migrate trace was hit when no migration should happen"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end
+	check_fail $? "Migrate end trace was hit when no migration should happen"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter after rehash"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_fail $? "Matched a wrong filter after rehash"
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Did not match on correct filter after rehash"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "delta simple rehash test ($tcflags)"
+}
+
+delta_simple_ipv6_rehash_test()
+{
+	RET=0
+
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 0
+	check_err $? "Failed to set ACL region rehash interval"
+
+	tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_fail $? "Rehash trace was hit even when rehash should be disabled"
+
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 3000
+	check_err $? "Failed to set ACL region rehash interval"
+
+	sleep 1
+
+	tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 101 flower \
+		$tcflags dst_ip 2001:db8:1::0/121 action drop
+	tc filter add dev $h2 ingress protocol ipv6 pref 2 handle 102 flower \
+		$tcflags dst_ip 2001:db8:2::2 action drop
+	tc filter add dev $h2 ingress protocol ipv6 pref 3 handle 103 flower \
+		$tcflags dst_ip 2001:db8:3::0/120 action drop
+
+	$MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \
+		-A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tp_record_all mlxsw:* 3
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_err $? "Rehash trace was not hit"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
+	check_err $? "Migrate trace was not hit"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end
+	check_err $? "Migrate end trace was not hit"
+	tp_record_all mlxsw:* 3
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_err $? "Rehash trace was not hit"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
+	check_fail $? "Migrate trace was hit when no migration should happen"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end
+	check_fail $? "Migrate end trace was hit when no migration should happen"
+
+	$MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \
+		-A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter after rehash"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_fail $? "Matched a wrong filter after rehash"
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Did not match on correct filter after rehash"
+
+	tc filter del dev $h2 ingress protocol ipv6 pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ipv6 pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 101 flower
+
+	log_test "delta simple IPv6 rehash test ($tcflags)"
+}
+
+TEST_RULE_BASE=256
+declare -a test_rules_inserted
+
+test_rule_add()
+{
+	local iface=$1
+	local tcflags=$2
+	local index=$3
+
+	if ! [ ${test_rules_inserted[$index]} ] ; then
+		test_rules_inserted[$index]=false
+	fi
+	if ${test_rules_inserted[$index]} ; then
+		return
+	fi
+
+	local number=$(( $index + $TEST_RULE_BASE ))
+	printf -v hexnumber '%x' $number
+
+	batch="${batch}filter add dev $iface ingress protocol ipv6 pref 1 \
+		handle $number flower $tcflags \
+		src_ip 2001:db8:1::$hexnumber action drop\n"
+	test_rules_inserted[$index]=true
+}
+
+test_rule_del()
+{
+	local iface=$1
+	local index=$2
+
+	if ! [ ${test_rules_inserted[$index]} ] ; then
+		test_rules_inserted[$index]=false
+	fi
+	if ! ${test_rules_inserted[$index]} ; then
+		return
+	fi
+
+	local number=$(( $index + $TEST_RULE_BASE ))
+	printf -v hexnumber '%x' $number
+
+	batch="${batch}filter del dev $iface ingress protocol ipv6 pref 1 \
+		handle $number flower\n"
+	test_rules_inserted[$index]=false
+}
+
+test_rule_add_or_remove()
+{
+	local iface=$1
+	local tcflags=$2
+	local index=$3
+
+	if ! [ ${test_rules_inserted[$index]} ] ; then
+		test_rules_inserted[$index]=false
+	fi
+	if ${test_rules_inserted[$index]} ; then
+		test_rule_del $iface $index
+	else
+		test_rule_add $iface $tcflags $index
+	fi
+}
+
+test_rule_add_or_remove_random_batch()
+{
+	local iface=$1
+	local tcflags=$2
+	local total_count=$3
+	local skip=0
+	local count=0
+	local MAXSKIP=20
+	local MAXCOUNT=20
+
+	for ((i=1;i<=total_count;i++)); do
+		if (( $skip == 0 )) && (($count == 0)); then
+			((skip=$RANDOM % $MAXSKIP + 1))
+			((count=$RANDOM % $MAXCOUNT + 1))
+		fi
+		if (( $skip != 0 )); then
+			((skip-=1))
+		else
+			((count-=1))
+			test_rule_add_or_remove $iface $tcflags $i
+		fi
+	done
+}
+
+delta_massive_ipv6_rehash_test()
+{
+	RET=0
+
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 0
+	check_err $? "Failed to set ACL region rehash interval"
+
+	tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_fail $? "Rehash trace was hit even when rehash should be disabled"
+
+	RANDOM=4432897
+	declare batch=""
+	test_rule_add_or_remove_random_batch $h2 $tcflags 5000
+
+	echo -n -e $batch | tc -b -
+
+	declare batch=""
+	test_rule_add_or_remove_random_batch $h2 $tcflags 5000
+
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 3000
+	check_err $? "Failed to set ACL region rehash interval"
+
+	sleep 1
+
+	tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 101 flower \
+		$tcflags dst_ip 2001:db8:1::0/121 action drop
+	tc filter add dev $h2 ingress protocol ipv6 pref 2 handle 102 flower \
+		$tcflags dst_ip 2001:db8:2::2 action drop
+	tc filter add dev $h2 ingress protocol ipv6 pref 3 handle 103 flower \
+		$tcflags dst_ip 2001:db8:3::0/120 action drop
+
+	$MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \
+		-A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	echo -n -e $batch | tc -b -
+
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 0
+	check_err $? "Failed to set ACL region rehash interval"
+
+	$MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \
+		-A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter after rehash"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_fail $? "Matched a wrong filter after rehash"
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Did not match on correct filter after rehash"
+
+	tc filter del dev $h2 ingress protocol ipv6 pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ipv6 pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 101 flower
+
+	declare batch=""
+	for i in {1..5000}; do
+		test_rule_del $h2 $tcflags $i
+	done
+	echo -e $batch | tc -b -
+
+	log_test "delta massive IPv6 rehash test ($tcflags)"
+}
+
+bloom_simple_test()
+{
+	# Bloom filter requires that the eRP table is used. This test
+	# verifies that Bloom filter is not harming correctness of ACLs.
+	# First, make sure that eRP table is used and then set rule patterns
+	# which are distant enough and will result skipping a lookup after
+	# consulting the Bloom filter. Although some eRP lookups are skipped,
+	# the correct filter should be hit.
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 5 handle 104 flower \
+		$tcflags dst_ip 198.51.100.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.0.0/8 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Two filters - did not match highest priority"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Single filter - did not match"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Low prio filter - did not match"
+
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 198.0.0.0/8 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Two filters - did not match highest priority after add"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 5 handle 104 flower
+
+	log_test "bloom simple test ($tcflags)"
+}
+
+bloom_complex_test()
+{
+	# Bloom filter index computation is affected from region ID, eRP
+	# ID and from the region key size. In order to exercise those parts
+	# of the Bloom filter code, use a series of regions, each with a
+	# different key size and send packet that should hit all of them.
+	local index
+
+	RET=0
+	NUM_CHAINS=4
+	BASE_INDEX=100
+
+	# Create chain with up to 2 key blocks (ip_proto only)
+	tc chain add dev $h2 ingress chain 1 protocol ip flower \
+		ip_proto tcp &> /dev/null
+	# Create chain with 2-4 key blocks (ip_proto, src MAC)
+	tc chain add dev $h2 ingress chain 2 protocol ip flower \
+		ip_proto tcp \
+		src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+	# Create chain with 4-8 key blocks (ip_proto, src & dst MAC, IPv4 dest)
+	tc chain add dev $h2 ingress chain 3 protocol ip flower \
+		ip_proto tcp \
+		dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \
+		src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \
+		dst_ip 0.0.0.0/32 &> /dev/null
+	# Default chain contains all fields and therefore is 8-12 key blocks
+	tc chain add dev $h2 ingress chain 4
+
+	# We need at least 2 rules in every region to have eRP table active
+	# so create a dummy rule per chain using a different pattern
+	for i in $(eval echo {0..$NUM_CHAINS}); do
+		index=$((BASE_INDEX - 1 - i))
+		tc filter add dev $h2 ingress chain $i protocol ip \
+			pref 2 handle $index flower \
+			$tcflags ip_proto tcp action drop
+	done
+
+	# Add rules to test Bloom filter, each in a different chain
+	index=$BASE_INDEX
+	tc filter add dev $h2 ingress protocol ip \
+		pref 1 handle $((++index)) flower \
+		$tcflags dst_ip 192.0.0.0/16 action goto chain 1
+	tc filter add dev $h2 ingress chain 1 protocol ip \
+		pref 1 handle $((++index)) flower \
+		$tcflags action goto chain 2
+	tc filter add dev $h2 ingress chain 2 protocol ip \
+		pref 1 handle $((++index)) flower \
+		$tcflags src_mac $h1mac action goto chain 3
+	tc filter add dev $h2 ingress chain 3 protocol ip \
+		pref 1 handle $((++index)) flower \
+		$tcflags dst_ip 192.0.0.0/8 action goto chain 4
+	tc filter add dev $h2 ingress chain 4 protocol ip \
+		pref 1 handle $((++index)) flower \
+		$tcflags src_ip 192.0.2.0/24 action drop
+
+	# Send a packet that is supposed to hit all chains
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	for i in $(eval echo {0..$NUM_CHAINS}); do
+		index=$((BASE_INDEX + i + 1))
+		tc_check_packets "dev $h2 ingress" $index 1
+		check_err $? "Did not match chain $i"
+	done
+
+	# Rules cleanup
+	for i in $(eval echo {$NUM_CHAINS..0}); do
+		index=$((BASE_INDEX - i - 1))
+		tc filter del dev $h2 ingress chain $i \
+			pref 2 handle $index flower
+		index=$((BASE_INDEX + i + 1))
+		tc filter del dev $h2 ingress chain $i \
+			pref 1 handle $index flower
+	done
+
+	# Chains cleanup
+	for i in $(eval echo {$NUM_CHAINS..1}); do
+		tc chain del dev $h2 ingress chain $i
+	done
+
+	log_test "bloom complex test ($tcflags)"
+}
+
+
+bloom_delta_test()
+{
+	# When multiple masks are used, the eRP table is activated. When
+	# masks are close enough (delta) the masks reside on the same
+	# eRP table. This test verifies that the eRP table is correctly
+	# allocated and used in delta condition and that Bloom filter is
+	# still functional with delta.
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.1.0.0/16 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.1.2.1 -B 192.1.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Single filter - did not match"
+
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.2.1.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.2.1.1 -B 192.2.1.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Delta filters - did not match second filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "bloom delta test ($tcflags)"
+}
+
+max_erp_entries_test()
+{
+	# The number of eRP entries is limited. Once the maximum number of eRPs
+	# has been reached, filters cannot be added. This test verifies that
+	# when this limit is reached, inserstion fails without crashing.
+
+	RET=0
+
+	local num_masks=32
+	local num_regions=15
+	local chain_failed
+	local mask_failed
+	local ret
+
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	for ((i=1; i < $num_regions; i++)); do
+		for ((j=$num_masks; j >= 0; j--)); do
+			tc filter add dev $h2 ingress chain $i protocol ip \
+				pref $i	handle $j flower $tcflags \
+				dst_ip 192.1.0.0/$j &> /dev/null
+			ret=$?
+
+			if [ $ret -ne 0 ]; then
+				chain_failed=$i
+				mask_failed=$j
+				break 2
+			fi
+		done
+	done
+
+	# We expect to exceed the maximum number of eRP entries, so that
+	# insertion eventually fails. Otherwise, the test should be adjusted to
+	# add more filters.
+	check_fail $ret "expected to exceed number of eRP entries"
+
+	for ((; i >= 1; i--)); do
+		for ((j=0; j <= $num_masks; j++)); do
+			tc filter del dev $h2 ingress chain $i protocol ip \
+				pref $i handle $j flower &> /dev/null
+		done
+	done
+
+	log_test "max eRP entries test ($tcflags). " \
+		"max chain $chain_failed, mask $mask_failed"
+}
+
+max_group_size_test()
+{
+	# The number of ACLs in an ACL group is limited. Once the maximum
+	# number of ACLs has been reached, filters cannot be added. This test
+	# verifies that when this limit is reached, insertion fails without
+	# crashing.
+
+	RET=0
+
+	local num_acls=32
+	local max_size
+	local ret
+
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	for ((i=1; i < $num_acls; i++)); do
+		if [[ $(( i % 2 )) == 1 ]]; then
+			tc filter add dev $h2 ingress pref $i proto ipv4 \
+				flower $tcflags dst_ip 198.51.100.1/32 \
+				ip_proto tcp tcp_flags 0x01/0x01 \
+				action drop &> /dev/null
+		else
+			tc filter add dev $h2 ingress pref $i proto ipv6 \
+				flower $tcflags dst_ip 2001:db8:1::1/128 \
+				action drop &> /dev/null
+		fi
+
+		ret=$?
+		[[ $ret -ne 0 ]] && max_size=$((i - 1)) && break
+	done
+
+	# We expect to exceed the maximum number of ACLs in a group, so that
+	# insertion eventually fails. Otherwise, the test should be adjusted to
+	# add more filters.
+	check_fail $ret "expected to exceed number of ACLs in a group"
+
+	for ((; i >= 1; i--)); do
+		if [[ $(( i % 2 )) == 1 ]]; then
+			tc filter del dev $h2 ingress pref $i proto ipv4 \
+				flower $tcflags dst_ip 198.51.100.1/32 \
+				ip_proto tcp tcp_flags 0x01/0x01 \
+				action drop &> /dev/null
+		else
+			tc filter del dev $h2 ingress pref $i proto ipv6 \
+				flower $tcflags dst_ip 2001:db8:1::1/128 \
+				action drop &> /dev/null
+		fi
+	done
+
+	log_test "max ACL group size test ($tcflags). max size $max_size"
+}
+
+collision_test()
+{
+	# Filters cannot share an eRP if in the common unmasked part (i.e.,
+	# without the delta bits) they have the same values. If the driver does
+	# not prevent such configuration (by spilling into the C-TCAM), then
+	# multiple entries will be present in the device with the same key,
+	# leading to collisions and a reduced scale.
+	#
+	# Create such a scenario and make sure all the filters are successfully
+	# added.
+
+	RET=0
+
+	local ret
+
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	# Add a single dst_ip/24 filter and multiple dst_ip/32 filters that all
+	# have the same values in the common unmasked part (dst_ip/24).
+
+	tc filter add dev $h2 ingress pref 1 proto ipv4 handle 101 \
+		flower $tcflags dst_ip 198.51.100.0/24 \
+		action drop
+
+	for i in {0..255}; do
+		tc filter add dev $h2 ingress pref 2 proto ipv4 \
+			handle $((102 + i)) \
+			flower $tcflags dst_ip 198.51.100.${i}/32 \
+			action drop
+		ret=$?
+		[[ $ret -ne 0 ]] && break
+	done
+
+	check_err $ret "failed to add all the filters"
+
+	for i in {255..0}; do
+		tc filter del dev $h2 ingress pref 2 proto ipv4 \
+			handle $((102 + i)) flower
+	done
+
+	tc filter del dev $h2 ingress pref 1 proto ipv4 handle 101 flower
+
+	log_test "collision test ($tcflags)"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+if ! tc_offload_check; then
+	check_err 1 "Could not test offloaded functionality"
+	log_test "mlxsw-specific tests for tc flower"
+	exit
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
new file mode 100644
index 000000000000..4444bbace1a9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_flower_scale.sh
+
+tc_flower_get_target()
+{
+	local should_fail=$1; shift
+	local max_cnts
+
+	# The driver associates a counter with each tc filter, which means the
+	# number of supported filters is bounded by the number of available
+	# counters.
+	max_cnts=$(devlink_resource_size_get counters flow)
+
+	# Remove already allocated counters.
+	((max_cnts -= $(devlink_resource_occ_get counters flow)))
+
+	# Each rule uses two counters, for packets and bytes.
+	((max_cnts /= 2))
+
+	if ((! should_fail)); then
+		echo $max_cnts
+	else
+		echo $((max_cnts + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh
new file mode 100644
index 000000000000..e79ac0dad1f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_police_scale.sh
+
+tc_police_get_target()
+{
+	local should_fail=$1; shift
+	local target
+
+	target=$(devlink_resource_size_get global_policers single_rate_policers)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
new file mode 100755
index 000000000000..fd23c80eba31
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
@@ -0,0 +1,339 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test VxLAN flooding. The device stores flood records in a singly linked list
+# where each record stores up to four IPv6 addresses of remote VTEPs. The test
+# verifies that packets are correctly flooded in various cases such as deletion
+# of a record in the middle of the list.
+#
+# +-----------------------+
+# | H1 (vrf)              |
+# |    + $h1              |
+# |    | 2001:db8:1::1/64 |
+# +----|------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# | +--|--------------------------------------------------------------------+ |
+# | |  + $swp1                   BR0 (802.1d)                               | |
+# | |                                                                       | |
+# | |  + vxlan0 (vxlan)                                                     | |
+# | |    local 2001:db8:2::1                                                | |
+# | |    remote 2001:db8:2::{2..17}                                         | |
+# | |    id 10 dstport 4789                                                 | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |  2001:db8:2::0/64 via 2001:db8:3::2                                       |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 2001:db8:3::1/64                                                     |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                               R2 (vrf) |
+# |    + $rp2                                                   |
+# |      2001:db8:3::2/64                                       |
+# |                                                             |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="flooding_test"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 2001:db8:1::1/64
+}
+
+switch_create()
+{
+	# Make sure the bridge uses the MAC address of the local port and
+	# not that of the VxLAN's device
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link set dev br0 address $(mac_get $swp1)
+
+	ip link add name vxlan0 type vxlan id 10 nolearning \
+		udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \
+		local 2001:db8:2::1 dstport 4789
+
+	ip address add 2001:db8:2::1/128 dev lo
+
+	ip link set dev $swp1 master br0
+	ip link set dev vxlan0 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev vxlan0 up
+}
+
+switch_destroy()
+{
+	ip link set dev vxlan0 down
+	ip link set dev $swp1 down
+	ip link set dev br0 down
+
+	ip link set dev vxlan0 nomaster
+	ip link set dev $swp1 nomaster
+
+	ip address del 2001:db8:2::1/128 dev lo
+
+	ip link del dev vxlan0
+
+	ip link del dev br0
+}
+
+router1_create()
+{
+	# This router is in the default VRF, where the VxLAN device is
+	# performing the L3 lookup
+	ip link set dev $rp1 up
+	ip address add 2001:db8:3::1/64 dev $rp1
+	ip route add 2001:db8:2::0/64 via 2001:db8:3::2
+}
+
+router1_destroy()
+{
+	ip route del 2001:db8:2::0/64 via 2001:db8:3::2
+	ip address del 2001:db8:3::1/64 dev $rp1
+	ip link set dev $rp1 down
+}
+
+router2_create()
+{
+	# This router is not in the default VRF, so use simple_if_init()
+	simple_if_init $rp2 2001:db8:3::2/64
+}
+
+router2_destroy()
+{
+	simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	rp1=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+
+	switch_create
+
+	router1_create
+	router2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router2_destroy
+	router1_destroy
+
+	switch_destroy
+
+	h1_destroy
+
+	vrf_cleanup
+}
+
+flooding_remotes_add()
+{
+	local num_remotes=$1
+	local lsb
+	local i
+
+	# Prevent unwanted packets from entering the bridge and interfering
+	# with the test.
+	tc qdisc add dev br0 clsact
+	tc filter add dev br0 egress protocol all pref 1 handle 1 \
+		matchall skip_hw action drop
+	tc qdisc add dev $h1 clsact
+	tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+		flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+	tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+		matchall skip_hw action drop
+
+	for i in $(eval echo {1..$num_remotes}); do
+		lsb=$((i + 1))
+
+		bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \
+			dst 2001:db8:2::$lsb
+	done
+}
+
+flooding_filters_add()
+{
+	local num_remotes=$1
+	local lsb
+	local i
+
+	tc qdisc add dev $rp2 clsact
+
+	for i in $(eval echo {1..$num_remotes}); do
+		lsb=$((i + 1))
+
+		tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \
+			flower ip_proto udp dst_ip 2001:db8:2::$lsb \
+			dst_port 4789 skip_sw action drop
+	done
+}
+
+flooding_filters_del()
+{
+	local num_remotes=$1
+	local i
+
+	for i in $(eval echo {1..$num_remotes}); do
+		tc filter del dev $rp2 ingress protocol ipv6 pref $i \
+			handle $i flower
+	done
+
+	tc qdisc del dev $rp2 clsact
+
+	tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+	tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+	tc qdisc del dev $h1 clsact
+	tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+	tc qdisc del dev br0 clsact
+}
+
+flooding_check_packets()
+{
+	local packets=("$@")
+	local num_remotes=${#packets[@]}
+	local i
+
+	for i in $(eval echo {1..$num_remotes}); do
+		tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]}
+		check_err $? "remote $i - did not get expected number of packets"
+	done
+}
+
+flooding_test()
+{
+	# Use 16 remote VTEPs that will be stored in 4 records. The array
+	# 'packets' will store how many packets are expected to be received
+	# by each remote VTEP at each stage of the test
+	declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1)
+	local num_remotes=16
+
+	RET=0
+
+	# Add FDB entries for remote VTEPs and corresponding tc filters on the
+	# ingress of the nexthop router. These filters will count how many
+	# packets were flooded to each remote VTEP
+	flooding_remotes_add $num_remotes
+	flooding_filters_add $num_remotes
+
+	# Send one packet and make sure it is flooded to all the remote VTEPs
+	$MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 1 packet"
+
+	# Delete the third record which corresponds to VTEPs with LSB 10..13
+	# and check that packet is flooded correctly when we remove a record
+	# from the middle of the list
+	RET=0
+
+	packets=(2 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13
+
+	$MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 2 packets"
+
+	# Delete the first record and make sure the packet is flooded correctly
+	RET=0
+
+	packets=(2 2 2 2 3 3 3 3 1 1 1 1 3 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5
+
+	$MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 3 packets"
+
+	# Delete the last record and make sure the packet is flooded correctly
+	RET=0
+
+	packets=(2 2 2 2 4 4 4 4 1 1 1 1 3 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 4 packets"
+
+	# Delete the last record, one entry at a time and make sure single
+	# entries are correctly removed
+	RET=0
+
+	packets=(2 2 2 2 4 5 5 5 1 1 1 1 3 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 5 packets"
+
+	RET=0
+
+	packets=(2 2 2 2 4 5 6 6 1 1 1 1 3 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 6 packets"
+
+	RET=0
+
+	packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 7 packets"
+
+	RET=0
+
+	packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::9
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 8 packets"
+
+	flooding_filters_del $num_remotes
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
new file mode 100644
index 000000000000..06a80f40daa4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source "../../../../net/forwarding/devlink_lib.sh"
+source ../mlxsw_lib.sh
+
+mlxsw_only_on_spectrum 1 || exit 1
+
+# Needed for returning to default
+declare -A KVD_DEFAULTS
+
+KVD_CHILDREN="linear hash_single hash_double"
+KVDL_CHILDREN="singles chunks large_chunks"
+
+devlink_sp_resource_minimize()
+{
+	local size
+	local i
+
+	for i in $KVD_CHILDREN; do
+		size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]')
+		devlink_resource_size_set "$size" kvd "$i"
+	done
+
+	for i in $KVDL_CHILDREN; do
+		size=$(devlink_resource_get kvd linear "$i" | \
+		       jq '.["size_min"]')
+		devlink_resource_size_set "$size" kvd linear "$i"
+	done
+}
+
+devlink_sp_size_kvd_to_default()
+{
+	local need_reload=0
+	local i
+
+	for i in $KVD_CHILDREN; do
+		local size=$(echo "${KVD_DEFAULTS[kvd_$i]}" | jq '.["size"]')
+		current_size=$(devlink_resource_size_get kvd "$i")
+
+		if [ "$size" -ne "$current_size" ]; then
+			devlink_resource_size_set "$size" kvd "$i"
+			need_reload=1
+		fi
+	done
+
+	for i in $KVDL_CHILDREN; do
+		local size=$(echo "${KVD_DEFAULTS[kvd_linear_$i]}" | \
+			     jq '.["size"]')
+		current_size=$(devlink_resource_size_get kvd linear "$i")
+
+		if [ "$size" -ne "$current_size" ]; then
+			devlink_resource_size_set "$size" kvd linear "$i"
+			need_reload=1
+		fi
+	done
+
+	if [ "$need_reload" -ne "0" ]; then
+		devlink_reload
+	fi
+}
+
+devlink_sp_read_kvd_defaults()
+{
+	local key
+	local i
+
+	KVD_DEFAULTS[kvd]=$(devlink_resource_get "kvd")
+	for i in $KVD_CHILDREN; do
+		key=kvd_$i
+		KVD_DEFAULTS[$key]=$(devlink_resource_get kvd "$i")
+	done
+
+	for i in $KVDL_CHILDREN; do
+		key=kvd_linear_$i
+		KVD_DEFAULTS[$key]=$(devlink_resource_get kvd linear "$i")
+	done
+}
+
+KVD_PROFILES="default scale ipv4_max"
+
+devlink_sp_resource_kvd_profile_set()
+{
+	local profile=$1
+
+	case "$profile" in
+	scale)
+		devlink_resource_size_set 64000 kvd linear
+		devlink_resource_size_set 15616 kvd linear singles
+		devlink_resource_size_set 32000 kvd linear chunks
+		devlink_resource_size_set 16384 kvd linear large_chunks
+		devlink_resource_size_set 128000 kvd hash_single
+		devlink_resource_size_set 48000 kvd hash_double
+		devlink_reload
+		;;
+	ipv4_max)
+		devlink_resource_size_set 64000 kvd linear
+		devlink_resource_size_set 15616 kvd linear singles
+		devlink_resource_size_set 32000 kvd linear chunks
+		devlink_resource_size_set 16384 kvd linear large_chunks
+		devlink_resource_size_set 144000 kvd hash_single
+		devlink_resource_size_set 32768 kvd hash_double
+		devlink_reload
+		;;
+	default)
+		devlink_resource_size_set 98304 kvd linear
+		devlink_resource_size_set 16384 kvd linear singles
+		devlink_resource_size_set 49152 kvd linear chunks
+		devlink_resource_size_set 32768 kvd linear large_chunks
+		devlink_resource_size_set 87040 kvd hash_single
+		devlink_resource_size_set 60416 kvd hash_double
+		devlink_reload
+		;;
+	*)
+		check_err 1 "Unknown profile $profile"
+	esac
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
new file mode 100755
index 000000000000..6f2683cbc7d5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+NUM_NETIFS=1
+source $lib_dir/lib.sh
+source devlink_lib_spectrum.sh
+
+setup_prepare()
+{
+	devlink_sp_read_kvd_defaults
+}
+
+cleanup()
+{
+	pre_cleanup
+	devlink_sp_size_kvd_to_default
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+profiles_test()
+{
+	local i
+
+	log_info "Running profile tests"
+
+	for i in $KVD_PROFILES; do
+		RET=0
+		devlink_sp_resource_kvd_profile_set $i
+		log_test "'$i' profile"
+	done
+
+	# Default is explicitly tested at end to ensure it's actually applied
+	RET=0
+	devlink_sp_resource_kvd_profile_set "default"
+	log_test "'default' profile"
+}
+
+resources_min_test()
+{
+	local size
+	local i
+	local j
+
+	log_info "Running KVD-minimum tests"
+
+	for i in $KVD_CHILDREN; do
+		RET=0
+		size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]')
+		devlink_resource_size_set "$size" kvd "$i"
+
+		# In case of linear, need to minimize sub-resources as well
+		if [[ "$i" == "linear" ]]; then
+			for j in $KVDL_CHILDREN; do
+				devlink_resource_size_set 0 kvd linear "$j"
+			done
+		fi
+
+		devlink_reload
+		devlink_sp_size_kvd_to_default
+		log_test "'$i' minimize [$size]"
+	done
+}
+
+resources_max_test()
+{
+	local min_size
+	local size
+	local i
+	local j
+
+	log_info "Running KVD-maximum tests"
+	for i in $KVD_CHILDREN; do
+		RET=0
+		devlink_sp_resource_minimize
+
+		# Calculate the maximum possible size for the given partition
+		size=$(devlink_resource_size_get kvd)
+		for j in $KVD_CHILDREN; do
+			if [ "$i" != "$j" ]; then
+				min_size=$(devlink_resource_get kvd "$j" | \
+					   jq '.["size_min"]')
+				size=$((size - min_size))
+			fi
+		done
+
+		# Test almost maximum size
+		devlink_resource_size_set "$((size - 128))" kvd "$i"
+		devlink_reload
+		log_test "'$i' almost maximize [$((size - 128))]"
+
+		# Test above maximum size
+		devlink resource set "$DEVLINK_DEV" \
+			path "kvd/$i" size $((size + 128)) &> /dev/null
+		check_fail $? "Set kvd/$i to size $((size + 128)) should fail"
+		log_test "'$i' Overflow rejection [$((size + 128))]"
+
+		# Test maximum size
+		if [ "$i" == "hash_single" ] || [ "$i" == "hash_double" ]; then
+			echo "SKIP: Observed problem with exact max $i"
+			continue
+		fi
+
+		devlink_resource_size_set "$size" kvd "$i"
+		devlink_reload
+		log_test "'$i' maximize [$size]"
+
+		devlink_sp_size_kvd_to_default
+	done
+}
+
+profiles_test
+resources_min_test
+resources_max_test
+
+exit "$RET"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
new file mode 100644
index 000000000000..f7c168decd1e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../mirror_gre_scale.sh
+
+mirror_gre_get_target()
+{
+	local should_fail=$1; shift
+	local target
+
+	target=$(devlink_resource_size_get span_agents)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh
new file mode 100644
index 000000000000..d0847e8ea270
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../port_range_scale.sh
+
+port_range_get_target()
+{
+	local should_fail=$1; shift
+	local target
+
+	target=$(devlink_resource_size_get port_range_registers)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh
new file mode 100644
index 000000000000..0b71dfbbb447
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../port_scale.sh
+
+port_get_target()
+{
+	local should_fail=$1
+	local target
+
+	target=$(devlink_resource_size_get physical_ports)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
new file mode 100755
index 000000000000..60753d46a2d4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+VXPORT=4789
+
+ALL_TESTS="
+	create_vxlan_on_top_of_8021ad_bridge
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+create_vxlan_on_top_of_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+		vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 addrgenmode none
+	ip link set dev br0 up
+
+	ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \
+		"$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx100 up
+
+	ip link set dev $swp1 master br0
+	ip link set dev vx100 master br0
+
+	bridge vlan add vid 100 dev vx100 pvid untagged 2>/dev/null
+	check_fail $? "802.1ad bridge with VxLAN in Spectrum-1 not rejected"
+
+	bridge vlan add vid 100 dev vx100 pvid untagged 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1ad bridge with VxLAN in Spectrum-1 rejected without extack"
+
+	log_test "create VxLAN on top of 802.1ad bridge"
+
+	ip link del dev vx100
+	ip link del dev br0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
new file mode 100755
index 000000000000..7b98cdd0580d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source devlink_lib_spectrum.sh
+
+current_test=""
+
+cleanup()
+{
+	pre_cleanup
+	if [ ! -z $current_test ]; then
+		${current_test}_cleanup
+	fi
+	devlink_sp_size_kvd_to_default
+}
+
+devlink_sp_read_kvd_defaults
+trap cleanup EXIT
+
+ALL_TESTS="
+	router
+	tc_flower
+	mirror_gre
+	tc_police
+	port
+	rif_mac_profile
+	rif_counter
+	port_range
+"
+
+for current_test in ${TESTS:-$ALL_TESTS}; do
+	RET_FIN=0
+	source ${current_test}_scale.sh
+
+	num_netifs_var=${current_test^^}_NUM_NETIFS
+	num_netifs=${!num_netifs_var:-$NUM_NETIFS}
+
+	for profile in $KVD_PROFILES; do
+		RET=0
+		devlink_sp_resource_kvd_profile_set $profile
+		if [[ $RET -gt 0 ]]; then
+			log_test "'$current_test' [$profile] setting"
+			continue
+		fi
+
+		for should_fail in 0 1; do
+			RET=0
+			target=$(${current_test}_get_target "$should_fail")
+			if ((target == 0)); then
+				continue
+			fi
+			${current_test}_setup_prepare
+			setup_wait_n $num_netifs
+			# Update target in case occupancy of a certain resource
+			# changed following the test setup.
+			target=$(${current_test}_get_target "$should_fail")
+			${current_test}_test "$target" "$should_fail"
+			if [[ "$should_fail" -eq 0 ]]; then
+				log_test "'$current_test' [$profile] $target"
+
+				if ((!RET)); then
+					tt=${current_test}_traffic_test
+					if [[ $(type -t $tt) == "function" ]]
+					then
+						$tt "$target"
+						log_test "'$current_test' [$profile] $target traffic test"
+					fi
+				fi
+			else
+				log_test "'$current_test' [$profile] overflow $target"
+			fi
+			${current_test}_cleanup $target
+			RET_FIN=$(( RET_FIN || RET ))
+		done
+	done
+done
+current_test=""
+
+exit "$RET_FIN"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh
new file mode 100644
index 000000000000..d44536276e8a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../rif_counter_scale.sh
+
+rif_counter_get_target()
+{
+	local should_fail=$1; shift
+	local max_cnts
+	local max_rifs
+	local target
+
+	max_rifs=$(devlink_resource_size_get rifs)
+	max_cnts=$(devlink_resource_size_get counters rif)
+
+	# Remove already allocated RIFs.
+	((max_rifs -= $(devlink_resource_occ_get rifs)))
+
+	# 10 KVD slots per counter, ingress+egress counters per RIF
+	((max_cnts /= 20))
+
+	# Pointless to run the overflow test if we don't have enough RIFs to
+	# host all the counters.
+	if ((max_cnts > max_rifs && should_fail)); then
+		echo 0
+		return
+	fi
+
+	target=$((max_rifs < max_cnts ? max_rifs : max_cnts))
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh
new file mode 100644
index 000000000000..303d7cbe3c45
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../rif_mac_profile_scale.sh
+
+rif_mac_profile_get_target()
+{
+	local should_fail=$1
+	local target
+
+	target=$(devlink_resource_size_get rif_mac_profiles)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh
new file mode 100644
index 000000000000..21c4697d5bab
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../router_scale.sh
+
+router_get_target()
+{
+	local should_fail=$1
+	local target
+
+	target=$(devlink_resource_size_get kvd hash_single)
+
+	if [[ $should_fail -eq 0 ]]; then
+		target=$((target * 85 / 100))
+	else
+		target=$((target + 1))
+	fi
+
+	echo $target
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh
new file mode 100644
index 000000000000..f9bfd8937765
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_flower_scale.sh
+
+tc_flower_get_target()
+{
+	local should_fail=$1; shift
+
+	# 6144 (6x1024) is the theoretical maximum.
+	# One bank of 512 rules is taken by the 18-byte MC router rule.
+	# One rule is the ACL catch-all.
+	# 6144 - 512 - 1 = 5631
+	local target=5631
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh
new file mode 100644
index 000000000000..e79ac0dad1f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_police_scale.sh
+
+tc_police_get_target()
+{
+	local should_fail=$1; shift
+	local target
+
+	target=$(devlink_resource_size_get global_policers single_rate_policers)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh
new file mode 100755
index 000000000000..d8fd875ad527
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh
@@ -0,0 +1,334 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test VxLAN flooding. The device stores flood records in a singly linked list
+# where each record stores up to five IPv6 addresses of remote VTEPs. The test
+# verifies that packets are correctly flooded in various cases such as deletion
+# of a record in the middle of the list.
+#
+# +-----------------------+
+# | H1 (vrf)              |
+# |    + $h1              |
+# |    | 2001:db8:1::1/64 |
+# +----|------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# | +--|--------------------------------------------------------------------+ |
+# | |  + $swp1                   BR0 (802.1d)                               | |
+# | |                                                                       | |
+# | |  + vxlan0 (vxlan)                                                     | |
+# | |    local 2001:db8:2::1                                                | |
+# | |    remote 2001:db8:2::{2..21}                                         | |
+# | |    id 10 dstport 4789                                                 | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |  2001:db8:2::0/64 via 2001:db8:3::2                                       |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 2001:db8:3::1/64                                                     |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                               R2 (vrf) |
+# |    + $rp2                                                   |
+# |      2001:db8:3::2/64                                       |
+# |                                                             |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="flooding_test"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 2001:db8:1::1/64
+}
+
+switch_create()
+{
+	# Make sure the bridge uses the MAC address of the local port and
+	# not that of the VxLAN's device
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link set dev br0 address $(mac_get $swp1)
+
+	ip link add name vxlan0 type vxlan id 10 nolearning \
+		udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \
+		local 2001:db8:2::1 dstport 4789
+
+	ip address add 2001:db8:2::1/128 dev lo
+
+	ip link set dev $swp1 master br0
+	ip link set dev vxlan0 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev vxlan0 up
+}
+
+switch_destroy()
+{
+	ip link set dev vxlan0 down
+	ip link set dev $swp1 down
+	ip link set dev br0 down
+
+	ip link set dev vxlan0 nomaster
+	ip link set dev $swp1 nomaster
+
+	ip address del 2001:db8:2::1/128 dev lo
+
+	ip link del dev vxlan0
+
+	ip link del dev br0
+}
+
+router1_create()
+{
+	# This router is in the default VRF, where the VxLAN device is
+	# performing the L3 lookup
+	ip link set dev $rp1 up
+	ip address add 2001:db8:3::1/64 dev $rp1
+	ip route add 2001:db8:2::0/64 via 2001:db8:3::2
+}
+
+router1_destroy()
+{
+	ip route del 2001:db8:2::0/64 via 2001:db8:3::2
+	ip address del 2001:db8:3::1/64 dev $rp1
+	ip link set dev $rp1 down
+}
+
+router2_create()
+{
+	# This router is not in the default VRF, so use simple_if_init()
+	simple_if_init $rp2 2001:db8:3::2/64
+}
+
+router2_destroy()
+{
+	simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	rp1=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+
+	switch_create
+
+	router1_create
+	router2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router2_destroy
+	router1_destroy
+
+	switch_destroy
+
+	h1_destroy
+
+	vrf_cleanup
+}
+
+flooding_remotes_add()
+{
+	local num_remotes=$1
+	local lsb
+	local i
+
+	for i in $(eval echo {1..$num_remotes}); do
+		lsb=$((i + 1))
+
+		bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \
+			dst 2001:db8:2::$lsb
+	done
+}
+
+flooding_filters_add()
+{
+	local num_remotes=$1
+	local lsb
+	local i
+
+	tc qdisc add dev $rp2 clsact
+
+	for i in $(eval echo {1..$num_remotes}); do
+		lsb=$((i + 1))
+
+		tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \
+			flower ip_proto udp dst_ip 2001:db8:2::$lsb \
+			dst_port 4789 skip_sw action drop
+	done
+}
+
+flooding_filters_del()
+{
+	local num_remotes=$1
+	local i
+
+	for i in $(eval echo {1..$num_remotes}); do
+		tc filter del dev $rp2 ingress protocol ipv6 pref $i \
+			handle $i flower
+	done
+
+	tc qdisc del dev $rp2 clsact
+}
+
+flooding_check_packets()
+{
+	local packets=("$@")
+	local num_remotes=${#packets[@]}
+	local i
+
+	for i in $(eval echo {1..$num_remotes}); do
+		tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]}
+		check_err $? "remote $i - did not get expected number of packets"
+	done
+}
+
+flooding_test()
+{
+	# Use 20 remote VTEPs that will be stored in 4 records. The array
+	# 'packets' will store how many packets are expected to be received
+	# by each remote VTEP at each stage of the test
+	declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1)
+	local num_remotes=20
+
+	RET=0
+
+	# Add FDB entries for remote VTEPs and corresponding tc filters on the
+	# ingress of the nexthop router. These filters will count how many
+	# packets were flooded to each remote VTEP
+	flooding_remotes_add $num_remotes
+	flooding_filters_add $num_remotes
+
+	# Send one packet and make sure it is flooded to all the remote VTEPs
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 1 packet"
+
+	# Delete the third record which corresponds to VTEPs with LSB 12..16
+	# and check that packet is flooded correctly when we remove a record
+	# from the middle of the list
+	RET=0
+
+	packets=(2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 2 2 2 2 2)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 2 packets"
+
+	# Delete the first record and make sure the packet is flooded correctly
+	RET=0
+
+	packets=(2 2 2 2 2 3 3 3 3 3 1 1 1 1 1 3 3 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 3 packets"
+
+	# Delete the last record and make sure the packet is flooded correctly
+	RET=0
+
+	packets=(2 2 2 2 2 4 4 4 4 4 1 1 1 1 1 3 3 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::18
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::19
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::20
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::21
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 4 packets"
+
+	# Delete the last record, one entry at a time and make sure single
+	# entries are correctly removed
+	RET=0
+
+	packets=(2 2 2 2 2 4 5 5 5 5 1 1 1 1 1 3 3 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 5 packets"
+
+	RET=0
+
+	packets=(2 2 2 2 2 4 5 6 6 6 1 1 1 1 1 3 3 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 6 packets"
+
+	RET=0
+
+	packets=(2 2 2 2 2 4 5 6 7 7 1 1 1 1 1 3 3 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::9
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 7 packets"
+
+	RET=0
+
+	packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 8 packets"
+
+	RET=0
+
+	packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11
+
+	$MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 9 packets"
+
+	flooding_filters_del $num_remotes
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh
new file mode 100755
index 000000000000..20ed98fe5a60
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	default_hw_stats_test
+	immediate_hw_stats_test
+	delayed_hw_stats_test
+	disabled_hw_stats_test
+"
+NUM_NETIFS=2
+
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.2/24
+	tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1 192.0.2.2/24
+}
+
+hw_stats_test()
+{
+	RET=0
+
+	local name=$1
+	local action_hw_stats=$2
+	local occ_delta=$3
+	local expected_packet_count=$4
+
+	local orig_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]')
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop $action_hw_stats
+	check_err $? "Failed to add rule with $name hw_stats"
+
+	local new_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]')
+	local expected_occ=$((orig_occ + occ_delta))
+	[ "$new_occ" == "$expected_occ" ]
+	check_err $? "Expected occupancy of $expected_occ, got $new_occ"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $swp1mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $swp1 ingress" 101 $expected_packet_count
+	check_err $? "Did not match incoming packet"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "$name hw_stats"
+}
+
+default_hw_stats_test()
+{
+	hw_stats_test "default" "" 2 1
+}
+
+immediate_hw_stats_test()
+{
+	hw_stats_test "immediate" "hw_stats immediate" 2 1
+}
+
+delayed_hw_stats_test()
+{
+	RET=0
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop hw_stats delayed
+	check_fail $? "Unexpected success in adding rule with delayed hw_stats"
+
+	log_test "delayed hw_stats"
+}
+
+disabled_hw_stats_test()
+{
+	hw_stats_test "disabled" "hw_stats disabled" 0 0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	h1mac=$(mac_get $h1)
+	swp1mac=$(mac_get $swp1)
+
+	vrf_prepare
+
+	h1_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+check_tc_action_hw_stats_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
new file mode 100644
index 000000000000..d3d9e60d6ddf
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for resource limit of offloaded flower rules. The test adds a given
+# number of flower matches for different IPv6 addresses, then check the offload
+# indication for all of the tc flower rules. This file contains functions to set
+# up a testing topology and run the test, and is meant to be sourced from a test
+# script that calls the testing routine with a given number of rules.
+
+TC_FLOWER_NUM_NETIFS=2
+
+tc_flower_h1_create()
+{
+	simple_if_init $h1
+	tc qdisc add dev $h1 clsact
+}
+
+tc_flower_h1_destroy()
+{
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1
+}
+
+tc_flower_h2_create()
+{
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+}
+
+tc_flower_h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+}
+
+tc_flower_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	vrf_prepare
+
+	tc_flower_h1_create
+	tc_flower_h2_create
+}
+
+tc_flower_cleanup()
+{
+	pre_cleanup
+
+	tc_flower_h2_destroy
+	tc_flower_h1_destroy
+
+	vrf_cleanup
+
+	if [[ -v TC_FLOWER_BATCH_FILE ]]; then
+		rm -f $TC_FLOWER_BATCH_FILE
+	fi
+}
+
+tc_flower_addr()
+{
+	local num=$1; shift
+
+	printf "2001:db8:1::%x" $num
+}
+
+tc_flower_rules_create()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	TC_FLOWER_BATCH_FILE="$(mktemp)"
+
+	for ((i = 0; i < count; ++i)); do
+		cat >> $TC_FLOWER_BATCH_FILE <<-EOF
+			filter add dev $h2 ingress \
+				prot ipv6 \
+				pref 1000 \
+				handle 42$i \
+				flower $tcflags dst_ip $(tc_flower_addr $i) \
+				action drop
+		EOF
+	done
+
+	tc -b $TC_FLOWER_BATCH_FILE
+	check_err_fail $should_fail $? "Rule insertion"
+}
+
+__tc_flower_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+	local last=$((count - 1))
+
+	tc_flower_rules_create $count $should_fail
+
+	offload_count=$(tc -j -s filter show dev $h2 ingress    |
+			jq -r '[ .[] | select(.kind == "flower") |
+			.options | .in_hw ]' | jq .[] | wc -l)
+	[[ $((offload_count - 1)) -eq $count ]]
+	check_err_fail $should_fail $? "Attempt to offload $count rules (actual result $((offload_count - 1)))"
+}
+
+tc_flower_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	# We use lower 16 bits of IPv6 address for match. Also there are only 16
+	# bits of rule priority space.
+	if ((count > 65536)); then
+		check_err 1 "Invalid count of $count. At most 65536 rules supported"
+		return
+	fi
+
+	if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
+		check_err 1 "Could not test offloaded functionality"
+		return
+	fi
+
+	tcflags="skip_sw"
+	__tc_flower_test $count $should_fail
+}
+
+tc_flower_traffic_test()
+{
+	local count=$1; shift
+	local i;
+
+	for ((i = count - 1; i > 0; i /= 2)); do
+		$MZ -6 $h1 -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \
+		    -A $(tc_flower_addr 0) -B $(tc_flower_addr $i) \
+		    -q -t udp sp=54321,dp=12345
+	done
+	for ((i = count - 1; i > 0; i /= 2)); do
+		tc_check_packets "dev $h2 ingress" 42$i 1
+		check_err $? "Traffic not seen at rule #$i"
+	done
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh
new file mode 100755
index 000000000000..448b75c1545a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that policers shared by different tc filters are correctly reference
+# counted by observing policers' occupancy via devlink-resource.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	tc_police_occ_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+switch_create()
+{
+	simple_if_init $swp1
+	tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	vrf_prepare
+
+	h1_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+tc_police_occ_get()
+{
+	devlink_resource_occ_get global_policers single_rate_policers
+}
+
+tc_police_occ_test()
+{
+	RET=0
+
+	local occ=$(tc_police_occ_get)
+
+	tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+		flower skip_sw \
+		action police rate 100mbit burst 100k conform-exceed drop/ok
+	(( occ + 1 == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+	tc filter del dev $swp1 ingress pref 1 handle 101 flower
+	(( occ == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $occ"
+
+	tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+		flower skip_sw \
+		action police rate 100mbit burst 100k conform-exceed drop/ok \
+		index 10
+	tc filter add dev $swp1 ingress pref 2 handle 102 proto ip \
+		flower skip_sw action police index 10
+
+	(( occ + 1 == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+	tc filter del dev $swp1 ingress pref 2 handle 102 flower
+	(( occ + 1 == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+	tc filter del dev $swp1 ingress pref 1 handle 101 flower
+	(( occ == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $occ"
+
+	log_test "tc police occupancy"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
new file mode 100644
index 000000000000..86e787895f78
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TC_POLICE_NUM_NETIFS=2
+
+tc_police_h1_create()
+{
+	simple_if_init $h1
+}
+
+tc_police_h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+tc_police_switch_create()
+{
+	simple_if_init $swp1
+	tc qdisc add dev $swp1 clsact
+}
+
+tc_police_switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1
+}
+
+tc_police_addr()
+{
+       local num=$1; shift
+
+       printf "2001:db8:1::%x" $num
+}
+
+tc_police_rules_create()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	TC_POLICE_BATCH_FILE="$(mktemp)"
+
+	for ((i = 0; i < count; ++i)); do
+		cat >> $TC_POLICE_BATCH_FILE <<-EOF
+			filter add dev $swp1 ingress \
+				prot ipv6 \
+				pref 1000 \
+				flower skip_sw dst_ip $(tc_police_addr $i) \
+				action police rate 10mbit burst 100k \
+				conform-exceed drop/ok
+		EOF
+	done
+
+	tc -b $TC_POLICE_BATCH_FILE
+	check_err_fail $should_fail $? "Rule insertion"
+}
+
+__tc_police_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	tc_police_rules_create $count $should_fail
+
+	offload_count=$(tc -j filter show dev $swp1 ingress |
+			jq "[.[] | select(.options.in_hw == true)] | length")
+	((offload_count == count))
+	check_err_fail $should_fail $? "tc police offload count"
+}
+
+tc_police_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	if ! tc_offload_check $TC_POLICE_NUM_NETIFS; then
+		check_err 1 "Could not test offloaded functionality"
+		return
+	fi
+
+	__tc_police_test $count $should_fail
+}
+
+tc_police_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	vrf_prepare
+
+	tc_police_h1_create
+	tc_police_switch_create
+}
+
+tc_police_cleanup()
+{
+	pre_cleanup
+
+	tc_police_switch_destroy
+	tc_police_h1_destroy
+
+	vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
new file mode 100755
index 000000000000..0441a18f098b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -0,0 +1,414 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	shared_block_drop_test
+	egress_redirect_test
+	multi_mirror_test
+	matchall_sample_egress_test
+	matchall_mirror_behind_flower_ingress_test
+	matchall_sample_behind_flower_ingress_test
+	matchall_mirror_behind_flower_egress_test
+	matchall_proto_match_test
+	police_limits_test
+	multi_police_test
+"
+NUM_NETIFS=2
+
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.1/24
+	simple_if_init $swp2 192.0.2.2/24
+}
+
+switch_destroy()
+{
+	simple_if_fini $swp2 192.0.2.2/24
+	simple_if_fini $swp1 192.0.2.1/24
+}
+
+shared_block_drop_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have mixed-bound
+	# shared block with a drop rule.
+
+	tc qdisc add dev $swp1 ingress_block 22 clsact
+	check_err $? "Failed to create clsact with ingress block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add drop rule to ingress bound block"
+
+	tc qdisc add dev $swp2 ingress_block 22 clsact
+	check_err $? "Failed to create another clsact with ingress shared block"
+
+	tc qdisc del dev $swp2 clsact
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block after blocker drop rule removed"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_fail $? "Incorrect success to add drop rule to mixed bound block"
+
+	tc qdisc del dev $swp1 clsact
+
+	tc qdisc add dev $swp1 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add drop rule to egress bound shared block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	log_test "shared block drop"
+}
+
+egress_redirect_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have mirred redirect on
+	# egress-bound block.
+
+	tc qdisc add dev $swp1 ingress_block 22 clsact
+	check_err $? "Failed to create clsact with ingress block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_err $? "Failed to add redirect rule to ingress bound block"
+
+	tc qdisc add dev $swp2 ingress_block 22 clsact
+	check_err $? "Failed to create another clsact with ingress shared block"
+
+	tc qdisc del dev $swp2 clsact
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block after blocker redirect rule removed"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_fail $? "Incorrect success to add redirect rule to mixed bound block"
+
+	tc qdisc del dev $swp1 clsact
+
+	tc qdisc add dev $swp1 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_fail $? "Incorrect success to add redirect rule to egress bound shared block"
+
+	tc qdisc del dev $swp2 clsact
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_fail $? "Incorrect success to add redirect rule to egress bound block"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "shared block drop"
+}
+
+multi_mirror_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have multiple mirror
+	# actions in a single rule.
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress mirror dev $swp2
+	check_err $? "Failed to add rule with single mirror action"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress mirror dev $swp2 \
+		action mirred egress mirror dev $swp1
+	check_fail $? "Incorrect success to add rule with two mirror actions"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "multi mirror"
+}
+
+matchall_sample_egress_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have matchall with sample action
+	# bound on egress. Spectrum-1 specific restriction
+	mlxsw_only_on_spectrum 1 || return
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress protocol all pref 1 handle 101 \
+		matchall skip_sw action sample rate 100 group 1
+	check_err $? "Failed to add rule with sample action on ingress"
+
+	tc filter del dev $swp1 ingress protocol all pref 1 handle 101 matchall
+
+	tc filter add dev $swp1 egress protocol all pref 1 handle 101 \
+		matchall skip_sw action sample rate 100 group 1
+	check_fail $? "Incorrect success to add rule with sample action on egress"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall sample egress"
+}
+
+matchall_behind_flower_ingress_test()
+{
+	local action=$1
+	local action_args=$2
+
+	RET=0
+
+	# On ingress, all matchall-mirror and matchall-sample
+	# rules have to be in front of the flower rules
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+
+	tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \
+		matchall skip_sw action $action_args
+	check_err $? "Failed to add matchall rule in front of a flower rule"
+
+	tc filter del dev $swp1 ingress protocol all pref 9 handle 102 matchall
+
+	tc filter add dev $swp1 ingress protocol all pref 11 handle 102 \
+		matchall skip_sw action $action_args
+	check_fail $? "Incorrect success to add matchall rule behind a flower rule"
+
+	tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower
+
+	tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \
+		matchall skip_sw action $action_args
+
+	tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add flower rule behind a matchall rule"
+
+	tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower
+
+	tc filter add dev $swp1 ingress protocol ip pref 8 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_fail $? "Incorrect success to add flower rule in front of a matchall rule"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall $action flower ingress"
+}
+
+matchall_mirror_behind_flower_ingress_test()
+{
+	matchall_behind_flower_ingress_test "mirror" "mirred egress mirror dev $swp2"
+}
+
+matchall_sample_behind_flower_ingress_test()
+{
+	matchall_behind_flower_ingress_test "sample" "sample rate 100 group 1"
+}
+
+matchall_behind_flower_egress_test()
+{
+	local action=$1
+	local action_args=$2
+
+	RET=0
+
+	# On egress, all matchall-mirror rules have to be behind the flower rules
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+
+	tc filter add dev $swp1 egress protocol all pref 11 handle 102 \
+		matchall skip_sw action $action_args
+	check_err $? "Failed to add matchall rule in front of a flower rule"
+
+	tc filter del dev $swp1 egress protocol all pref 11 handle 102 matchall
+
+	tc filter add dev $swp1 egress protocol all pref 9 handle 102 \
+		matchall skip_sw action $action_args
+	check_fail $? "Incorrect success to add matchall rule behind a flower rule"
+
+	tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower
+
+	tc filter add dev $swp1 egress protocol all pref 11 handle 102 \
+		matchall skip_sw action $action_args
+
+	tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add flower rule behind a matchall rule"
+
+	tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower
+
+	tc filter add dev $swp1 egress protocol ip pref 12 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_fail $? "Incorrect success to add flower rule in front of a matchall rule"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall $action flower egress"
+}
+
+matchall_mirror_behind_flower_egress_test()
+{
+	matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2"
+}
+
+matchall_proto_match_test()
+{
+	RET=0
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		matchall skip_sw \
+		action sample group 1 rate 100
+	check_fail $? "Incorrect success to add matchall rule with protocol match"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall protocol match"
+}
+
+police_limits_test()
+{
+	RET=0
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 0.5kbit burst 1m conform-exceed drop/ok
+	check_fail $? "Incorrect success to add police action with too low rate"
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 2.5tbit burst 1g conform-exceed drop/ok
+	check_fail $? "Incorrect success to add police action with too high rate"
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 1.5kbit burst 1m conform-exceed drop/ok
+	check_err $? "Failed to add police action with low rate"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 1.9tbit burst 1g conform-exceed drop/ok
+	check_err $? "Failed to add police action with high rate"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 1.5kbit burst 512b conform-exceed drop/ok
+	check_fail $? "Incorrect success to add police action with too low burst size"
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 1.5kbit burst 2k conform-exceed drop/ok
+	check_err $? "Failed to add police action with low burst size"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "police rate and burst limits"
+}
+
+multi_police_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have multiple police
+	# actions in a single rule.
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+		flower skip_sw \
+		action police rate 100mbit burst 100k conform-exceed drop/ok
+	check_err $? "Failed to add rule with single police action"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+		flower skip_sw \
+		action police rate 100mbit burst 100k conform-exceed drop/pipe \
+		action police rate 200mbit burst 200k conform-exceed drop/ok
+	check_fail $? "Incorrect success to add rule with two police actions"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "multi police"
+}
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	vrf_prepare
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	vrf_cleanup
+}
+
+check_tc_shblock_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
new file mode 100755
index 000000000000..bc7ea2df49fb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -0,0 +1,658 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that packets are sampled when tc-sample is used and that reported
+# metadata is correct. Two sets of hosts (with and without LAG) are used, since
+# metadata extraction in mlxsw is a bit different when LAG is involved.
+#
+# +---------------------------------+       +---------------------------------+
+# | H1 (vrf)                        |       | H3 (vrf)                        |
+# |    + $h1                        |       |    + $h3_lag                    |
+# |    | 192.0.2.1/28               |       |    | 192.0.2.17/28              |
+# |    |                            |       |    |                            |
+# |    |  default via 192.0.2.2     |       |    |  default via 192.0.2.18    |
+# +----|----------------------------+       +----|----------------------------+
+#      |                                         |
+# +----|-----------------------------------------|----------------------------+
+# |    | 192.0.2.2/28                            | 192.0.2.18/28              |
+# |    + $rp1                                    + $rp3_lag                   |
+# |                                                                           |
+# |    + $rp2                                    + $rp4_lag                   |
+# |    | 198.51.100.2/28                         | 198.51.100.18/28           |
+# +----|-----------------------------------------|----------------------------+
+#      |                                         |
+# +----|----------------------------+       +----|----------------------------+
+# |    |  default via 198.51.100.2  |       |    |  default via 198.51.100.18 |
+# |    |                            |       |    |                            |
+# |    | 198.51.100.1/28            |       |    | 198.51.100.17/28           |
+# |    + $h2                        |       |    + $h4_lag                    |
+# | H2 (vrf)                        |       | H4 (vrf)                        |
+# +---------------------------------+       +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	tc_sample_rate_test
+	tc_sample_max_rate_test
+	tc_sample_conflict_test
+	tc_sample_group_conflict_test
+	tc_sample_md_iif_test
+	tc_sample_md_lag_iif_test
+	tc_sample_md_oif_test
+	tc_sample_md_lag_oif_test
+	tc_sample_md_out_tc_test
+	tc_sample_md_out_tc_occ_test
+	tc_sample_md_latency_test
+	tc_sample_acl_group_conflict_test
+	tc_sample_acl_rate_test
+	tc_sample_acl_max_rate_test
+"
+NUM_NETIFS=8
+CAPTURE_FILE=$(mktemp)
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/28
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+}
+
+h2_destroy()
+{
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 198.51.100.1/28
+}
+
+h3_create()
+{
+	ip link set dev $h3 down
+	ip link add name ${h3}_bond type bond mode 802.3ad
+	ip link set dev $h3 master ${h3}_bond
+
+	simple_if_init ${h3}_bond 192.0.2.17/28
+
+	ip -4 route add default vrf v${h3}_bond nexthop via 192.0.2.18
+}
+
+h3_destroy()
+{
+	ip -4 route del default vrf v${h3}_bond nexthop via 192.0.2.18
+
+	simple_if_fini ${h3}_bond 192.0.2.17/28
+
+	ip link set dev $h3 nomaster
+	ip link del dev ${h3}_bond
+}
+
+h4_create()
+{
+	ip link set dev $h4 down
+	ip link add name ${h4}_bond type bond mode 802.3ad
+	ip link set dev $h4 master ${h4}_bond
+
+	simple_if_init ${h4}_bond 198.51.100.17/28
+
+	ip -4 route add default vrf v${h4}_bond nexthop via 198.51.100.18
+}
+
+h4_destroy()
+{
+	ip -4 route del default vrf v${h4}_bond nexthop via 198.51.100.18
+
+	simple_if_fini ${h4}_bond 198.51.100.17/28
+
+	ip link set dev $h4 nomaster
+	ip link del dev ${h4}_bond
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	__addr_add_del $rp1 add 192.0.2.2/28
+	tc qdisc add dev $rp1 clsact
+
+	ip link set dev $rp2 up
+	__addr_add_del $rp2 add 198.51.100.2/28
+	tc qdisc add dev $rp2 clsact
+
+	ip link add name ${rp3}_bond type bond mode 802.3ad
+	ip link set dev $rp3 master ${rp3}_bond
+	__addr_add_del ${rp3}_bond add 192.0.2.18/28
+	tc qdisc add dev $rp3 clsact
+	ip link set dev ${rp3}_bond up
+
+	ip link add name ${rp4}_bond type bond mode 802.3ad
+	ip link set dev $rp4 master ${rp4}_bond
+	__addr_add_del ${rp4}_bond add 198.51.100.18/28
+	tc qdisc add dev $rp4 clsact
+	ip link set dev ${rp4}_bond up
+}
+
+router_destroy()
+{
+	ip link set dev ${rp4}_bond down
+	tc qdisc del dev $rp4 clsact
+	__addr_add_del ${rp4}_bond del 198.51.100.18/28
+	ip link set dev $rp4 nomaster
+	ip link del dev ${rp4}_bond
+
+	ip link set dev ${rp3}_bond down
+	tc qdisc del dev $rp3 clsact
+	__addr_add_del ${rp3}_bond del 192.0.2.18/28
+	ip link set dev $rp3 nomaster
+	ip link del dev ${rp3}_bond
+
+	tc qdisc del dev $rp2 clsact
+	__addr_add_del $rp2 del 198.51.100.2/28
+	ip link set dev $rp2 down
+
+	tc qdisc del dev $rp1 clsact
+	__addr_add_del $rp1 del 192.0.2.2/28
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+	h3=${NETIFS[p5]}
+	rp3=${NETIFS[p6]}
+	h4=${NETIFS[p7]}
+	rp4=${NETIFS[p8]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	h4_create
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	rm -f $CAPTURE_FILE
+
+	router_destroy
+	h4_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+psample_capture_start()
+{
+	rm -f $CAPTURE_FILE
+
+	psample &> $CAPTURE_FILE &
+
+	sleep 1
+}
+
+psample_capture_stop()
+{
+	kill_process %%
+}
+
+__tc_sample_rate_test()
+{
+	local desc=$1; shift
+	local dip=$1; shift
+	local pkts pct
+
+	RET=0
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 32 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
+		-B $dip -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+	pct=$((100 * (pkts - 10000) / 10000))
+	(( -25 <= pct && pct <= 25))
+	check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+	log_test "tc sample rate ($desc)"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_rate_test()
+{
+	__tc_sample_rate_test "forward" 198.51.100.1
+	__tc_sample_rate_test "local receive" 192.0.2.2
+}
+
+tc_sample_max_rate_test()
+{
+	RET=0
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate $((35 * 10 ** 8)) group 1
+	check_err $? "Failed to configure sampling rule with max rate"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate $((35 * 10 ** 8 + 1)) \
+		group 1 &> /dev/null
+	check_fail $? "Managed to configure sampling rate above maximum"
+
+	log_test "tc sample maximum rate"
+}
+
+tc_sample_conflict_test()
+{
+	RET=0
+
+	# Test that two sampling rules cannot be configured on the same port,
+	# even when they share the same parameters.
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+		skip_sw action sample rate 1024 group 1 &> /dev/null
+	check_fail $? "Managed to configure second sampling rule"
+
+	# Delete the first rule and make sure the second rule can now be
+	# configured.
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+
+	tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule after deletion"
+
+	log_test "tc sample conflict test"
+
+	tc filter del dev $rp1 ingress protocol all pref 2 handle 102 matchall
+}
+
+tc_sample_group_conflict_test()
+{
+	RET=0
+
+	# Test that two sampling rules cannot be configured on the same port
+	# with different groups.
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+		skip_sw action sample rate 1024 group 2 &> /dev/null
+	check_fail $? "Managed to configure sampling rule with conflicting group"
+
+	log_test "tc sample group conflict test"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_iif_test()
+{
+	local rp1_ifindex
+
+	RET=0
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	rp1_ifindex=$(ip -j -p link show dev $rp1 | jq '.[]["ifindex"]')
+	grep -q -e "in-ifindex $rp1_ifindex " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected in-ifindex"
+
+	log_test "tc sample iif"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_iif_test()
+{
+	local rp3_ifindex
+
+	RET=0
+
+	tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	rp3_ifindex=$(ip -j -p link show dev $rp3 | jq '.[]["ifindex"]')
+	grep -q -e "in-ifindex $rp3_ifindex " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected in-ifindex"
+
+	log_test "tc sample lag iif"
+
+	tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_oif_test()
+{
+	local rp2_ifindex
+
+	RET=0
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	rp2_ifindex=$(ip -j -p link show dev $rp2 | jq '.[]["ifindex"]')
+	grep -q -e "out-ifindex $rp2_ifindex " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected out-ifindex"
+
+	log_test "tc sample oif"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_oif_test()
+{
+	local rp4_ifindex
+
+	RET=0
+
+	tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	rp4_ifindex=$(ip -j -p link show dev $rp4 | jq '.[]["ifindex"]')
+	grep -q -e "out-ifindex $rp4_ifindex " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected out-ifindex"
+
+	log_test "tc sample lag oif"
+
+	tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_test()
+{
+	RET=0
+
+	# Output traffic class is not supported on Spectrum-1.
+	mlxsw_only_on_spectrum 2+ || return
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	# By default, all the packets should go to the same traffic class (0).
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	grep -q -e "out-tc 0 " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected out-tc (0)"
+
+	# Map all priorities to highest traffic class (7) and check reported
+	# out-tc.
+	tc qdisc replace dev $rp2 root handle 1: \
+		prio bands 3 priomap 0 0 0 0 0 0 0 0
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	grep -q -e "out-tc 7 " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected out-tc (7)"
+
+	log_test "tc sample out-tc"
+
+	tc qdisc del dev $rp2 root handle 1:
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_occ_test()
+{
+	local backlog pct occ
+
+	RET=0
+
+	# Output traffic class occupancy is not supported on Spectrum-1.
+	mlxsw_only_on_spectrum 2+ || return
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	# Configure a shaper on egress to create congestion.
+	tc qdisc replace dev $rp2 root handle 1: \
+		tbf rate 1Mbit burst 256k limit 1M
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q &
+
+	# Allow congestion to reach steady state.
+	sleep 10
+
+	backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]')
+
+	# Kill mausezahn.
+	kill_process %%
+
+	psample_capture_stop
+
+	# Record last congestion sample.
+	occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \
+		cut -d ' ' -f 16)
+
+	pct=$((100 * (occ - backlog) / backlog))
+	(( -1 <= pct && pct <= 1))
+	check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-5%"
+
+	log_test "tc sample out-tc-occ"
+
+	tc qdisc del dev $rp2 root handle 1:
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_latency_test()
+{
+	RET=0
+
+	# Egress sampling not supported on Spectrum-1.
+	mlxsw_only_on_spectrum 2+ || return
+
+	tc filter add dev $rp2 egress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	grep -q -e "latency " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have latency attribute"
+
+	log_test "tc sample latency"
+
+	tc filter del dev $rp2 egress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_acl_group_conflict_test()
+{
+	RET=0
+
+	# Test that two flower sampling rules cannot be configured on the same
+	# port with different groups.
+
+	# Policy-based sampling is not supported on Spectrum-1.
+	mlxsw_only_on_spectrum 2+ || return
+
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	tc filter add dev $rp1 ingress protocol ip pref 2 handle 102 flower \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule with same group"
+
+	tc filter add dev $rp1 ingress protocol ip pref 3 handle 103 flower \
+		skip_sw action sample rate 1024 group 2 &> /dev/null
+	check_fail $? "Managed to configure sampling rule with conflicting group"
+
+	log_test "tc sample (w/ flower) group conflict test"
+
+	tc filter del dev $rp1 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+__tc_sample_acl_rate_test()
+{
+	local bind=$1; shift
+	local port=$1; shift
+	local pkts pct
+
+	RET=0
+
+	# Policy-based sampling is not supported on Spectrum-1.
+	mlxsw_only_on_spectrum 2+ || return
+
+	tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+	pct=$((100 * (pkts - 10000) / 10000))
+	(( -25 <= pct && pct <= 25))
+	check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+	# Setup a filter that should not match any packet and make sure packets
+	# are not sampled.
+	tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 198.51.100.10 action sample rate 32 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	grep -q -e "group 1 " $CAPTURE_FILE
+	check_fail $? "Sampled packets when should not"
+
+	log_test "tc sample (w/ flower) rate ($bind)"
+
+	tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+}
+
+tc_sample_acl_rate_test()
+{
+	__tc_sample_acl_rate_test ingress $rp1
+	__tc_sample_acl_rate_test egress $rp2
+}
+
+tc_sample_acl_max_rate_test()
+{
+	RET=0
+
+	# Policy-based sampling is not supported on Spectrum-1.
+	mlxsw_only_on_spectrum 2+ || return
+
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw action sample rate $((2 ** 24 - 1)) group 1
+	check_err $? "Failed to configure sampling rule with max rate"
+
+	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw action sample rate $((2 ** 24)) \
+		group 1 &> /dev/null
+	check_fail $? "Managed to configure sampling rate above maximum"
+
+	log_test "tc sample (w/ flower) maximum rate"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
new file mode 100755
index 000000000000..4687b0a7dffb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
@@ -0,0 +1,1185 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test various aspects of VxLAN offloading which are specific to mlxsw, such
+# as sanitization of invalid configurations and offload indication.
+
+: ${ADDR_FAMILY:=ipv4}
+export ADDR_FAMILY
+
+: ${LOCAL_IP_1:=198.51.100.1}
+export LOCAL_IP_1
+
+: ${LOCAL_IP_2:=198.51.100.2}
+export LOCAL_IP_2
+
+: ${PREFIX_LEN:=32}
+export PREFIX_LEN
+
+: ${UDPCSUM_FLAFS:=noudpcsum}
+export UDPCSUM_FLAFS
+
+: ${MC_IP:=239.0.0.1}
+export MC_IP
+
+: ${IP_FLAG:=""}
+export IP_FLAG
+
+: ${ALL_TESTS:="
+	sanitization_test
+	offload_indication_test
+	sanitization_vlan_aware_test
+	offload_indication_vlan_aware_test
+"}
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+NUM_NETIFS=2
+: ${TIMEOUT:=20000} # ms
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+sanitization_single_dev_test_pass()
+{
+	ip link set dev $swp1 master br0
+	check_err $?
+	ip link set dev vxlan0 master br0
+	check_err $?
+
+	ip link set dev $swp1 nomaster
+
+	ip link set dev $swp1 master br0
+	check_err $?
+}
+
+sanitization_single_dev_test_fail()
+{
+	ip link set dev $swp1 master br0
+	check_err $?
+	ip link set dev vxlan0 master br0 &> /dev/null
+	check_fail $?
+
+	ip link set dev $swp1 nomaster
+
+	ip link set dev vxlan0 master br0
+	check_err $?
+	ip link set dev $swp1 master br0 &> /dev/null
+	check_fail $?
+}
+
+sanitization_single_dev_valid_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	sanitization_single_dev_test_pass
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device - valid configuration"
+}
+
+sanitization_single_dev_vlan_aware_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	sanitization_single_dev_test_pass
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with a vlan-aware bridge"
+}
+
+sanitization_single_dev_mcast_enabled_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with a multicast enabled bridge"
+}
+
+sanitization_single_dev_mcast_group_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add name dummy1 up type dummy
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \
+		dev dummy1 group $MC_IP
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev dummy1
+	ip link del dev br0
+
+	log_test "vxlan device with a multicast group"
+}
+
+sanitization_single_dev_no_local_ip_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit dstport 4789
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with no local ip"
+}
+
+sanitization_single_dev_learning_enabled_ipv4_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	sanitization_single_dev_test_pass
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with learning enabled"
+}
+
+sanitization_single_dev_local_interface_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add name dummy1 up type dummy
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 dev dummy1
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev dummy1
+	ip link del dev br0
+
+	log_test "vxlan device with local interface"
+}
+
+sanitization_single_dev_port_range_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \
+		srcport 4000 5000
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with udp source port range"
+}
+
+sanitization_single_dev_tos_static_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos 20 local $LOCAL_IP_1 dstport 4789
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with static tos"
+}
+
+sanitization_single_dev_ttl_inherit_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl inherit tos inherit local $LOCAL_IP_1 dstport 4789
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with inherit ttl"
+}
+
+sanitization_single_dev_udp_checksum_ipv4_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning udpcsum \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with udp checksum"
+}
+
+sanitization_single_dev_test()
+{
+	# These tests make sure that we correctly sanitize VxLAN device
+	# configurations we do not support
+	sanitization_single_dev_valid_test
+	sanitization_single_dev_vlan_aware_test
+	sanitization_single_dev_mcast_enabled_test
+	sanitization_single_dev_mcast_group_test
+	sanitization_single_dev_no_local_ip_test
+	sanitization_single_dev_learning_enabled_"$ADDR_FAMILY"_test
+	sanitization_single_dev_local_interface_test
+	sanitization_single_dev_port_range_test
+	sanitization_single_dev_tos_static_test
+	sanitization_single_dev_ttl_inherit_test
+	sanitization_single_dev_udp_checksum_"$ADDR_FAMILY"_test
+}
+
+sanitization_multi_devs_test_pass()
+{
+	ip link set dev $swp1 master br0
+	check_err $?
+	ip link set dev vxlan0 master br0
+	check_err $?
+	ip link set dev $swp2 master br1
+	check_err $?
+	ip link set dev vxlan1 master br1
+	check_err $?
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 nomaster
+
+	ip link set dev $swp1 master br0
+	check_err $?
+	ip link set dev $swp2 master br1
+	check_err $?
+}
+
+sanitization_multi_devs_test_fail()
+{
+	ip link set dev $swp1 master br0
+	check_err $?
+	ip link set dev vxlan0 master br0
+	check_err $?
+	ip link set dev $swp2 master br1
+	check_err $?
+	ip link set dev vxlan1 master br1 &> /dev/null
+	check_fail $?
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 nomaster
+
+	ip link set dev vxlan1 master br1
+	check_err $?
+	ip link set dev $swp1 master br0
+	check_err $?
+	ip link set dev $swp2 master br1 &> /dev/null
+	check_fail $?
+}
+
+sanitization_multi_devs_valid_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add dev br1 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+	ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	sanitization_multi_devs_test_pass
+
+	ip link del dev vxlan1
+	ip link del dev vxlan0
+	ip link del dev br1
+	ip link del dev br0
+
+	log_test "multiple vxlan devices - valid configuration"
+}
+
+sanitization_multi_devs_ttl_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add dev br1 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+	ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+		ttl 40 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	sanitization_multi_devs_test_fail
+
+	ip link del dev vxlan1
+	ip link del dev vxlan0
+	ip link del dev br1
+	ip link del dev br0
+
+	log_test "multiple vxlan devices with different ttl"
+}
+
+sanitization_multi_devs_udp_dstport_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add dev br1 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+	ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 5789
+
+	sanitization_multi_devs_test_fail
+
+	ip link del dev vxlan1
+	ip link del dev vxlan0
+	ip link del dev br1
+	ip link del dev br0
+
+	log_test "multiple vxlan devices with different udp destination port"
+}
+
+sanitization_multi_devs_local_ip_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add dev br1 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+	ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_2 dstport 4789
+
+	sanitization_multi_devs_test_fail
+
+	ip link del dev vxlan1
+	ip link del dev vxlan0
+	ip link del dev br1
+	ip link del dev br0
+
+	log_test "multiple vxlan devices with different local ip"
+}
+
+sanitization_multi_devs_test()
+{
+	# The device has a single VTEP, which means all the VxLAN devices
+	# we offload must share certain properties such as source IP and
+	# UDP destination port. These tests make sure that we forbid
+	# configurations that violate this limitation
+	sanitization_multi_devs_valid_test
+	sanitization_multi_devs_ttl_test
+	sanitization_multi_devs_udp_dstport_test
+	sanitization_multi_devs_local_ip_test
+}
+
+sanitization_test()
+{
+	sanitization_single_dev_test
+	sanitization_multi_devs_test
+}
+
+offload_indication_setup_create()
+{
+	# Create a simple setup with two bridges, each with a VxLAN device
+	# and one local port
+	ip link add name br0 type bridge mcast_snooping 0
+	ip link set dev br0 addrgenmode none
+	ip link set dev br0 up
+	ip link add name br1 type bridge mcast_snooping 0
+	ip link set dev br1 addrgenmode none
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br1
+
+	ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
+
+	ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \
+		$UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+	ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \
+		$UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+}
+
+offload_indication_setup_destroy()
+{
+	ip link del dev vxlan1
+	ip link del dev vxlan0
+
+	ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 nomaster
+
+	ip link del dev br1
+	ip link del dev br0
+}
+
+offload_indication_fdb_flood_test()
+{
+	RET=0
+
+	bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst $LOCAL_IP_2
+
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \
+		bridge fdb show brport vxlan0
+	check_err $?
+
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self
+
+	log_test "vxlan flood entry offload indication"
+}
+
+offload_indication_fdb_bridge_test()
+{
+	RET=0
+
+	bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \
+		dst $LOCAL_IP_2
+
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
+	check_err $?
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
+	check_err $?
+
+	log_test "vxlan entry offload indication - initial state"
+
+	# Remove FDB entry from the bridge driver and check that corresponding
+	# entry in the VxLAN driver is not marked as offloaded
+	RET=0
+
+	bridge fdb del de:ad:be:ef:13:37 dev vxlan0 master
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
+	check_err $?
+
+	log_test "vxlan entry offload indication - after removal from bridge"
+
+	# Add the FDB entry back to the bridge driver and make sure it is
+	# marked as offloaded in both drivers
+	RET=0
+
+	bridge fdb add de:ad:be:ef:13:37 dev vxlan0 master static
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
+	check_err $?
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
+	check_err $?
+
+	log_test "vxlan entry offload indication - after re-add to bridge"
+
+	# Remove FDB entry from the VxLAN driver and check that corresponding
+	# entry in the bridge driver is not marked as offloaded
+	RET=0
+
+	bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
+	check_err $?
+
+	log_test "vxlan entry offload indication - after removal from vxlan"
+
+	# Add the FDB entry back to the VxLAN driver and make sure it is
+	# marked as offloaded in both drivers
+	RET=0
+
+	bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst $LOCAL_IP_2
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
+	check_err $?
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
+	check_err $?
+
+	log_test "vxlan entry offload indication - after re-add to vxlan"
+
+	bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self master
+}
+
+offload_indication_fdb_test()
+{
+	offload_indication_fdb_flood_test
+	offload_indication_fdb_bridge_test
+}
+
+offload_indication_decap_route_test()
+{
+	RET=0
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	ip link set dev vxlan0 down
+	busywait "$TIMEOUT" wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	ip link set dev vxlan1 down
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	log_test "vxlan decap route - vxlan device down"
+
+	RET=0
+
+	ip link set dev vxlan1 up
+	busywait "$TIMEOUT" wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	ip link set dev vxlan0 up
+	busywait "$TIMEOUT" wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	log_test "vxlan decap route - vxlan device up"
+
+	RET=0
+
+	ip address delete $LOCAL_IP_1/$PREFIX_LEN dev lo
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
+	busywait "$TIMEOUT" wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	log_test "vxlan decap route - add local route"
+
+	RET=0
+
+	ip link set dev $swp1 nomaster
+	busywait "$TIMEOUT" wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	ip link set dev $swp2 nomaster
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	log_test "vxlan decap route - local ports enslavement"
+
+	RET=0
+
+	ip link del dev br0
+	busywait "$TIMEOUT" wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	ip link del dev br1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	log_test "vxlan decap route - bridge device deletion"
+
+	RET=0
+
+	ip link add name br0 type bridge mcast_snooping 0
+	ip link set dev br0 addrgenmode none
+	ip link set dev br0 up
+	ip link add name br1 type bridge mcast_snooping 0
+	ip link set dev br1 addrgenmode none
+	ip link set dev br1 up
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br1
+	ip link set dev vxlan0 master br0
+	ip link set dev vxlan1 master br1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	ip link del dev vxlan0
+	busywait "$TIMEOUT" wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	ip link del dev vxlan1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	log_test "vxlan decap route - vxlan device deletion"
+
+	ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \
+		$UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+	ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \
+		$UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+}
+
+check_fdb_offloaded()
+{
+	local mac=00:11:22:33:44:55
+	local zmac=00:00:00:00:00:00
+
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac self \
+		bridge fdb show dev vxlan0
+	check_err $?
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac master \
+		bridge fdb show dev vxlan0
+	check_err $?
+
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show dev vxlan0
+	check_err $?
+}
+
+check_vxlan_fdb_not_offloaded()
+{
+	local mac=00:11:22:33:44:55
+	local zmac=00:00:00:00:00:00
+
+	bridge fdb show dev vxlan0 | grep $mac | grep -q self
+	check_err $?
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac self \
+		bridge fdb show dev vxlan0
+	check_err $?
+
+	bridge fdb show dev vxlan0 | grep $zmac | grep -q self
+	check_err $?
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show dev vxlan0
+	check_err $?
+}
+
+check_bridge_fdb_not_offloaded()
+{
+	local mac=00:11:22:33:44:55
+	local zmac=00:00:00:00:00:00
+
+	bridge fdb show dev vxlan0 | grep $mac | grep -q master
+	check_err $?
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac master \
+		bridge fdb show dev vxlan0
+	check_err $?
+}
+
+__offload_indication_join_vxlan_first()
+{
+	local vid=$1; shift
+
+	local mac=00:11:22:33:44:55
+	local zmac=00:00:00:00:00:00
+
+	bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2
+
+	ip link set dev vxlan0 master br0
+	bridge fdb add dev vxlan0 $mac self master static dst $LOCAL_IP_2
+
+	RET=0
+	check_vxlan_fdb_not_offloaded
+	ip link set dev $swp1 master br0
+	sleep .1
+	check_fdb_offloaded
+	log_test "offload indication - attach vxlan first"
+
+	RET=0
+	ip link set dev vxlan0 down
+	check_vxlan_fdb_not_offloaded
+	check_bridge_fdb_not_offloaded
+	log_test "offload indication - set vxlan down"
+
+	RET=0
+	ip link set dev vxlan0 up
+	sleep .1
+	check_fdb_offloaded
+	log_test "offload indication - set vxlan up"
+
+	if [[ ! -z $vid ]]; then
+		RET=0
+		bridge vlan del dev vxlan0 vid $vid
+		check_vxlan_fdb_not_offloaded
+		check_bridge_fdb_not_offloaded
+		log_test "offload indication - delete VLAN"
+
+		RET=0
+		bridge vlan add dev vxlan0 vid $vid
+		check_vxlan_fdb_not_offloaded
+		check_bridge_fdb_not_offloaded
+		log_test "offload indication - add tagged VLAN"
+
+		RET=0
+		bridge vlan add dev vxlan0 vid $vid pvid untagged
+		sleep .1
+		check_fdb_offloaded
+		log_test "offload indication - add pvid/untagged VLAN"
+	fi
+
+	RET=0
+	ip link set dev $swp1 nomaster
+	check_vxlan_fdb_not_offloaded
+	log_test "offload indication - detach port"
+}
+
+offload_indication_join_vxlan_first()
+{
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link set dev br0 addrgenmode none
+	ip link set dev br0 up
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	__offload_indication_join_vxlan_first
+
+	ip link del dev vxlan0
+	ip link del dev br0
+}
+
+__offload_indication_join_vxlan_last()
+{
+	local zmac=00:00:00:00:00:00
+
+	RET=0
+
+	bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2
+
+	ip link set dev $swp1 master br0
+
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show dev vxlan0
+	check_err $?
+
+	ip link set dev vxlan0 master br0
+
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show dev vxlan0
+	check_err $?
+
+	log_test "offload indication - attach vxlan last"
+}
+
+offload_indication_join_vxlan_last()
+{
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link set dev br0 addrgenmode none
+	ip link set dev br0 up
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	__offload_indication_join_vxlan_last
+
+	ip link del dev vxlan0
+	ip link del dev br0
+}
+
+offload_indication_test()
+{
+	offload_indication_setup_create
+	offload_indication_fdb_test
+	offload_indication_decap_route_test
+	offload_indication_setup_destroy
+
+	log_info "offload indication - replay & cleanup"
+	offload_indication_join_vxlan_first
+	offload_indication_join_vxlan_last
+}
+
+sanitization_vlan_aware_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1
+	ip link set dev br0 addrgenmode none
+
+	ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \
+		$UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \
+		$UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	# Test that when each VNI is mapped to a different VLAN we can enslave
+	# a port to the bridge
+	bridge vlan add vid 10 dev vxlan10 pvid untagged
+	bridge vlan add vid 20 dev vxlan20 pvid untagged
+
+	ip link set dev $swp1 master br0
+	check_err $?
+
+	log_test "vlan-aware - enslavement to vlan-aware bridge"
+
+	# Try to map both VNIs to the same VLAN and make sure configuration
+	# fails
+	RET=0
+
+	bridge vlan add vid 10 dev vxlan20 pvid untagged &> /dev/null
+	check_fail $?
+
+	log_test "vlan-aware - two vnis mapped to the same vlan"
+
+	# Test that enslavement of a port to a bridge fails when two VNIs
+	# are mapped to the same VLAN
+	RET=0
+
+	ip link set dev $swp1 nomaster
+
+	bridge vlan del vid 20 dev vxlan20 pvid untagged
+	bridge vlan add vid 10 dev vxlan20 pvid untagged
+
+	ip link set dev $swp1 master br0 &> /dev/null
+	check_fail $?
+
+	log_test "vlan-aware - failed enslavement to vlan-aware bridge"
+
+	bridge vlan del vid 10 dev vxlan20
+	bridge vlan add vid 20 dev vxlan20 pvid untagged
+
+	# Test that when two VXLAN tunnels with conflicting configurations
+	# (i.e., different TTL) are enslaved to the same VLAN-aware bridge,
+	# then the enslavement of a port to the bridge is denied.
+
+	# Use the offload indication of the local route to ensure the VXLAN
+	# configuration was correctly rollbacked.
+	ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
+
+	ip link set dev vxlan10 type vxlan ttl 10
+	ip link set dev $swp1 master br0 &> /dev/null
+	check_fail $?
+
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	log_test "vlan-aware - failed enslavement to bridge due to conflict"
+
+	ip link set dev vxlan10 type vxlan ttl 20
+	ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo
+
+	ip link del dev vxlan20
+	ip link del dev vxlan10
+	ip link del dev br0
+}
+
+offload_indication_vlan_aware_setup_create()
+{
+	# Create a simple setup with two VxLAN devices and a single VLAN-aware
+	# bridge
+	ip link add name br0 type bridge mcast_snooping 0 vlan_filtering 1 \
+		vlan_default_pvid 0
+	ip link set dev br0 addrgenmode none
+	ip link set dev br0 up
+
+	ip link set dev $swp1 master br0
+
+	bridge vlan add vid 10 dev $swp1
+	bridge vlan add vid 20 dev $swp1
+
+	ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
+
+	ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \
+		$UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+	ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \
+		$UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	bridge vlan add vid 10 dev vxlan10 pvid untagged
+	bridge vlan add vid 20 dev vxlan20 pvid untagged
+}
+
+offload_indication_vlan_aware_setup_destroy()
+{
+	bridge vlan del vid 20 dev vxlan20
+	bridge vlan del vid 10 dev vxlan10
+
+	ip link del dev vxlan20
+	ip link del dev vxlan10
+
+	ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo
+
+	bridge vlan del vid 20 dev $swp1
+	bridge vlan del vid 10 dev $swp1
+
+	ip link set dev $swp1 nomaster
+
+	ip link del dev br0
+}
+
+offload_indication_vlan_aware_fdb_test()
+{
+	RET=0
+
+	log_info "vxlan entry offload indication - vlan-aware"
+
+	bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \
+		dst $LOCAL_IP_2 vlan 10
+
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
+	check_err $?
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
+	check_err $?
+
+	log_test "vxlan entry offload indication - initial state"
+
+	# Remove FDB entry from the bridge driver and check that corresponding
+	# entry in the VxLAN driver is not marked as offloaded
+	RET=0
+
+	bridge fdb del de:ad:be:ef:13:37 dev vxlan10 master vlan 10
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
+	check_err $?
+
+	log_test "vxlan entry offload indication - after removal from bridge"
+
+	# Add the FDB entry back to the bridge driver and make sure it is
+	# marked as offloaded in both drivers
+	RET=0
+
+	bridge fdb add de:ad:be:ef:13:37 dev vxlan10 master static vlan 10
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
+	check_err $?
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
+	check_err $?
+
+	log_test "vxlan entry offload indication - after re-add to bridge"
+
+	# Remove FDB entry from the VxLAN driver and check that corresponding
+	# entry in the bridge driver is not marked as offloaded
+	RET=0
+
+	bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
+	check_err $?
+
+	log_test "vxlan entry offload indication - after removal from vxlan"
+
+	# Add the FDB entry back to the VxLAN driver and make sure it is
+	# marked as offloaded in both drivers
+	RET=0
+
+	bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst $LOCAL_IP_2
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
+	check_err $?
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
+	check_err $?
+
+	log_test "vxlan entry offload indication - after re-add to vxlan"
+
+	bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self master vlan 10
+}
+
+offload_indication_vlan_aware_decap_route_test()
+{
+	RET=0
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	# Toggle PVID flag on one VxLAN device and make sure route is still
+	# marked as offloaded
+	bridge vlan add vid 10 dev vxlan10 untagged
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	# Toggle PVID flag on second VxLAN device and make sure route is no
+	# longer marked as offloaded
+	bridge vlan add vid 20 dev vxlan20 untagged
+
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip $IP_FLAG route show table local $LOCAL_IP_1
+	check_err $?
+
+	# Toggle PVID flag back and make sure route is marked as offloaded
+	bridge vlan add vid 10 dev vxlan10 pvid untagged
+	bridge vlan add vid 20 dev vxlan20 pvid untagged
+
+	busywait "$TIMEOUT" wait_for_offload ip $IP_FLAG route show table local \
+		$LOCAL_IP_1
+	check_err $?
+
+	log_test "vxlan decap route - vni map/unmap"
+}
+
+offload_indication_vlan_aware_join_vxlan_first()
+{
+	ip link add dev br0 type bridge mcast_snooping 0 \
+		vlan_filtering 1 vlan_default_pvid 1
+	ip link set dev br0 addrgenmode none
+	ip link set dev br0 up
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	__offload_indication_join_vxlan_first 1
+
+	ip link del dev vxlan0
+	ip link del dev br0
+}
+
+offload_indication_vlan_aware_join_vxlan_last()
+{
+	ip link add dev br0 type bridge mcast_snooping 0 \
+		vlan_filtering 1 vlan_default_pvid 1
+	ip link set dev br0 addrgenmode none
+	ip link set dev br0 up
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	__offload_indication_join_vxlan_last
+
+	ip link del dev vxlan0
+	ip link del dev br0
+}
+
+offload_indication_vlan_aware_l3vni_test()
+{
+	local zmac=00:00:00:00:00:00
+
+	RET=0
+
+	sysctl_set net.ipv6.conf.default.disable_ipv6 1
+	ip link add dev br0 type bridge mcast_snooping 0 \
+		vlan_filtering 1 vlan_default_pvid 0
+	ip link set dev br0 addrgenmode none
+	ip link set dev br0 up
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	ip link set dev $swp1 master br0
+
+	# The test will use the offload indication on the FDB entry to
+	# understand if the tunnel is offloaded or not
+	bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2
+
+	ip link set dev vxlan0 master br0
+	bridge vlan add dev vxlan0 vid 10 pvid untagged
+
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
+	check_err $? "vxlan tunnel not offloaded when should"
+
+	# Configure a VLAN interface and make sure tunnel is offloaded
+	ip link add link br0 name br10 up type vlan id 10
+	sysctl_set net.ipv6.conf.br10.disable_ipv6 0
+	ip -6 address add 2001:db8:1::1/64 dev br10
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
+	check_err $? "vxlan tunnel not offloaded when should"
+
+	# Unlink the VXLAN device, make sure tunnel is no longer offloaded,
+	# then add it back to the bridge and make sure it is offloaded
+	ip link set dev vxlan0 nomaster
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
+	check_err $? "vxlan tunnel offloaded after unlinked from bridge"
+
+	ip link set dev vxlan0 master br0
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
+	check_err $? "vxlan tunnel offloaded despite no matching vid"
+
+	bridge vlan add dev vxlan0 vid 10 pvid untagged
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
+	check_err $? "vxlan tunnel not offloaded after adding vid"
+
+	log_test "vxlan - l3 vni"
+
+	ip link del dev vxlan0
+	ip link del dev br0
+	sysctl_restore net.ipv6.conf.default.disable_ipv6
+}
+
+offload_indication_vlan_aware_test()
+{
+	offload_indication_vlan_aware_setup_create
+	offload_indication_vlan_aware_fdb_test
+	offload_indication_vlan_aware_decap_route_test
+	offload_indication_vlan_aware_setup_destroy
+
+	log_info "offload indication - replay & cleanup - vlan aware"
+	offload_indication_vlan_aware_join_vxlan_first
+	offload_indication_vlan_aware_join_vxlan_last
+	offload_indication_vlan_aware_l3vni_test
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
new file mode 100755
index 000000000000..38148f51877a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
@@ -0,0 +1,141 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test vetoing of FDB entries that mlxsw can not offload. This exercises several
+# different veto vectors to test various rollback scenarios in the vxlan driver.
+
+: ${LOCAL_IP:=198.51.100.1}
+export LOCAL_IP
+
+: ${REMOTE_IP_1:=198.51.100.2}
+export REMOTE_IP_1
+
+: ${REMOTE_IP_2:=198.51.100.3}
+export REMOTE_IP_2
+
+: ${UDPCSUM_FLAFS:=noudpcsum}
+export UDPCSUM_FLAFS
+
+: ${MC_IP:=224.0.0.1}
+export MC_IP
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	fdb_create_veto_test
+	fdb_replace_veto_test
+	fdb_append_veto_test
+	fdb_changelink_veto_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link set dev $swp1 up
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 up
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP dstport 4789
+	ip link set dev vxlan0 master br0
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev vxlan0 nomaster
+	ip link del dev vxlan0
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 nomaster
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+fdb_create_veto_test()
+{
+	RET=0
+
+	bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \
+	       dst $REMOTE_IP_1 2>/dev/null
+	check_fail $? "multicast MAC not rejected"
+
+	bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \
+	       dst $REMOTE_IP_1 2>&1 >/dev/null | grep -q mlxsw_spectrum
+	check_err $? "multicast MAC rejected without extack"
+
+	log_test "vxlan FDB veto - create"
+}
+
+fdb_replace_veto_test()
+{
+	RET=0
+
+	bridge fdb add 00:01:02:03:04:05 dev vxlan0 self static \
+	       dst $REMOTE_IP_1
+	check_err $? "valid FDB rejected"
+
+	bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \
+	       dst $REMOTE_IP_1 port 1234 2>/dev/null
+	check_fail $? "FDB with an explicit port not rejected"
+
+	bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \
+	       dst $REMOTE_IP_1 port 1234 2>&1 >/dev/null \
+	    | grep -q mlxsw_spectrum
+	check_err $? "FDB with an explicit port rejected without extack"
+
+	log_test "vxlan FDB veto - replace"
+}
+
+fdb_append_veto_test()
+{
+	RET=0
+
+	bridge fdb add 00:00:00:00:00:00 dev vxlan0 self static \
+	       dst $REMOTE_IP_1
+	check_err $? "valid FDB rejected"
+
+	bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \
+	       dst $REMOTE_IP_2 port 1234 2>/dev/null
+	check_fail $? "FDB with an explicit port not rejected"
+
+	bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \
+	       dst $REMOTE_IP_2 port 1234 2>&1 >/dev/null \
+	    | grep -q mlxsw_spectrum
+	check_err $? "FDB with an explicit port rejected without extack"
+
+	log_test "vxlan FDB veto - append"
+}
+
+fdb_changelink_veto_test()
+{
+	RET=0
+
+	ip link set dev vxlan0 type vxlan \
+	   group $MC_IP dev lo 2>/dev/null
+	check_fail $? "FDB with a multicast IP not rejected"
+
+	ip link set dev vxlan0 type vxlan \
+	   group $MC_IP dev lo 2>&1 >/dev/null \
+	    | grep -q mlxsw_spectrum
+	check_err $? "FDB with a multicast IP rejected without extack"
+
+	log_test "vxlan FDB veto - changelink"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh
new file mode 100755
index 000000000000..66c87aab86f6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN test for IPv6.
+
+LOCAL_IP=2001:db8:1::1
+REMOTE_IP_1=2001:db8:2::1
+REMOTE_IP_2=2001:db8:3::1
+UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx"
+MC_IP=FF02::2
+
+source vxlan_fdb_veto.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
new file mode 100755
index 000000000000..af5ea50ed5c0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
@@ -0,0 +1,326 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test VxLAN flooding. The device stores flood records in a singly linked list
+# where each record stores up to three IPv4 addresses of remote VTEPs. The test
+# verifies that packets are correctly flooded in various cases such as deletion
+# of a record in the middle of the list.
+#
+# +--------------------+
+# | H1 (vrf)           |
+# |    + $h1           |
+# |    | 203.0.113.1/24|
+# +----|---------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# | +--|--------------------------------------------------------------------+ |
+# | |  + $swp1                   BR0 (802.1d)                               | |
+# | |                                                                       | |
+# | |  + vxlan0 (vxlan)                                                     | |
+# | |    local 198.51.100.1                                                 | |
+# | |    remote 198.51.100.{2..13}                                          | |
+# | |    id 10 dstport 4789                                                 | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |  198.51.100.0/24 via 192.0.2.2                                            |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 192.0.2.1/24                                                         |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                               R2 (vrf) |
+# |    + $rp2                                                   |
+# |      192.0.2.2/24                                           |
+# |                                                             |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="flooding_test"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 203.0.113.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 203.0.113.1/24
+}
+
+switch_create()
+{
+	# Make sure the bridge uses the MAC address of the local port and
+	# not that of the VxLAN's device
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link set dev br0 address $(mac_get $swp1)
+
+	ip link add name vxlan0 type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	ip address add 198.51.100.1/32 dev lo
+
+	ip link set dev $swp1 master br0
+	ip link set dev vxlan0 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev vxlan0 up
+}
+
+switch_destroy()
+{
+	ip link set dev vxlan0 down
+	ip link set dev $swp1 down
+	ip link set dev br0 down
+
+	ip link set dev vxlan0 nomaster
+	ip link set dev $swp1 nomaster
+
+	ip address del 198.51.100.1/32 dev lo
+
+	ip link del dev vxlan0
+
+	ip link del dev br0
+}
+
+router1_create()
+{
+	# This router is in the default VRF, where the VxLAN device is
+	# performing the L3 lookup
+	ip link set dev $rp1 up
+	ip address add 192.0.2.1/24 dev $rp1
+	ip route add 198.51.100.0/24 via 192.0.2.2
+}
+
+router1_destroy()
+{
+	ip route del 198.51.100.0/24 via 192.0.2.2
+	ip address del 192.0.2.1/24 dev $rp1
+	ip link set dev $rp1 down
+}
+
+router2_create()
+{
+	# This router is not in the default VRF, so use simple_if_init()
+	simple_if_init $rp2 192.0.2.2/24
+}
+
+router2_destroy()
+{
+	simple_if_fini $rp2 192.0.2.2/24
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	rp1=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+
+	switch_create
+
+	router1_create
+	router2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router2_destroy
+	router1_destroy
+
+	switch_destroy
+
+	h1_destroy
+
+	vrf_cleanup
+}
+
+flooding_remotes_add()
+{
+	local num_remotes=$1
+	local lsb
+	local i
+
+	for i in $(eval echo {1..$num_remotes}); do
+		lsb=$((i + 1))
+
+		bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \
+			dst 198.51.100.$lsb
+	done
+}
+
+flooding_filters_add()
+{
+	local num_remotes=$1
+	local lsb
+	local i
+
+	# Prevent unwanted packets from entering the bridge and interfering
+	# with the test.
+	tc qdisc add dev br0 clsact
+	tc filter add dev br0 egress protocol all pref 1 handle 1 \
+		matchall skip_hw action drop
+	tc qdisc add dev $h1 clsact
+	tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+		flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+	tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+		matchall skip_hw action drop
+
+	tc qdisc add dev $rp2 clsact
+
+	for i in $(eval echo {1..$num_remotes}); do
+		lsb=$((i + 1))
+
+		tc filter add dev $rp2 ingress protocol ip pref $i handle $i \
+			flower ip_proto udp dst_ip 198.51.100.$lsb \
+			dst_port 4789 skip_sw action drop
+	done
+}
+
+flooding_filters_del()
+{
+	local num_remotes=$1
+	local i
+
+	for i in $(eval echo {1..$num_remotes}); do
+		tc filter del dev $rp2 ingress protocol ip pref $i \
+			handle $i flower
+	done
+
+	tc qdisc del dev $rp2 clsact
+
+	tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+	tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+	tc qdisc del dev $h1 clsact
+	tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+	tc qdisc del dev br0 clsact
+}
+
+flooding_check_packets()
+{
+	local packets=("$@")
+	local num_remotes=${#packets[@]}
+	local i
+
+	for i in $(eval echo {1..$num_remotes}); do
+		tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]}
+		check_err $? "remote $i - did not get expected number of packets"
+	done
+}
+
+flooding_test()
+{
+	# Use 12 remote VTEPs that will be stored in 4 records. The array
+	# 'packets' will store how many packets are expected to be received
+	# by each remote VTEP at each stage of the test
+	declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1)
+	local num_remotes=12
+
+	RET=0
+
+	# Add FDB entries for remote VTEPs and corresponding tc filters on the
+	# ingress of the nexthop router. These filters will count how many
+	# packets were flooded to each remote VTEP
+	flooding_remotes_add $num_remotes
+	flooding_filters_add $num_remotes
+
+	# Send one packet and make sure it is flooded to all the remote VTEPs
+	$MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 1 packet"
+
+	# Delete the third record which corresponds to VTEPs with LSB 8..10
+	# and check that packet is flooded correctly when we remove a record
+	# from the middle of the list
+	RET=0
+
+	packets=(2 2 2 2 2 2 1 1 1 2 2 2)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.8
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.9
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.10
+
+	$MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 2 packets"
+
+	# Delete the first record and make sure the packet is flooded correctly
+	RET=0
+
+	packets=(2 2 2 3 3 3 1 1 1 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.3
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.4
+
+	$MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 3 packets"
+
+	# Delete the last record and make sure the packet is flooded correctly
+	RET=0
+
+	packets=(2 2 2 4 4 4 1 1 1 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.11
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.12
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.13
+
+	$MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 4 packets"
+
+	# Delete the last record, one entry at a time and make sure single
+	# entries are correctly removed
+	RET=0
+
+	packets=(2 2 2 4 5 5 1 1 1 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.5
+
+	$MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 5 packets"
+
+	RET=0
+
+	packets=(2 2 2 4 5 6 1 1 1 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.6
+
+	$MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 6 packets"
+
+	RET=0
+
+	packets=(2 2 2 4 5 6 1 1 1 3 3 3)
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.7
+
+	$MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 7 packets"
+
+	flooding_filters_del $num_remotes
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh
new file mode 100755
index 000000000000..f2ea0163ddea
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN test for IPv6.
+
+ADDR_FAMILY=ipv6
+LOCAL_IP_1=2001:db8:1::1
+LOCAL_IP_2=2001:db8:1::2
+PREFIX_LEN=128
+UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx"
+MC_IP=FF02::2
+IP_FLAG="-6"
+
+ALL_TESTS="
+	sanitization_test
+	offload_indication_test
+	sanitization_vlan_aware_test
+	offload_indication_vlan_aware_test
+"
+
+sanitization_single_dev_learning_enabled_ipv6_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \
+		ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with learning enabled"
+}
+
+sanitization_single_dev_udp_checksum_ipv6_test()
+{
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning \
+		noudp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \
+		local $LOCAL_IP_1 dstport 4789
+
+	sanitization_single_dev_test_fail
+	log_test "vxlan device without zero udp checksum at RX"
+
+	ip link del dev vxlan0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning \
+		udp6zerocsumrx noudp6zerocsumtx ttl 20 tos inherit \
+		local $LOCAL_IP_1 dstport 4789
+
+	sanitization_single_dev_test_fail
+	log_test "vxlan device without zero udp checksum at TX"
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+}
+
+source vxlan.sh
diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py
new file mode 100755
index 000000000000..d05eddcad539
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, NetDrvEpEnv
+from lib.py import bkg, cmd, rand_port, NetNSEnter
+
+def test_napi_id(cfg) -> None:
+    port = rand_port()
+    listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr} {port}"
+
+    with bkg(listen_cmd, ksft_wait=3) as server:
+        cmd(f"echo a | socat - TCP:{cfg.baddr}:{port}", host=cfg.remote, shell=True)
+
+    ksft_eq(0, server.ret)
+
+def main() -> None:
+    with NetDrvEpEnv(__file__) as cfg:
+        ksft_run([test_napi_id], args=(cfg,))
+    ksft_exit()
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c
new file mode 100644
index 000000000000..7f49ca6c8637
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id_helper.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <netdb.h>
+
+#include "../../net/lib/ksft.h"
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_storage address;
+	struct addrinfo *result;
+	struct addrinfo hints;
+	unsigned int napi_id;
+	socklen_t addr_len;
+	socklen_t optlen;
+	char buf[1024];
+	int opt = 1;
+	int family;
+	int server;
+	int client;
+	int ret;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_UNSPEC;
+	hints.ai_socktype = SOCK_STREAM;
+	hints.ai_flags = AI_PASSIVE;
+
+	ret = getaddrinfo(argv[1], argv[2], &hints, &result);
+	if (ret != 0) {
+		fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(ret));
+		return 1;
+	}
+
+	family = result->ai_family;
+	addr_len = result->ai_addrlen;
+
+	server = socket(family, SOCK_STREAM, IPPROTO_TCP);
+	if (server < 0) {
+		perror("socket creation failed");
+		freeaddrinfo(result);
+		if (errno == EAFNOSUPPORT)
+			return -1;
+		return 1;
+	}
+
+	if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) {
+		perror("setsockopt");
+		freeaddrinfo(result);
+		return 1;
+	}
+
+	memcpy(&address, result->ai_addr, result->ai_addrlen);
+	freeaddrinfo(result);
+
+	if (bind(server, (struct sockaddr *)&address, addr_len) < 0) {
+		perror("bind failed");
+		return 1;
+	}
+
+	if (listen(server, 1) < 0) {
+		perror("listen");
+		return 1;
+	}
+
+	ksft_ready();
+
+	client = accept(server, NULL, 0);
+	if (client < 0) {
+		perror("accept");
+		return 1;
+	}
+
+	optlen = sizeof(napi_id);
+	ret = getsockopt(client, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id,
+			 &optlen);
+	if (ret != 0) {
+		perror("getsockopt");
+		return 1;
+	}
+
+	read(client, buf, 1024);
+
+	ksft_wait();
+
+	if (napi_id == 0) {
+		fprintf(stderr, "napi ID is 0\n");
+		return 1;
+	}
+
+	close(client);
+	close(server);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py
new file mode 100755
index 000000000000..f4be72b2145a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_threaded.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Test napi threaded states.
+"""
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, ksft_ne, ksft_ge
+from lib.py import NetDrvEnv, NetdevFamily
+from lib.py import cmd, defer, ethtool
+
+
+def _assert_napi_threaded_enabled(nl, napi_id) -> None:
+    napi = nl.napi_get({'id': napi_id})
+    ksft_eq(napi['threaded'], 'enabled')
+    ksft_ne(napi.get('pid'), None)
+
+
+def _assert_napi_threaded_disabled(nl, napi_id) -> None:
+    napi = nl.napi_get({'id': napi_id})
+    ksft_eq(napi['threaded'], 'disabled')
+    ksft_eq(napi.get('pid'), None)
+
+
+def _set_threaded_state(cfg, threaded) -> None:
+    with open(f"/sys/class/net/{cfg.ifname}/threaded", "wb") as fp:
+        fp.write(str(threaded).encode('utf-8'))
+
+
+def _setup_deferred_cleanup(cfg) -> None:
+    combined = ethtool(f"-l {cfg.ifname}", json=True)[0].get("combined", 0)
+    ksft_ge(combined, 2)
+    defer(ethtool, f"-L {cfg.ifname} combined {combined}")
+
+    threaded = cmd(f"cat /sys/class/net/{cfg.ifname}/threaded").stdout
+    defer(_set_threaded_state, cfg, threaded)
+
+    return combined
+
+
+def napi_init(cfg, nl) -> None:
+    """
+    Test that threaded state (in the persistent NAPI config) gets updated
+    even when NAPI with given ID is not allocated at the time.
+    """
+
+    qcnt = _setup_deferred_cleanup(cfg)
+
+    _set_threaded_state(cfg, 1)
+    cmd(f"ethtool -L {cfg.ifname} combined 1")
+    _set_threaded_state(cfg, 0)
+    cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+
+    napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+    for napi in napis:
+        ksft_eq(napi['threaded'], 'disabled')
+        ksft_eq(napi.get('pid'), None)
+
+    cmd(f"ethtool -L {cfg.ifname} combined 1")
+    _set_threaded_state(cfg, 1)
+    cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+
+    napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+    for napi in napis:
+        ksft_eq(napi['threaded'], 'enabled')
+        ksft_ne(napi.get('pid'), None)
+
+
+def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None:
+    """
+    Test that when napi threaded is enabled at device level and
+    then disabled at napi level for one napi, the threaded state
+    of all napis is preserved after a change in number of queues.
+    """
+
+    napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_ge(len(napis), 2)
+
+    napi0_id = napis[0]['id']
+    napi1_id = napis[1]['id']
+
+    qcnt = _setup_deferred_cleanup(cfg)
+
+    # set threaded
+    _set_threaded_state(cfg, 1)
+
+    # check napi threaded is set for both napis
+    _assert_napi_threaded_enabled(nl, napi0_id)
+    _assert_napi_threaded_enabled(nl, napi1_id)
+
+    # disable threaded for napi1
+    nl.napi_set({'id': napi1_id, 'threaded': 'disabled'})
+
+    cmd(f"ethtool -L {cfg.ifname} combined 1")
+    cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+    _assert_napi_threaded_enabled(nl, napi0_id)
+    _assert_napi_threaded_disabled(nl, napi1_id)
+
+
+def change_num_queues(cfg, nl) -> None:
+    """
+    Test that when napi threaded is enabled at device level,
+    the napi threaded state is preserved after a change in
+    number of queues.
+    """
+
+    napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_ge(len(napis), 2)
+
+    napi0_id = napis[0]['id']
+    napi1_id = napis[1]['id']
+
+    qcnt = _setup_deferred_cleanup(cfg)
+
+    # set threaded
+    _set_threaded_state(cfg, 1)
+
+    # check napi threaded is set for both napis
+    _assert_napi_threaded_enabled(nl, napi0_id)
+    _assert_napi_threaded_enabled(nl, napi1_id)
+
+    cmd(f"ethtool -L {cfg.ifname} combined 1")
+    cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
+
+    # check napi threaded is set for both napis
+    _assert_napi_threaded_enabled(nl, napi0_id)
+    _assert_napi_threaded_enabled(nl, napi1_id)
+
+
+def main() -> None:
+    """ Ksft boiler plate main """
+
+    with NetDrvEnv(__file__, queue_count=2) as cfg:
+        ksft_run([napi_init,
+                  change_num_queues,
+                  enable_dev_threaded_disable_napi_threaded],
+                 args=(cfg, NetdevFamily()))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
new file mode 100755
index 000000000000..2022f3061738
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test creates two netdevsim virtual interfaces, assigns one of them (the
+# "destination interface") to a new namespace, and assigns IP addresses to both
+# interfaces.
+#
+# It listens on the destination interface using socat and configures a dynamic
+# target on netconsole, pointing to the destination IP address.
+#
+# Finally, it checks whether the message was received properly on the
+# destination interface.  Note that this test may pollute the kernel log buffer
+# (dmesg) and relies on dynamic configuration and namespaces being configured.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+
+# Run the test twice, with different format modes
+for FORMAT in "basic" "extended"
+do
+	for IP_VERSION in "ipv6" "ipv4"
+	do
+		echo "Running with target mode: ${FORMAT} (${IP_VERSION})"
+		# Set current loglevel to KERN_INFO(6), and default to
+		# KERN_NOTICE(5)
+		echo "6 5" > /proc/sys/kernel/printk
+		# Create one namespace and two interfaces
+		set_network "${IP_VERSION}"
+		# Create a dynamic target for netconsole
+		create_dynamic_target "${FORMAT}"
+		# Only set userdata for extended format
+		if [ "$FORMAT" == "extended" ]
+		then
+			# Set userdata "key" with the "value" value
+			set_user_data
+		fi
+		# Listed for netconsole port inside the namespace and
+		# destination interface
+		listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" &
+		# Wait for socat to start and listen to the port.
+		wait_for_port "${NAMESPACE}" "${PORT}" "${IP_VERSION}"
+		# Send the message
+		echo "${MSG}: ${TARGET}" > /dev/kmsg
+		# Wait until socat saves the file to disk
+		busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+
+		# Make sure the message was received in the dst part
+		# and exit
+		validate_result "${OUTPUT_FILE}" "${FORMAT}"
+		# kill socat in case it is still running
+		pkill_socat
+		cleanup
+		echo "${FORMAT} : ${IP_VERSION} : Test passed" >&2
+	done
+done
+
+trap - EXIT
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
new file mode 100755
index 000000000000..d1d23dc67f99
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This is a selftest to test cmdline arguments on netconsole.
+# It exercises loading of netconsole from cmdline instead of the dynamic
+# reconfiguration. This includes parsing the long netconsole= line and all the
+# flow through init_netconsole().
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+check_netconsole_module
+
+modprobe netdevsim 2> /dev/null || true
+rmmod netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+# check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace and network interfaces
+trap do_cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+
+# Run the test twice, with different cmdline parameters
+for BINDMODE in "ifname" "mac"
+do
+	echo "Running with bind mode: ${BINDMODE}" >&2
+	# Create the command line for netconsole, with the configuration from
+	# the function above
+	CMDLINE=$(create_cmdline_str "${BINDMODE}")
+
+	# The content of kmsg will be save to the following file
+	OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}"
+
+	# Load the module, with the cmdline set
+	modprobe netconsole "${CMDLINE}"
+
+	# Listed for netconsole port inside the namespace and destination
+	# interface
+	listen_port_and_save_to "${OUTPUT_FILE}" &
+	# Wait for socat to start and listen to the port.
+	wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+	# Send the message
+	echo "${MSG}: ${TARGET}" > /dev/kmsg
+	# Wait until socat saves the file to disk
+	busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+	# Make sure the message was received in the dst part
+	# and exit
+	validate_msg "${OUTPUT_FILE}"
+
+	# kill socat in case it is still running
+	pkill_socat
+	# Unload the module
+	rmmod netconsole
+	echo "${BINDMODE} : Test passed" >&2
+done
+
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
new file mode 100755
index 000000000000..4a71e01a230c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test netconsole's message fragmentation functionality.
+#
+# When a message exceeds the maximum packet size, netconsole splits it into
+# multiple fragments for transmission. This test verifies:
+#  - Correct fragmentation of large messages
+#  - Proper reassembly of fragments at the receiver
+#  - Preservation of userdata across fragments
+#  - Behavior with and without kernel release version appending
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# set userdata to a long value. In this case, it is "1-2-3-4...50-"
+USERDATA_VALUE=$(printf -- '%.2s-' {1..60})
+
+# Convert the header string in a regexp, so, we can remove
+# the second header as well.
+# A header looks like "13,468,514729715,-,ncfrag=0/1135;". If
+# release is appended, you might find something like:L
+# "6.13.0-04048-g4f561a87745a,13,468,514729715,-,ncfrag=0/1135;"
+function header_to_regex() {
+	# header is everything before ;
+	local HEADER="${1}"
+	REGEX=$(echo "${HEADER}" | cut -d'=' -f1)
+	echo "${REGEX}=[0-9]*\/[0-9]*;"
+}
+
+# We have two headers in the message. Remove both to get the full message,
+# and extract the full message.
+function extract_msg() {
+	local MSGFILE="${1}"
+	# Extract the header, which is the very first thing that arrives in the
+	# first list.
+	HEADER=$(sed -n '1p' "${MSGFILE}" | cut -d';' -f1)
+	HEADER_REGEX=$(header_to_regex "${HEADER}")
+
+	# Remove the two headers from the received message
+	# This will return the message without any header, similarly to what
+	# was sent.
+	sed "s/""${HEADER_REGEX}""//g" "${MSGFILE}"
+}
+
+# Validate the message, which has two messages glued together.
+# unwrap them to make sure all the characters were transmitted.
+# File will look like the following:
+#  13,468,514729715,-,ncfrag=0/1135;<message>
+#   key=<part of key>-13,468,514729715,-,ncfrag=967/1135;<rest of the key>
+function validate_fragmented_result() {
+	# Discard the netconsole headers, and assemble the full message
+	RCVMSG=$(extract_msg "${1}")
+
+	# check for the main message
+	if ! echo "${RCVMSG}" | grep -q "${MSG}"; then
+		echo "Message body doesn't match." >&2
+		echo "msg received=" "${RCVMSG}" >&2
+		exit "${ksft_fail}"
+	fi
+
+	# check userdata
+	if ! echo "${RCVMSG}" | grep -q "${USERDATA_VALUE}"; then
+		echo "message userdata doesn't match" >&2
+		echo "msg received=" "${RCVMSG}" >&2
+		exit "${ksft_fail}"
+	fi
+	# test passed. hooray
+}
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# Set userdata "key" with the "value" value
+set_user_data
+
+
+# TEST 1: Send message and userdata. They will fragment
+# =======
+MSG=$(printf -- 'MSG%.3s=' {1..150})
+
+# Listen for netconsole port inside the namespace and destination interface
+listen_port_and_save_to "${OUTPUT_FILE}" &
+# Wait for socat to start and listen to the port.
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+# Send the message
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+# Wait until socat saves the file to disk
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+# Check if the message was not corrupted
+validate_fragmented_result "${OUTPUT_FILE}"
+
+# TEST 2: Test with smaller message, and without release appended
+# =======
+MSG=$(printf -- 'FOOBAR%.3s=' {1..100})
+# Let's disable release and test again.
+disable_release_append
+
+listen_port_and_save_to "${OUTPUT_FILE}" &
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+validate_fragmented_result "${OUTPUT_FILE}"
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netcons_overflow.sh
new file mode 100755
index 000000000000..06089643b771
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_overflow.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test verifies that users can successfully create up to
+# MAX_USERDATA_ITEMS userdata entries without encountering any failures.
+#
+# Additionally, it tests for expected failure when attempting to exceed this
+# maximum limit.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+# This is coming from netconsole code. Check for it in drivers/net/netconsole.c
+MAX_USERDATA_ITEMS=256
+
+# Function to create userdata entries
+function create_userdata_max_entries() {
+	# All these keys should be created without any error
+	for i in $(seq $MAX_USERDATA_ITEMS)
+	do
+		# USERDATA_KEY is used by set_user_data
+		USERDATA_KEY="key"${i}
+		set_user_data
+	done
+}
+
+# Function to verify the entry limit
+function verify_entry_limit() {
+	# Allowing the test to fail without exiting, since the next command
+	# will fail
+	set +e
+	mkdir "${NETCONS_PATH}/userdata/key_that_will_fail" 2> /dev/null
+	ret="$?"
+	set -e
+	if [ "$ret" -eq 0 ];
+	then
+		echo "Adding more than ${MAX_USERDATA_ITEMS} entries in userdata should fail, but it didn't" >&2
+		ls "${NETCONS_PATH}/userdata/" >&2
+		exit "${ksft_fail}"
+	fi
+}
+
+# ========== #
+# Start here #
+# ========== #
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# populate the maximum number of supported keys in userdata
+create_userdata_max_entries
+# Verify an additional entry is not allowed
+verify_entry_limit
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
new file mode 100755
index 000000000000..baf69031089e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
@@ -0,0 +1,272 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A test that makes sure that sysdata runtime CPU data is properly set
+# when a message is sent.
+#
+# There are 3 different tests, every time sent using a random CPU.
+#  - Test #1
+#    * Only enable cpu_nr sysdata feature.
+#  - Test #2
+#    * Keep cpu_nr sysdata feature enable and enable userdata.
+#  - Test #3
+#    * keep userdata enabled, and disable sysdata cpu_nr feature.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+# Enable the sysdata cpu_nr feature
+function set_cpu_nr() {
+	if [[ ! -f "${NETCONS_PATH}/userdata/cpu_nr_enabled" ]]
+	then
+		echo "Populate CPU configfs path not available in ${NETCONS_PATH}/userdata/cpu_nr_enabled" >&2
+		exit "${ksft_skip}"
+	fi
+
+	echo 1 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
+}
+
+# Enable the taskname to be appended to sysdata
+function set_taskname() {
+	if [[ ! -f "${NETCONS_PATH}/userdata/taskname_enabled" ]]
+	then
+		echo "Not able to enable taskname sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/taskname_enabled" >&2
+		exit "${ksft_skip}"
+	fi
+
+	echo 1 > "${NETCONS_PATH}/userdata/taskname_enabled"
+}
+
+# Enable the release to be appended to sysdata
+function set_release() {
+	if [[ ! -f "${NETCONS_PATH}/userdata/release_enabled" ]]
+	then
+		echo "Not able to enable release sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/release_enabled" >&2
+		exit "${ksft_skip}"
+	fi
+
+	echo 1 > "${NETCONS_PATH}/userdata/release_enabled"
+}
+
+# Enable the msgid to be appended to sysdata
+function set_msgid() {
+	if [[ ! -f "${NETCONS_PATH}/userdata/msgid_enabled" ]]
+	then
+		echo "Not able to enable msgid sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/msgid_enabled" >&2
+		exit "${ksft_skip}"
+	fi
+
+	echo 1 > "${NETCONS_PATH}/userdata/msgid_enabled"
+}
+
+# Disable the sysdata cpu_nr feature
+function unset_cpu_nr() {
+	echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
+}
+
+# Once called, taskname=<..> will not be appended anymore
+function unset_taskname() {
+	echo 0 > "${NETCONS_PATH}/userdata/taskname_enabled"
+}
+
+function unset_release() {
+	echo 0 > "${NETCONS_PATH}/userdata/release_enabled"
+}
+
+function unset_msgid() {
+	echo 0 > "${NETCONS_PATH}/userdata/msgid_enabled"
+}
+
+# Test if MSG contains sysdata
+function validate_sysdata() {
+	# OUTPUT_FILE will contain something like:
+	# 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+	#  userdatakey=userdatavalue
+	#  cpu=X
+	#  taskname=<taskname>
+	#  msgid=<id>
+
+	# Echo is what this test uses to create the message. See runtest()
+	# function
+	SENDER="echo"
+
+	if [ ! -f "$OUTPUT_FILE" ]; then
+		echo "FAIL: File was not generated." >&2
+		exit "${ksft_fail}"
+	fi
+
+	if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then
+		echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2
+		cat "${OUTPUT_FILE}" >&2
+		exit "${ksft_fail}"
+	fi
+
+	# Check if cpu=XX exists in the file and matches the one used
+	# in taskset(1)
+	if ! grep -q "cpu=${CPU}\+" "${OUTPUT_FILE}"; then
+		echo "FAIL: 'cpu=${CPU}' not found in ${OUTPUT_FILE}" >&2
+		cat "${OUTPUT_FILE}" >&2
+		exit "${ksft_fail}"
+	fi
+
+	if ! grep -q "taskname=${SENDER}" "${OUTPUT_FILE}"; then
+		echo "FAIL: 'taskname=echo' not found in ${OUTPUT_FILE}" >&2
+		cat "${OUTPUT_FILE}" >&2
+		exit "${ksft_fail}"
+	fi
+
+	if ! grep -q "msgid=[0-9]\+$" "${OUTPUT_FILE}"; then
+		echo "FAIL: 'msgid=<id>' not found in ${OUTPUT_FILE}" >&2
+		cat "${OUTPUT_FILE}" >&2
+		exit "${ksft_fail}"
+	fi
+
+	rm "${OUTPUT_FILE}"
+	pkill_socat
+}
+
+function validate_release() {
+	RELEASE=$(uname -r)
+
+	if [ ! -f "$OUTPUT_FILE" ]; then
+		echo "FAIL: File was not generated." >&2
+		exit "${ksft_fail}"
+	fi
+
+	if ! grep -q "release=${RELEASE}" "${OUTPUT_FILE}"; then
+		echo "FAIL: 'release=${RELEASE}' not found in ${OUTPUT_FILE}" >&2
+		cat "${OUTPUT_FILE}" >&2
+		exit "${ksft_fail}"
+	fi
+}
+
+# Test if MSG content exists in OUTPUT_FILE but no `cpu=` and `taskname=`
+# strings
+function validate_no_sysdata() {
+	if [ ! -f "$OUTPUT_FILE" ]; then
+		echo "FAIL: File was not generated." >&2
+		exit "${ksft_fail}"
+	fi
+
+	if ! grep -q "${MSG}" "${OUTPUT_FILE}"; then
+		echo "FAIL: ${MSG} not found in ${OUTPUT_FILE}" >&2
+		cat "${OUTPUT_FILE}" >&2
+		exit "${ksft_fail}"
+	fi
+
+	if grep -q "cpu=" "${OUTPUT_FILE}"; then
+		echo "FAIL: 'cpu=  found in ${OUTPUT_FILE}" >&2
+		cat "${OUTPUT_FILE}" >&2
+		exit "${ksft_fail}"
+	fi
+
+	if grep -q "taskname=" "${OUTPUT_FILE}"; then
+		echo "FAIL: 'taskname=  found in ${OUTPUT_FILE}" >&2
+		cat "${OUTPUT_FILE}" >&2
+		exit "${ksft_fail}"
+	fi
+
+	if grep -q "release=" "${OUTPUT_FILE}"; then
+		echo "FAIL: 'release=  found in ${OUTPUT_FILE}" >&2
+		cat "${OUTPUT_FILE}" >&2
+		exit "${ksft_fail}"
+	fi
+
+	if grep -q "msgid=" "${OUTPUT_FILE}"; then
+		echo "FAIL: 'msgid=  found in ${OUTPUT_FILE}" >&2
+		cat "${OUTPUT_FILE}" >&2
+		exit "${ksft_fail}"
+	fi
+
+	rm "${OUTPUT_FILE}"
+}
+
+# Start socat, send the message and wait for the file to show up in the file
+# system
+function runtest {
+	# Listen for netconsole port inside the namespace and destination
+	# interface
+	listen_port_and_save_to "${OUTPUT_FILE}" &
+	# Wait for socat to start and listen to the port.
+	wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+	# Send the message
+	taskset -c "${CPU}" echo "${MSG}: ${TARGET}" > /dev/kmsg
+	# Wait until socat saves the file to disk
+	busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+}
+
+# ========== #
+# Start here #
+# ========== #
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# This test also depends on taskset(1). Check for it before starting the test
+check_for_taskset
+
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+
+#====================================================
+# TEST #1
+# Send message from a random CPU
+#====================================================
+# Random CPU in the system
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_1"
+MSG="Test #1 from CPU${CPU}"
+# Enable the auto population of cpu_nr
+set_cpu_nr
+# Enable taskname to be appended to sysdata
+set_taskname
+set_release
+set_msgid
+runtest
+# Make sure the message was received in the dst part
+# and exit
+validate_release
+validate_sysdata
+
+#====================================================
+# TEST #2
+# This test now adds userdata together with sysdata
+# ===================================================
+# Get a new random CPU
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_2"
+MSG="Test #2 from CPU${CPU}"
+set_user_data
+runtest
+validate_release
+validate_sysdata
+
+# ===================================================
+# TEST #3
+# Unset all sysdata, fail if any userdata is set
+# ===================================================
+CPU=$((RANDOM % $(nproc)))
+OUTPUT_FILE="/tmp/${TARGET}_3"
+MSG="Test #3 from CPU${CPU}"
+unset_cpu_nr
+unset_taskname
+unset_release
+unset_msgid
+runtest
+# At this time, cpu= shouldn't be present in the msg
+validate_no_sysdata
+
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_torture.sh b/tools/testing/selftests/drivers/net/netcons_torture.sh
new file mode 100755
index 000000000000..2ce9ee3719d1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_torture.sh
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Repeatedly send kernel messages, toggles netconsole targets on and off,
+# creates and deletes targets in parallel, and toggles the source interface to
+# simulate stress conditions.
+#
+# This test aims to verify the robustness of netconsole under dynamic
+# configurations and concurrent operations.
+#
+# The major goal is to run this test with LOCKDEP, Kmemleak and KASAN to make
+# sure no issues is reported.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+# Number of times the main loop run
+ITERATIONS=${1:-150}
+
+# Only test extended format
+FORMAT="extended"
+# And ipv6 only
+IP_VERSION="ipv6"
+
+# Create, enable and delete some targets.
+create_and_delete_random_target() {
+	COUNT=2
+	RND_PREFIX=$(mktemp -u netcons_rnd_XXXX_)
+
+	if [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}${COUNT}"  ] || \
+	   [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}0" ]; then
+		echo "Function didn't finish yet, skipping it." >&2
+		return
+	fi
+
+	# enable COUNT targets
+	for i in $(seq ${COUNT})
+	do
+		RND_TARGET="${RND_PREFIX}"${i}
+		RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}"
+
+		# Basic population so the target can come up
+		_create_dynamic_target "${FORMAT}" "${RND_TARGET_PATH}"
+	done
+
+	echo "netconsole selftest: ${COUNT} additional targets were created" > /dev/kmsg
+	# disable them all
+	for i in $(seq ${COUNT})
+	do
+		RND_TARGET="${RND_PREFIX}"${i}
+		RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}"
+		if [[ $(cat "${RND_TARGET_PATH}/enabled") -eq 1 ]]
+		then
+			echo 0 > "${RND_TARGET_PATH}"/enabled
+		fi
+		rmdir "${RND_TARGET_PATH}"
+	done
+}
+
+# Disable and enable the target mid-air, while messages
+# are being transmitted.
+toggle_netcons_target() {
+	for i in $(seq 2)
+	do
+		if [ ! -d "${NETCONS_PATH}" ]
+		then
+			break
+		fi
+		echo 0 > "${NETCONS_PATH}"/enabled 2> /dev/null || true
+		# Try to enable a bit harder, given it might fail to enable
+		# Write to `enabled` might fail depending on the lock, which is
+		# highly contentious here
+		for _ in $(seq 5)
+		do
+			echo 1 > "${NETCONS_PATH}"/enabled 2> /dev/null || true
+		done
+	done
+}
+
+toggle_iface(){
+	ip link set "${SRCIF}" down
+	ip link set "${SRCIF}" up
+}
+
+# Start here
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network "${IP_VERSION}"
+# Create a dynamic target for netconsole
+create_dynamic_target "${FORMAT}"
+
+for i in $(seq "$ITERATIONS")
+do
+	for _ in $(seq 10)
+	do
+		echo "${MSG}: ${TARGET} ${i}" > /dev/kmsg
+	done
+	wait
+
+	if (( i % 30 == 0 )); then
+		toggle_netcons_target &
+	fi
+
+	if (( i % 50 == 0 )); then
+		# create some targets, enable them, send msg and disable
+		# all in a parallel thread
+		create_and_delete_random_target &
+	fi
+
+	if (( i % 70 == 0 )); then
+		toggle_iface &
+	fi
+done
+wait
+
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile
new file mode 100644
index 000000000000..1a228c5430f5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS := \
+	devlink.sh \
+	devlink_in_netns.sh \
+	devlink_trap.sh \
+	ethtool-coalesce.sh \
+	ethtool-features.sh \
+	ethtool-fec.sh \
+	ethtool-pause.sh \
+	fib.sh \
+	fib_notifications.sh \
+	hw_stats_l3.sh \
+	macsec-offload.sh \
+	nexthop.sh \
+	peer.sh \
+	psample.sh \
+	tc-mq-visibility.sh \
+	udp_tunnel_nic.sh \
+# end of TEST_PROGS
+
+TEST_FILES := \
+	ethtool-common.sh
+# end of TEST_FILES
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/netdevsim/config b/tools/testing/selftests/drivers/net/netdevsim/config
new file mode 100644
index 000000000000..5117c78ddf0a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/config
@@ -0,0 +1,11 @@
+CONFIG_DUMMY=y
+CONFIG_GENEVE=m
+CONFIG_IPV6=y
+CONFIG_MACSEC=m
+CONFIG_NETDEVSIM=m
+CONFIG_NET_SCH_MQPRIO=y
+CONFIG_NET_SCH_MULTIQ=y
+CONFIG_NET_SCH_PRIO=y
+CONFIG_PSAMPLE=y
+CONFIG_PTP_1588_CLOCK_MOCK=y
+CONFIG_VXLAN=m
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
new file mode 100755
index 000000000000..1b529ccaf050
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -0,0 +1,879 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="fw_flash_test params_test  \
+	   params_default_test regions_test reload_test \
+	   netns_reload_test resource_test dev_info_test \
+	   empty_reporter_test dummy_reporter_test rate_test"
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+BUS_ADDR=10
+PORT_COUNT=4
+VF_COUNT=4
+DEV_NAME=netdevsim$BUS_ADDR
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/
+DL_HANDLE=netdevsim/$DEV_NAME
+
+wait_for_devlink()
+{
+	"$@" | grep -q $DL_HANDLE
+}
+
+devlink_wait()
+{
+	local timeout=$1
+
+	busywait "$timeout" wait_for_devlink devlink dev
+}
+
+fw_flash_test()
+{
+	DUMMYFILE=$(find /lib/firmware -type f -printf '%P\n' | head -1)
+	RET=0
+
+	if [ -z "$DUMMYFILE" ]
+	then
+		echo "SKIP: unable to find suitable dummy firmware file"
+		return
+	fi
+
+	echo "10"> $DEBUGFS_DIR/fw_update_flash_chunk_time_ms
+
+	devlink dev flash $DL_HANDLE file $DUMMYFILE
+	check_err $? "Failed to flash with status updates on"
+
+	devlink dev flash $DL_HANDLE file $DUMMYFILE component fw.mgmt
+	check_err $? "Failed to flash with component attribute"
+
+	devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite settings
+	check_fail $? "Flash with overwrite settings should be rejected"
+
+	echo "1"> $DEBUGFS_DIR/fw_update_overwrite_mask
+	check_err $? "Failed to change allowed overwrite mask"
+
+	devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite settings
+	check_err $? "Failed to flash with settings overwrite enabled"
+
+	devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite identifiers
+	check_fail $? "Flash with overwrite settings should be identifiers"
+
+	echo "3"> $DEBUGFS_DIR/fw_update_overwrite_mask
+	check_err $? "Failed to change allowed overwrite mask"
+
+	devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite identifiers overwrite settings
+	check_err $? "Failed to flash with settings and identifiers overwrite enabled"
+
+	echo "n"> $DEBUGFS_DIR/fw_update_status
+	check_err $? "Failed to disable status updates"
+
+	devlink dev flash $DL_HANDLE file $DUMMYFILE
+	check_err $? "Failed to flash with status updates off"
+
+	log_test "fw flash test"
+}
+
+param_get()
+{
+	local name=$1
+	local attr=${2:-value}
+	local cmode=${3:-driverinit}
+
+	cmd_jq "devlink dev param show $DL_HANDLE name $name -j" \
+	       '.[][][].values[] | select(.cmode == "'"$cmode"'").'"$attr"
+}
+
+param_set()
+{
+	local name=$1
+	local value=$2
+	local cmode=${3:-driverinit}
+
+	devlink dev param set $DL_HANDLE name $name cmode $cmode value $value
+}
+
+param_set_default()
+{
+	local name=$1
+	local cmode=${2:-driverinit}
+
+	devlink dev param set $DL_HANDLE name $name default cmode $cmode
+}
+
+check_value()
+{
+	local name=$1
+	local phase_name=$2
+	local expected_param_value=$3
+	local expected_debugfs_value=$4
+	local cmode=${5:-driverinit}
+	local value
+	local attr="value"
+
+	if [[ "$phase_name" == *"default"* ]]; then
+		attr="default"
+	fi
+
+	value=$(param_get $name $attr $cmode)
+	check_err $? "Failed to get $name param $attr"
+	[ "$value" == "$expected_param_value" ]
+	check_err $? "Unexpected $phase_name $name param $attr"
+	value=$(<$DEBUGFS_DIR/$name)
+	check_err $? "Failed to get $name debugfs value"
+	[ "$value" == "$expected_debugfs_value" ]
+	check_err $? "Unexpected $phase_name $name debugfs value"
+}
+
+params_test()
+{
+	RET=0
+
+	local max_macs
+	local test1
+
+	check_value max_macs initial 32 32
+	check_value test1 initial true Y
+
+	param_set max_macs 16
+	check_err $? "Failed to set max_macs param value"
+	param_set test1 false
+	check_err $? "Failed to set test1 param value"
+
+	check_value max_macs post-set 16 32
+	check_value test1 post-set false Y
+
+	devlink dev reload $DL_HANDLE
+
+	check_value max_macs post-reload 16 16
+	check_value test1 post-reload false N
+
+	log_test "params test"
+}
+
+value_to_debugfs()
+{
+	local value=$1
+
+	case "$value" in
+		true)
+			echo "Y"
+			;;
+		false)
+			echo "N"
+			;;
+		*)
+			echo "$value"
+			;;
+	esac
+}
+
+test_default()
+{
+	local param_name=$1
+	local new_value=$2
+	local expected_default=$3
+	local cmode=${4:-driverinit}
+	local default_debugfs
+	local new_debugfs
+	local expected_debugfs
+
+	default_debugfs=$(value_to_debugfs $expected_default)
+	new_debugfs=$(value_to_debugfs $new_value)
+
+	expected_debugfs=$default_debugfs
+	check_value $param_name initial-default $expected_default $expected_debugfs $cmode
+
+	param_set $param_name $new_value $cmode
+	check_err $? "Failed to set $param_name to $new_value"
+
+	expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$new_debugfs" || echo "$default_debugfs")
+	check_value $param_name post-set $new_value $expected_debugfs $cmode
+
+	devlink dev reload $DL_HANDLE
+	check_err $? "Failed to reload device"
+
+	expected_debugfs=$new_debugfs
+	check_value $param_name post-reload-new-value $new_value $expected_debugfs $cmode
+
+	param_set_default $param_name $cmode
+	check_err $? "Failed to set $param_name to default"
+
+	expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$default_debugfs" || echo "$new_debugfs")
+	check_value $param_name post-set-default $expected_default $expected_debugfs $cmode
+
+	devlink dev reload $DL_HANDLE
+	check_err $? "Failed to reload device"
+
+	expected_debugfs=$default_debugfs
+	check_value $param_name post-reload-default $expected_default $expected_debugfs $cmode
+}
+
+params_default_test()
+{
+	RET=0
+
+	if ! devlink dev param help 2>&1 | grep -q "value VALUE | default"; then
+		echo "SKIP: devlink cli missing default feature"
+		return
+	fi
+
+	# Remove side effects of previous tests. Use plain param_set, because
+	# param_set_default is a feature under test here.
+	param_set max_macs 32 driverinit
+	check_err $? "Failed to reset max_macs to default value"
+	param_set test1 true driverinit
+	check_err $? "Failed to reset test1 to default value"
+	param_set test2 1234 runtime
+	check_err $? "Failed to reset test2 to default value"
+
+	devlink dev reload $DL_HANDLE
+	check_err $? "Failed to reload device for clean state"
+
+	test_default max_macs 16 32 driverinit
+	test_default test1 false true driverinit
+	test_default test2 100 1234 runtime
+
+	log_test "params default test"
+}
+
+check_region_size()
+{
+	local name=$1
+	local size
+
+	size=$(devlink region show $DL_HANDLE/$name -j | jq -e -r '.[][].size')
+	check_err $? "Failed to get $name region size"
+	[ $size -eq 32768 ]
+	check_err $? "Invalid $name region size"
+}
+
+check_region_snapshot_count()
+{
+	local name=$1
+	local phase_name=$2
+	local expected_count=$3
+	local count
+
+	count=$(devlink region show $DL_HANDLE/$name -j | jq -e -r '.[][].snapshot | length')
+	[ $count -eq $expected_count ]
+	check_err $? "Unexpected $phase_name snapshot count"
+}
+
+regions_test()
+{
+	RET=0
+
+	local count
+
+	check_region_size dummy
+	check_region_snapshot_count dummy initial 0
+
+	echo ""> $DEBUGFS_DIR/take_snapshot
+	check_err $? "Failed to take first dummy region snapshot"
+	check_region_snapshot_count dummy post-first-snapshot 1
+
+	echo ""> $DEBUGFS_DIR/take_snapshot
+	check_err $? "Failed to take second dummy region snapshot"
+	check_region_snapshot_count dummy post-second-snapshot 2
+
+	echo ""> $DEBUGFS_DIR/take_snapshot
+	check_err $? "Failed to take third dummy region snapshot"
+	check_region_snapshot_count dummy post-third-snapshot 3
+
+	devlink region del $DL_HANDLE/dummy snapshot 1
+	check_err $? "Failed to delete first dummy region snapshot"
+
+	check_region_snapshot_count dummy post-first-delete 2
+
+	devlink region new $DL_HANDLE/dummy snapshot 25
+	check_err $? "Failed to create a new snapshot with id 25"
+
+	check_region_snapshot_count dummy post-first-request 3
+
+	devlink region dump $DL_HANDLE/dummy snapshot 25 >> /dev/null
+	check_err $? "Failed to dump snapshot with id 25"
+
+	devlink region read $DL_HANDLE/dummy snapshot 25 addr 0 len 1 >> /dev/null
+	check_err $? "Failed to read snapshot with id 25 (1 byte)"
+
+	devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len 128 >> /dev/null
+	check_err $? "Failed to read snapshot with id 25 (128 bytes)"
+
+	devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len $((1<<32)) >> /dev/null
+	check_err $? "Failed to read snapshot with id 25 (oversized)"
+
+	devlink region read $DL_HANDLE/dummy snapshot 25 addr $((1<<32)) len 128 >> /dev/null 2>&1
+	check_fail $? "Bad read of snapshot with id 25 did not fail"
+
+	devlink region del $DL_HANDLE/dummy snapshot 25
+	check_err $? "Failed to delete snapshot with id 25"
+
+	check_region_snapshot_count dummy post-second-delete 2
+
+	sid=$(devlink -j region new $DL_HANDLE/dummy | jq '.[][][][]')
+	check_err $? "Failed to create a new snapshot with id allocated by the kernel"
+
+	check_region_snapshot_count dummy post-first-request 3
+
+	devlink region dump $DL_HANDLE/dummy snapshot $sid >> /dev/null
+	check_err $? "Failed to dump a snapshot with id allocated by the kernel"
+
+	devlink region del $DL_HANDLE/dummy snapshot $sid
+	check_err $? "Failed to delete snapshot with id allocated by the kernel"
+
+	check_region_snapshot_count dummy post-first-request 2
+
+	log_test "regions test"
+}
+
+reload_test()
+{
+	RET=0
+
+	devlink dev reload $DL_HANDLE
+	check_err $? "Failed to reload"
+
+	echo "y"> $DEBUGFS_DIR/fail_reload
+	check_err $? "Failed to setup devlink reload to fail"
+
+	devlink dev reload $DL_HANDLE
+	check_fail $? "Unexpected success of devlink reload"
+
+	echo "n"> $DEBUGFS_DIR/fail_reload
+	check_err $? "Failed to setup devlink reload not to fail"
+
+	devlink dev reload $DL_HANDLE
+	check_err $? "Failed to reload after set not to fail"
+
+	echo "y"> $DEBUGFS_DIR/dont_allow_reload
+	check_err $? "Failed to forbid devlink reload"
+
+	devlink dev reload $DL_HANDLE
+	check_fail $? "Unexpected success of devlink reload"
+
+	echo "n"> $DEBUGFS_DIR/dont_allow_reload
+	check_err $? "Failed to re-enable devlink reload"
+
+	devlink dev reload $DL_HANDLE
+	check_err $? "Failed to reload after re-enable"
+
+	log_test "reload test"
+}
+
+netns_reload_test()
+{
+	RET=0
+
+	ip netns add testns1
+	check_err $? "Failed add netns \"testns1\""
+	ip netns add testns2
+	check_err $? "Failed add netns \"testns2\""
+
+	devlink dev reload $DL_HANDLE netns testns1
+	check_err $? "Failed to reload into netns \"testns1\""
+
+	devlink -N testns1 dev reload $DL_HANDLE netns testns2
+	check_err $? "Failed to reload from netns \"testns1\" into netns \"testns2\""
+
+	ip netns del testns2
+	ip netns del testns1
+
+	# Wait until netns async cleanup is done.
+	devlink_wait 2000
+
+	log_test "netns reload test"
+}
+
+DUMMYDEV="dummytest"
+
+res_val_get()
+{
+	local netns=$1
+	local parentname=$2
+	local name=$3
+	local type=$4
+
+	cmd_jq "devlink -N $netns resource show $DL_HANDLE -j" \
+	       ".[][][] | select(.name == \"$parentname\").resources[] \
+	        | select(.name == \"$name\").$type"
+}
+
+resource_test()
+{
+	RET=0
+
+	ip netns add testns1
+	check_err $? "Failed add netns \"testns1\""
+	ip netns add testns2
+	check_err $? "Failed add netns \"testns2\""
+
+	devlink dev reload $DL_HANDLE netns testns1
+	check_err $? "Failed to reload into netns \"testns1\""
+
+	# Create dummy dev to add the address and routes on.
+
+	ip -n testns1 link add name $DUMMYDEV type dummy
+	check_err $? "Failed create dummy device"
+	ip -n testns1 link set $DUMMYDEV up
+	check_err $? "Failed bring up dummy device"
+	ip -n testns1 a a 192.0.1.1/24 dev $DUMMYDEV
+	check_err $? "Failed add an IP address to dummy device"
+
+	local occ=$(res_val_get testns1 IPv4 fib occ)
+	local limit=$((occ+1))
+
+	# Set fib size limit to handle one another route only.
+
+	devlink -N testns1 resource set $DL_HANDLE path IPv4/fib size $limit
+	check_err $? "Failed to set IPv4/fib resource size"
+	local size_new=$(res_val_get testns1 IPv4 fib size_new)
+	[ "$size_new" -eq "$limit" ]
+	check_err $? "Unexpected \"size_new\" value (got $size_new, expected $limit)"
+
+	devlink -N testns1 dev reload $DL_HANDLE
+	check_err $? "Failed to reload"
+	local size=$(res_val_get testns1 IPv4 fib size)
+	[ "$size" -eq "$limit" ]
+	check_err $? "Unexpected \"size\" value (got $size, expected $limit)"
+
+	# Insert 2 routes, the first is going to be inserted,
+	# the second is expected to fail to be inserted.
+
+	ip -n testns1 r a 192.0.2.0/24 via 192.0.1.2
+	check_err $? "Failed to add route"
+
+	ip -n testns1 r a 192.0.3.0/24 via 192.0.1.2
+	check_fail $? "Unexpected successful route add over limit"
+
+	# Now create another dummy in second network namespace and
+	# insert two routes. That is over the limit of the netdevsim
+	# instance in the first namespace. Move the netdevsim instance
+	# into the second namespace and expect it to fail.
+
+	ip -n testns2 link add name $DUMMYDEV type dummy
+	check_err $? "Failed create dummy device"
+	ip -n testns2 link set $DUMMYDEV up
+	check_err $? "Failed bring up dummy device"
+	ip -n testns2 a a 192.0.1.1/24 dev $DUMMYDEV
+	check_err $? "Failed add an IP address to dummy device"
+	ip -n testns2 r a 192.0.2.0/24 via 192.0.1.2
+	check_err $? "Failed to add route"
+	ip -n testns2 r a 192.0.3.0/24 via 192.0.1.2
+	check_err $? "Failed to add route"
+
+	devlink -N testns1 dev reload $DL_HANDLE netns testns2
+	check_fail $? "Unexpected successful reload from netns \"testns1\" into netns \"testns2\""
+
+	devlink -N testns2 resource set $DL_HANDLE path IPv4/fib size ' -1'
+	check_err $? "Failed to reset IPv4/fib resource size"
+
+	devlink -N testns2 dev reload $DL_HANDLE netns 1
+	check_err $? "Failed to reload devlink back"
+
+	ip netns del testns2
+	ip netns del testns1
+
+	# Wait until netns async cleanup is done.
+	devlink_wait 2000
+
+	log_test "resource test"
+}
+
+info_get()
+{
+	local name=$1
+
+	cmd_jq "devlink dev info $DL_HANDLE -j" ".[][][\"$name\"]" "-e"
+}
+
+dev_info_test()
+{
+	RET=0
+
+	driver=$(info_get "driver")
+	check_err $? "Failed to get driver name"
+	[ "$driver" == "netdevsim" ]
+	check_err $? "Unexpected driver name $driver"
+
+	log_test "dev_info test"
+}
+
+empty_reporter_test()
+{
+	RET=0
+
+	devlink health show $DL_HANDLE reporter empty >/dev/null
+	check_err $? "Failed show empty reporter"
+
+	devlink health dump show $DL_HANDLE reporter empty >/dev/null
+	check_err $? "Failed show dump of empty reporter"
+
+	devlink health diagnose $DL_HANDLE reporter empty >/dev/null
+	check_err $? "Failed diagnose empty reporter"
+
+	devlink health recover $DL_HANDLE reporter empty
+	check_err $? "Failed recover empty reporter"
+
+	log_test "empty reporter test"
+}
+
+check_reporter_info()
+{
+	local name=$1
+	local expected_state=$2
+	local expected_error=$3
+	local expected_recover=$4
+	local expected_grace_period=$5
+	local expected_auto_recover=$6
+
+	local show=$(devlink health show $DL_HANDLE reporter $name -j | jq -e -r ".[][][]")
+	check_err $? "Failed show $name reporter"
+
+	local state=$(echo $show | jq -r ".state")
+	[ "$state" == "$expected_state" ]
+	check_err $? "Unexpected \"state\" value (got $state, expected $expected_state)"
+
+	local error=$(echo $show | jq -r ".error")
+	[ "$error" == "$expected_error" ]
+	check_err $? "Unexpected \"error\" value (got $error, expected $expected_error)"
+
+	local recover=`echo $show | jq -r ".recover"`
+	[ "$recover" == "$expected_recover" ]
+	check_err $? "Unexpected \"recover\" value (got $recover, expected $expected_recover)"
+
+	local grace_period=$(echo $show | jq -r ".grace_period")
+	check_err $? "Failed get $name reporter grace_period"
+	[ "$grace_period" == "$expected_grace_period" ]
+	check_err $? "Unexpected \"grace_period\" value (got $grace_period, expected $expected_grace_period)"
+
+	local auto_recover=$(echo $show | jq -r ".auto_recover")
+	[ "$auto_recover" == "$expected_auto_recover" ]
+	check_err $? "Unexpected \"auto_recover\" value (got $auto_recover, expected $expected_auto_recover)"
+}
+
+dummy_reporter_test()
+{
+	RET=0
+
+	check_reporter_info dummy healthy 0 0 0 true
+
+	devlink health set $DL_HANDLE reporter dummy auto_recover false
+	check_err $? "Failed to dummy reporter auto_recover option"
+
+	check_reporter_info dummy healthy 0 0 0 false
+
+	local BREAK_MSG="foo bar"
+	echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health
+	check_err $? "Failed to break dummy reporter"
+
+	check_reporter_info dummy error 1 0 0 false
+
+	local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j)
+	check_err $? "Failed show dump of dummy reporter"
+
+	local dump_break_msg=$(echo $dump | jq -r ".break_message")
+	[ "$dump_break_msg" == "$BREAK_MSG" ]
+	check_err $? "Unexpected dump break message value (got $dump_break_msg, expected $BREAK_MSG)"
+
+	devlink health dump clear $DL_HANDLE reporter dummy
+	check_err $? "Failed clear dump of dummy reporter"
+
+	devlink health recover $DL_HANDLE reporter dummy
+	check_err $? "Failed recover dummy reporter"
+
+	check_reporter_info dummy healthy 1 1 0 false
+
+	devlink health set $DL_HANDLE reporter dummy auto_recover true
+	check_err $? "Failed to dummy reporter auto_recover option"
+
+	check_reporter_info dummy healthy 1 1 0 true
+
+	echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health
+	check_err $? "Failed to break dummy reporter"
+
+	check_reporter_info dummy healthy 2 2 0 true
+
+	local diagnose=$(devlink health diagnose $DL_HANDLE reporter dummy -j -p)
+	check_err $? "Failed show diagnose of dummy reporter"
+
+	local rcvrd_break_msg=$(echo $diagnose | jq -r ".recovered_break_message")
+	[ "$rcvrd_break_msg" == "$BREAK_MSG" ]
+	check_err $? "Unexpected recovered break message value (got $rcvrd_break_msg, expected $BREAK_MSG)"
+
+	devlink health set $DL_HANDLE reporter dummy grace_period 10
+	check_err $? "Failed to dummy reporter grace_period option"
+
+	check_reporter_info dummy healthy 2 2 10 true
+
+	echo "Y"> $DEBUGFS_DIR/health/fail_recover
+	check_err $? "Failed set dummy reporter recovery to fail"
+
+	echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health
+	check_fail $? "Unexpected success of dummy reporter break"
+
+	check_reporter_info dummy error 3 2 10 true
+
+	devlink health recover $DL_HANDLE reporter dummy
+	check_fail $? "Unexpected success of dummy reporter recover"
+
+	echo "N"> $DEBUGFS_DIR/health/fail_recover
+	check_err $? "Failed set dummy reporter recovery to be successful"
+
+	devlink health recover $DL_HANDLE reporter dummy
+	check_err $? "Failed recover dummy reporter"
+
+	check_reporter_info dummy healthy 3 3 10 true
+
+	echo 8192 > $DEBUGFS_DIR/health/binary_len
+	check_err $? "Failed set dummy reporter binary len to 8192"
+
+	local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j)
+	check_err $? "Failed show dump of dummy reporter"
+
+	devlink health dump clear $DL_HANDLE reporter dummy
+	check_err $? "Failed clear dump of dummy reporter"
+
+	log_test "dummy reporter test"
+}
+
+rate_leafs_get()
+{
+	local handle=$1
+
+	cmd_jq "devlink port function rate show -j" \
+	       '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))'
+}
+
+rate_nodes_get()
+{
+	local handle=$1
+
+	cmd_jq "devlink port function rate show -j" \
+		'.[] | to_entries | .[] | select(.value.type == "node") | .key | select(contains("'$handle'"))'
+}
+
+rate_attr_set()
+{
+	local handle=$1
+	local name=$2
+	local value=$3
+	local units=$4
+
+	devlink port function rate set $handle $name $value$units
+}
+
+rate_attr_get()
+{
+	local handle=$1
+	local name=$2
+
+	cmd_jq "devlink port function rate show $handle -j" '.[][].'$name
+}
+
+rate_attr_tx_rate_check()
+{
+	local handle=$1
+	local name=$2
+	local rate=$3
+	local debug_file=$4
+
+	rate_attr_set $handle $name $rate mbit
+	check_err $? "Failed to set $name value"
+
+	local debug_value=$(cat $debug_file)
+	check_err $? "Failed to read $name value from debugfs"
+	[ "$debug_value" == "$rate" ]
+	check_err $? "Unexpected $name debug value $debug_value != $rate"
+
+	local api_value=$(( $(rate_attr_get $handle $name) * 8 / 1000000 ))
+	check_err $? "Failed to get $name attr value"
+	[ "$api_value" == "$rate" ]
+	check_err $? "Unexpected $name attr value $api_value != $rate"
+}
+
+rate_attr_parent_check()
+{
+	local handle=$1
+	local parent=$2
+	local debug_file=$3
+
+	rate_attr_set $handle parent $parent
+	check_err $? "Failed to set parent"
+
+	debug_value=$(cat $debug_file)
+	check_err $? "Failed to get parent debugfs value"
+	[ "$debug_value" == "$parent" ]
+	check_err $? "Unexpected parent debug value $debug_value != $parent"
+
+	api_value=$(rate_attr_get $r_obj parent)
+	check_err $? "Failed to get parent attr value"
+	[ "$api_value" == "$parent" ]
+	check_err $? "Unexpected parent attr value $api_value != $parent"
+}
+
+rate_attr_tc_bw_check()
+{
+	local handle=$1
+	local tc_bw=$2
+	local debug_file=$3
+
+	local tc_bw_str=""
+	for bw in $tc_bw; do
+		local tc=${bw%%:*}
+		local value=${bw##*:}
+		tc_bw_str="$tc_bw_str $tc:$value"
+	done
+	tc_bw_str=${tc_bw_str# }
+
+	rate_attr_set "$handle" tc-bw "$tc_bw_str"
+	check_err $? "Failed to set tc-bw values"
+
+	for bw in $tc_bw; do
+		local tc=${bw%%:*}
+		local value=${bw##*:}
+		local debug_value
+		debug_value=$(cat "$debug_file"/tc"${tc}"_bw)
+		check_err $? "Failed to read tc-bw value from debugfs for tc$tc"
+		[ "$debug_value" == "$value" ]
+		check_err $? "Unexpected tc-bw debug value for tc$tc: $debug_value != $value"
+	done
+
+	for bw in $tc_bw; do
+		local tc=${bw%%:*}
+		local expected_value=${bw##*:}
+		local api_value
+		api_value=$(rate_attr_get "$handle" tc_"$tc")
+		if [ "$api_value" = "null" ]; then
+			api_value=0
+		fi
+		[ "$api_value" == "$expected_value" ]
+		check_err $? "Unexpected tc-bw value for tc$tc: $api_value != $expected_value"
+	done
+}
+
+rate_node_add()
+{
+	local handle=$1
+
+	devlink port function rate add $handle
+}
+
+rate_node_del()
+{
+	local handle=$1
+
+	devlink port function rate del $handle
+}
+
+rate_test()
+{
+	RET=0
+
+	echo $VF_COUNT > /sys/bus/netdevsim/devices/$DEV_NAME/sriov_numvfs
+	devlink dev eswitch set $DL_HANDLE mode switchdev
+	local leafs=`rate_leafs_get $DL_HANDLE`
+	local num_leafs=`echo $leafs | wc -w`
+	[ "$num_leafs" == "$VF_COUNT" ]
+	check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs"
+
+	rate=10
+	for r_obj in $leafs
+	do
+		rate_attr_tx_rate_check $r_obj tx_share $rate \
+			$DEBUGFS_DIR/ports/${r_obj##*/}/tx_share
+		rate=$(($rate+10))
+	done
+
+	rate=100
+	for r_obj in $leafs
+	do
+		rate_attr_tx_rate_check $r_obj tx_max $rate \
+			$DEBUGFS_DIR/ports/${r_obj##*/}/tx_max
+		rate=$(($rate+100))
+	done
+
+	local tc_bw="0:0 1:40 2:0 3:0 4:0 5:0 6:60 7:0"
+	for r_obj in $leafs
+	do
+		rate_attr_tc_bw_check "$r_obj" "$tc_bw" \
+			"$DEBUGFS_DIR"/ports/"${r_obj##*/}"
+	done
+
+	local node1_name='group1'
+	local node1="$DL_HANDLE/$node1_name"
+	rate_node_add "$node1"
+	check_err $? "Failed to add node $node1"
+
+	local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+	[ $num_nodes == 1 ]
+	check_err $? "Expected 1 rate node in output but got $num_nodes"
+
+	local node_tx_share=10
+	rate_attr_tx_rate_check $node1 tx_share $node_tx_share \
+		$DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_share
+
+	local node_tx_max=100
+	rate_attr_tx_rate_check $node1 tx_max $node_tx_max \
+		$DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max
+
+
+	local tc_bw="0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0"
+	rate_attr_tc_bw_check $node1 "$tc_bw" \
+		"$DEBUGFS_DIR"/rate_nodes/"${node1##*/}"
+
+
+	rate_node_del "$node1"
+	check_err $? "Failed to delete node $node1"
+	local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+	[ $num_nodes == 0 ]
+	check_err $? "Expected 0 rate node but got $num_nodes"
+
+	local node1_name='group1'
+	local node1="$DL_HANDLE/$node1_name"
+	rate_node_add "$node1"
+	check_err $? "Failed to add node $node1"
+
+	rate_attr_parent_check $r_obj $node1_name \
+		$DEBUGFS_DIR/ports/${r_obj##*/}/rate_parent
+
+	local node2_name='group2'
+	local node2="$DL_HANDLE/$node2_name"
+	rate_node_add "$node2"
+	check_err $? "Failed to add node $node2"
+
+	rate_attr_parent_check $node2 $node1_name \
+		$DEBUGFS_DIR/rate_nodes/$node2_name/rate_parent
+	rate_node_del "$node2"
+	check_err $? "Failed to delete node $node2"
+	rate_attr_set "$r_obj" noparent
+	check_err $? "Failed to unset $r_obj parent node"
+	rate_node_del "$node1"
+	check_err $? "Failed to delete node $node1"
+
+	log_test "rate test"
+}
+
+setup_prepare()
+{
+	modprobe netdevsim
+	echo "$BUS_ADDR $PORT_COUNT" > /sys/bus/netdevsim/new_device
+	while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+}
+
+cleanup()
+{
+	pre_cleanup
+	echo "$BUS_ADDR" > /sys/bus/netdevsim/del_device
+	modprobe -r netdevsim
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh
new file mode 100755
index 000000000000..7effd35369e1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="check_devlink_test check_ports_test"
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+BUS_ADDR=10
+PORT_COUNT=4
+DEV_NAME=netdevsim$BUS_ADDR
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
+DL_HANDLE=netdevsim/$DEV_NAME
+NETNS_NAME=testns1
+
+port_netdev_get()
+{
+	local port_index=$1
+
+	cmd_jq "devlink -N $NETNS_NAME port show -j" \
+	       ".[][\"$DL_HANDLE/$port_index\"].netdev" "-e"
+}
+
+check_ports_test()
+{
+	RET=0
+
+	for i in $(seq 0 $(expr $PORT_COUNT - 1)); do
+		netdev_name=$(port_netdev_get $i)
+		check_err $? "Failed to get netdev name for port $DL_HANDLE/$i"
+		ip -n $NETNS_NAME link show $netdev_name &> /dev/null
+		check_err $? "Failed to find netdev $netdev_name"
+	done
+
+	log_test "check ports test"
+}
+
+check_devlink_test()
+{
+	RET=0
+
+	devlink -N $NETNS_NAME dev show $DL_HANDLE &> /dev/null
+	check_err $? "Failed to show devlink instance"
+
+	log_test "check devlink test"
+}
+
+setup_prepare()
+{
+	modprobe netdevsim
+	ip netns add $NETNS_NAME
+	ip netns exec $NETNS_NAME \
+		echo "$BUS_ADDR $PORT_COUNT" > /sys/bus/netdevsim/new_device
+	while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+}
+
+cleanup()
+{
+	pre_cleanup
+	echo "$BUS_ADDR" > /sys/bus/netdevsim/del_device
+	ip netns del $NETNS_NAME
+	modprobe -r netdevsim
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
new file mode 100755
index 000000000000..b64d98ca0df7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -0,0 +1,514 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking devlink-trap functionality. It makes use of
+# netdevsim which implements the required callbacks.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	init_test
+	trap_action_test
+	trap_metadata_test
+	bad_trap_test
+	bad_trap_action_test
+	trap_stats_test
+	trap_group_action_test
+	bad_trap_group_test
+	trap_group_stats_test
+	trap_policer_test
+	trap_policer_bind_test
+	port_del_test
+	dev_del_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
+SLEEP_TIME=1
+NETDEV=""
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+DEVLINK_DEV=
+source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
+
+require_command udevadm
+
+modprobe netdevsim &> /dev/null
+if [ ! -d "$NETDEVSIM_PATH" ]; then
+	echo "SKIP: No netdevsim support"
+	exit 1
+fi
+
+if [ -d "${NETDEVSIM_PATH}/devices/netdevsim${DEV_ADDR}" ]; then
+	echo "SKIP: Device netdevsim${DEV_ADDR} already exists"
+	exit 1
+fi
+
+check_netdev_down()
+{
+	state=$(cat /sys/class/net/${NETDEV}/flags)
+
+	if [ $((state & 1)) -ne 0 ]; then
+		echo "WARNING: unexpected interface UP, disable NetworkManager?"
+
+		ip link set dev $NETDEV down
+	fi
+}
+
+init_test()
+{
+	RET=0
+
+	test $(devlink_traps_num_get) -ne 0
+	check_err $? "No traps were registered"
+
+	log_test "Initialization"
+}
+
+trap_action_test()
+{
+	local orig_action
+	local trap_name
+	local action
+
+	RET=0
+
+	for trap_name in $(devlink_traps_get); do
+		# The action of non-drop traps cannot be changed.
+		if [ $(devlink_trap_type_get $trap_name) = "drop" ]; then
+			devlink_trap_action_set $trap_name "trap"
+			action=$(devlink_trap_action_get $trap_name)
+			if [ $action != "trap" ]; then
+				check_err 1 "Trap $trap_name did not change action to trap"
+			fi
+
+			devlink_trap_action_set $trap_name "drop"
+			action=$(devlink_trap_action_get $trap_name)
+			if [ $action != "drop" ]; then
+				check_err 1 "Trap $trap_name did not change action to drop"
+			fi
+		else
+			orig_action=$(devlink_trap_action_get $trap_name)
+
+			devlink_trap_action_set $trap_name "trap"
+			action=$(devlink_trap_action_get $trap_name)
+			if [ $action != $orig_action ]; then
+				check_err 1 "Trap $trap_name changed action when should not"
+			fi
+
+			devlink_trap_action_set $trap_name "drop"
+			action=$(devlink_trap_action_get $trap_name)
+			if [ $action != $orig_action ]; then
+				check_err 1 "Trap $trap_name changed action when should not"
+			fi
+		fi
+	done
+
+	log_test "Trap action"
+}
+
+trap_metadata_test()
+{
+	local trap_name
+
+	RET=0
+
+	for trap_name in $(devlink_traps_get); do
+		devlink_trap_metadata_test $trap_name "input_port"
+		check_err $? "Input port not reported as metadata of trap $trap_name"
+		if [ $trap_name == "ingress_flow_action_drop" ] ||
+		   [ $trap_name == "egress_flow_action_drop" ]; then
+			devlink_trap_metadata_test $trap_name "flow_action_cookie"
+			check_err $? "Flow action cookie not reported as metadata of trap $trap_name"
+		fi
+	done
+
+	log_test "Trap metadata"
+}
+
+bad_trap_test()
+{
+	RET=0
+
+	devlink_trap_action_set "made_up_trap" "drop"
+	check_fail $? "Did not get an error for non-existing trap"
+
+	log_test "Non-existing trap"
+}
+
+bad_trap_action_test()
+{
+	local traps_arr
+	local trap_name
+
+	RET=0
+
+	# Pick first trap.
+	traps_arr=($(devlink_traps_get))
+	trap_name=${traps_arr[0]}
+
+	devlink_trap_action_set $trap_name "made_up_action"
+	check_fail $? "Did not get an error for non-existing trap action"
+
+	log_test "Non-existing trap action"
+}
+
+trap_stats_test()
+{
+	local trap_name
+
+	RET=0
+
+	check_netdev_down
+	for trap_name in $(devlink_traps_get); do
+		devlink_trap_stats_idle_test $trap_name
+		check_err $? "Stats of trap $trap_name not idle when netdev down"
+
+		ip link set dev $NETDEV up
+
+		if [ $(devlink_trap_type_get $trap_name) = "drop" ]; then
+			devlink_trap_action_set $trap_name "trap"
+			devlink_trap_stats_idle_test $trap_name
+			check_fail $? "Stats of trap $trap_name idle when action is trap"
+
+			devlink_trap_action_set $trap_name "drop"
+			devlink_trap_stats_idle_test $trap_name
+			check_err $? "Stats of trap $trap_name not idle when action is drop"
+
+			echo "y"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+			devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+			check_fail $? "Managed to read trap (hard dropped) statistics when should not"
+			echo "n"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+			devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+			check_err $? "Did not manage to read trap (hard dropped) statistics when should"
+
+			devlink_trap_drop_stats_idle_test $trap_name
+			check_fail $? "Drop stats of trap $trap_name idle when should not"
+		else
+			devlink_trap_stats_idle_test $trap_name
+			check_fail $? "Stats of non-drop trap $trap_name idle when should not"
+		fi
+
+		ip link set dev $NETDEV down
+	done
+
+	log_test "Trap statistics"
+}
+
+trap_group_action_test()
+{
+	local curr_group group_name
+	local trap_name
+	local trap_type
+	local action
+
+	RET=0
+
+	for group_name in $(devlink_trap_groups_get); do
+		devlink_trap_group_action_set $group_name "trap"
+
+		for trap_name in $(devlink_traps_get); do
+			curr_group=$(devlink_trap_group_get $trap_name)
+			if [ $curr_group != $group_name ]; then
+				continue
+			fi
+
+			trap_type=$(devlink_trap_type_get $trap_name)
+			if [ $trap_type != "drop" ]; then
+				continue
+			fi
+
+			action=$(devlink_trap_action_get $trap_name)
+			if [ $action != "trap" ]; then
+				check_err 1 "Trap $trap_name did not change action to trap"
+			fi
+		done
+
+		devlink_trap_group_action_set $group_name "drop"
+
+		for trap_name in $(devlink_traps_get); do
+			curr_group=$(devlink_trap_group_get $trap_name)
+			if [ $curr_group != $group_name ]; then
+				continue
+			fi
+
+			trap_type=$(devlink_trap_type_get $trap_name)
+			if [ $trap_type != "drop" ]; then
+				continue
+			fi
+
+			action=$(devlink_trap_action_get $trap_name)
+			if [ $action != "drop" ]; then
+				check_err 1 "Trap $trap_name did not change action to drop"
+			fi
+		done
+	done
+
+	log_test "Trap group action"
+}
+
+bad_trap_group_test()
+{
+	RET=0
+
+	devlink_trap_group_action_set "made_up_trap_group" "drop"
+	check_fail $? "Did not get an error for non-existing trap group"
+
+	log_test "Non-existing trap group"
+}
+
+trap_group_stats_test()
+{
+	local group_name
+
+	RET=0
+
+	check_netdev_down
+	for group_name in $(devlink_trap_groups_get); do
+		devlink_trap_group_stats_idle_test $group_name
+		check_err $? "Stats of trap group $group_name not idle when netdev down"
+
+		ip link set dev $NETDEV up
+
+		devlink_trap_group_action_set $group_name "trap"
+		devlink_trap_group_stats_idle_test $group_name
+		check_fail $? "Stats of trap group $group_name idle when action is trap"
+
+		devlink_trap_group_action_set $group_name "drop"
+		ip link set dev $NETDEV down
+	done
+
+	log_test "Trap group statistics"
+}
+
+trap_policer_test()
+{
+	local packets_t0
+	local packets_t1
+
+	RET=0
+
+	if [ $(devlink_trap_policers_num_get) -eq 0 ]; then
+		check_err 1 "Failed to dump policers"
+	fi
+
+	devlink trap policer set $DEVLINK_DEV policer 1337 &> /dev/null
+	check_fail $? "Did not get an error for setting a non-existing policer"
+	devlink trap policer show $DEVLINK_DEV policer 1337 &> /dev/null
+	check_fail $? "Did not get an error for getting a non-existing policer"
+
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 2000 burst 16
+	check_err $? "Failed to set valid parameters for a valid policer"
+	if [ $(devlink_trap_policer_rate_get 1) -ne 2000 ]; then
+		check_err 1 "Policer rate was not changed"
+	fi
+	if [ $(devlink_trap_policer_burst_get 1) -ne 16 ]; then
+		check_err 1 "Policer burst size was not changed"
+	fi
+
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 0 &> /dev/null
+	check_fail $? "Policer rate was changed to rate lower than limit"
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 9000 &> /dev/null
+	check_fail $? "Policer rate was changed to rate higher than limit"
+	devlink trap policer set $DEVLINK_DEV policer 1 burst 2 &> /dev/null
+	check_fail $? "Policer burst size was changed to burst size lower than limit"
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 65537 &> /dev/null
+	check_fail $? "Policer burst size was changed to burst size higher than limit"
+	echo "y" > $DEBUGFS_DIR/fail_trap_policer_set
+	devlink trap policer set $DEVLINK_DEV policer 1 rate 3000 &> /dev/null
+	check_fail $? "Managed to set policer rate when should not"
+	echo "n" > $DEBUGFS_DIR/fail_trap_policer_set
+	if [ $(devlink_trap_policer_rate_get 1) -ne 2000 ]; then
+		check_err 1 "Policer rate was changed to an invalid value"
+	fi
+	if [ $(devlink_trap_policer_burst_get 1) -ne 16 ]; then
+		check_err 1 "Policer burst size was changed to an invalid value"
+	fi
+
+	packets_t0=$(devlink_trap_policer_rx_dropped_get 1)
+	sleep .5
+	packets_t1=$(devlink_trap_policer_rx_dropped_get 1)
+	if [ ! $packets_t1 -gt $packets_t0 ]; then
+		check_err 1 "Policer drop counter was not incremented"
+	fi
+
+	echo "y"> $DEBUGFS_DIR/fail_trap_policer_counter_get
+	devlink -s trap policer show $DEVLINK_DEV policer 1 &> /dev/null
+	check_fail $? "Managed to read policer drop counter when should not"
+	echo "n"> $DEBUGFS_DIR/fail_trap_policer_counter_get
+	devlink -s trap policer show $DEVLINK_DEV policer 1 &> /dev/null
+	check_err $? "Did not manage to read policer drop counter when should"
+
+	log_test "Trap policer"
+}
+
+trap_group_check_policer()
+{
+	local group_name=$1; shift
+
+	devlink -j -p trap group show $DEVLINK_DEV group $group_name \
+		| jq -e '.[][][]["policer"]' &> /dev/null
+}
+
+trap_policer_bind_test()
+{
+	RET=0
+
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 1
+	check_err $? "Failed to bind a valid policer"
+	if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then
+		check_err 1 "Bound policer was not changed"
+	fi
+
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 1337 \
+		&> /dev/null
+	check_fail $? "Did not get an error for binding a non-existing policer"
+	if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then
+		check_err 1 "Bound policer was changed when should not"
+	fi
+
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 0
+	check_err $? "Failed to unbind a policer when using ID 0"
+	trap_group_check_policer "l2_drops"
+	check_fail $? "Trap group has a policer after unbinding with ID 0"
+
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 1
+	check_err $? "Failed to bind a valid policer"
+
+	devlink trap group set $DEVLINK_DEV group l2_drops nopolicer
+	check_err $? "Failed to unbind a policer when using 'nopolicer' keyword"
+	trap_group_check_policer "l2_drops"
+	check_fail $? "Trap group has a policer after unbinding with 'nopolicer' keyword"
+
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 1
+	check_err $? "Failed to bind a valid policer"
+
+	echo "y"> $DEBUGFS_DIR/fail_trap_group_set
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 2 \
+		&> /dev/null
+	check_fail $? "Managed to bind a policer when should not"
+	echo "n"> $DEBUGFS_DIR/fail_trap_group_set
+	devlink trap group set $DEVLINK_DEV group l2_drops policer 2
+	check_err $? "Did not manage to bind a policer when should"
+
+	devlink trap group set $DEVLINK_DEV group l2_drops action drop \
+		policer 1337 &> /dev/null
+	check_fail $? "Did not get an error for partially modified trap group"
+
+	log_test "Trap policer binding"
+}
+
+port_del_test()
+{
+	local group_name
+	local i
+
+	# The test never fails. It is meant to exercise different code paths
+	# and make sure we properly dismantle a port while packets are
+	# in-flight.
+	RET=0
+
+	devlink_traps_enable_all
+
+	for i in $(seq 1 10); do
+		ip link set dev $NETDEV up
+
+		sleep $SLEEP_TIME
+
+		netdevsim_port_destroy
+		netdevsim_port_create
+		udevadm settle
+	done
+
+	devlink_traps_disable_all
+
+	log_test "Port delete"
+}
+
+dev_del_test()
+{
+	local group_name
+	local i
+
+	# The test never fails. It is meant to exercise different code paths
+	# and make sure we properly unregister traps while packets are
+	# in-flight.
+	RET=0
+
+	devlink_traps_enable_all
+
+	for i in $(seq 1 10); do
+		ip link set dev $NETDEV up
+
+		sleep $SLEEP_TIME
+
+		cleanup
+		setup_prepare
+	done
+
+	devlink_traps_disable_all
+
+	log_test "Device delete"
+}
+
+netdevsim_dev_create()
+{
+	echo "$DEV_ADDR 0" > ${NETDEVSIM_PATH}/new_device
+}
+
+netdevsim_dev_destroy()
+{
+	echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+}
+
+netdevsim_port_create()
+{
+	echo 1 > ${NETDEVSIM_PATH}/devices/${DEV}/new_port
+}
+
+netdevsim_port_destroy()
+{
+	echo 1 > ${NETDEVSIM_PATH}/devices/${DEV}/del_port
+}
+
+setup_prepare()
+{
+	local netdev
+
+	netdevsim_dev_create
+
+	if [ ! -d "${NETDEVSIM_PATH}/devices/${DEV}" ]; then
+		echo "Failed to create netdevsim device"
+		exit 1
+	fi
+
+	netdevsim_port_create
+
+	if [ ! -d "${NETDEVSIM_PATH}/devices/${DEV}/net/" ]; then
+		echo "Failed to create netdevsim port"
+		exit 1
+	fi
+
+	# Wait for udev to rename newly created netdev.
+	udevadm settle
+
+	NETDEV=$(ls ${NETDEVSIM_PATH}/devices/${DEV}/net/)
+}
+
+cleanup()
+{
+	pre_cleanup
+	netdevsim_port_destroy
+	netdevsim_dev_destroy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh
new file mode 100755
index 000000000000..9adfba8f87e6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+function get_value {
+    local query="${SETTINGS_MAP[$1]}"
+
+    echo $(ethtool -c $NSIM_NETDEV | \
+        awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[ \t]/, "", $2); print $2}')
+}
+
+function update_current_settings {
+    for key in ${!SETTINGS_MAP[@]}; do
+        CURRENT_SETTINGS[$key]=$(get_value $key)
+    done
+    echo ${CURRENT_SETTINGS[@]}
+}
+
+if ! ethtool -h | grep -q coalesce; then
+    echo "SKIP: No --coalesce support in ethtool"
+    exit 4
+fi
+
+NSIM_NETDEV=$(make_netdev)
+
+set -o pipefail
+
+declare -A SETTINGS_MAP=(
+    ["rx-frames-low"]="rx-frame-low"
+    ["tx-frames-low"]="tx-frame-low"
+    ["rx-frames-high"]="rx-frame-high"
+    ["tx-frames-high"]="tx-frame-high"
+    ["rx-usecs"]="rx-usecs"
+    ["rx-frames"]="rx-frames"
+    ["rx-usecs-irq"]="rx-usecs-irq"
+    ["rx-frames-irq"]="rx-frames-irq"
+    ["tx-usecs"]="tx-usecs"
+    ["tx-frames"]="tx-frames"
+    ["tx-usecs-irq"]="tx-usecs-irq"
+    ["tx-frames-irq"]="tx-frames-irq"
+    ["stats-block-usecs"]="stats-block-usecs"
+    ["pkt-rate-low"]="pkt-rate-low"
+    ["rx-usecs-low"]="rx-usecs-low"
+    ["tx-usecs-low"]="tx-usecs-low"
+    ["pkt-rate-high"]="pkt-rate-high"
+    ["rx-usecs-high"]="rx-usecs-high"
+    ["tx-usecs-high"]="tx-usecs-high"
+    ["sample-interval"]="sample-interval"
+)
+
+declare -A CURRENT_SETTINGS=(
+    ["rx-frames-low"]=""
+    ["tx-frames-low"]=""
+    ["rx-frames-high"]=""
+    ["tx-frames-high"]=""
+    ["rx-usecs"]=""
+    ["rx-frames"]=""
+    ["rx-usecs-irq"]=""
+    ["rx-frames-irq"]=""
+    ["tx-usecs"]=""
+    ["tx-frames"]=""
+    ["tx-usecs-irq"]=""
+    ["tx-frames-irq"]=""
+    ["stats-block-usecs"]=""
+    ["pkt-rate-low"]=""
+    ["rx-usecs-low"]=""
+    ["tx-usecs-low"]=""
+    ["pkt-rate-high"]=""
+    ["rx-usecs-high"]=""
+    ["tx-usecs-high"]=""
+    ["sample-interval"]=""
+)
+
+declare -A EXPECTED_SETTINGS=(
+    ["rx-frames-low"]=""
+    ["tx-frames-low"]=""
+    ["rx-frames-high"]=""
+    ["tx-frames-high"]=""
+    ["rx-usecs"]=""
+    ["rx-frames"]=""
+    ["rx-usecs-irq"]=""
+    ["rx-frames-irq"]=""
+    ["tx-usecs"]=""
+    ["tx-frames"]=""
+    ["tx-usecs-irq"]=""
+    ["tx-frames-irq"]=""
+    ["stats-block-usecs"]=""
+    ["pkt-rate-low"]=""
+    ["rx-usecs-low"]=""
+    ["tx-usecs-low"]=""
+    ["pkt-rate-high"]=""
+    ["rx-usecs-high"]=""
+    ["tx-usecs-high"]=""
+    ["sample-interval"]=""
+)
+
+# populate the expected settings map
+for key in ${!SETTINGS_MAP[@]}; do
+    EXPECTED_SETTINGS[$key]=$(get_value $key)
+done
+
+# test
+for key in ${!SETTINGS_MAP[@]}; do
+    value=$((RANDOM % $((2**32-1))))
+
+    ethtool -C $NSIM_NETDEV "$key" "$value"
+
+    EXPECTED_SETTINGS[$key]="$value"
+    expected=${EXPECTED_SETTINGS[@]}
+    current=$(update_current_settings)
+
+    check $? "$current" "$expected"
+    set +x
+done
+
+# bool settings which ethtool displays on the same line
+ethtool -C $NSIM_NETDEV adaptive-rx on
+s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on  TX: off")
+check $? "$s" ""
+
+ethtool -C $NSIM_NETDEV adaptive-tx on
+s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on  TX: on")
+check $? "$s" ""
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $((num_passes)) checks"
+    exit 0
+else
+    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+    exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
new file mode 100644
index 000000000000..80160579e0cc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+NSIM_ID=$((RANDOM % 1024))
+NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
+NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0
+NSIM_NETDEV=
+num_passes=0
+num_errors=0
+
+function cleanup_nsim {
+    if [ -e $NSIM_DEV_SYS ]; then
+	echo $NSIM_ID > /sys/bus/netdevsim/del_device
+    fi
+}
+
+function cleanup {
+    cleanup_nsim
+}
+
+trap cleanup EXIT
+
+function check {
+    local code=$1
+    local str=$2
+    local exp_str=$3
+    local exp_fail=$4
+
+    [ -z "$exp_fail" ] && cop="-ne" || cop="-eq"
+
+    if [ $code $cop 0 ]; then
+	((num_errors++))
+	return
+    fi
+
+    if [ "$str" != "$exp_str"  ]; then
+	echo -e "Expected: '$exp_str', got '$str'"
+	((num_errors++))
+	return
+    fi
+
+    ((num_passes++))
+}
+
+function make_netdev {
+    # Make a netdevsim
+    old_netdevs=$(ls /sys/class/net)
+
+    if ! $(lsmod | grep -q netdevsim); then
+	modprobe netdevsim
+    fi
+
+    echo $NSIM_ID $@ > /sys/bus/netdevsim/new_device
+    udevadm settle
+    # get new device name
+    ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net/
+}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh
new file mode 100644
index 000000000000..bc210dc6ad2d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+NSIM_NETDEV=$(make_netdev)
+
+set -o pipefail
+
+FEATS="
+  tx-checksum-ip-generic
+  tx-scatter-gather
+  tx-tcp-segmentation
+  generic-segmentation-offload
+  generic-receive-offload"
+
+for feat in $FEATS ; do
+    s=$(ethtool --json -k $NSIM_NETDEV | jq ".[].\"$feat\".active" 2>/dev/null)
+    check $? "$s" true
+
+    s=$(ethtool --json -k $NSIM_NETDEV | jq ".[].\"$feat\".fixed" 2>/dev/null)
+    check $? "$s" false
+done
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $((num_passes)) checks"
+    exit 0
+else
+    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+    exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
new file mode 100755
index 000000000000..6c52ce1b0450
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+NSIM_NETDEV=$(make_netdev)
+[ a$ETHTOOL == a ] && ETHTOOL=ethtool
+
+set -o pipefail
+
+# Since commit 2b3ddcb35357 ("ethtool: fec: Change the prompt ...")
+# in ethtool CLI the Configured lines start with Supported/Configured.
+configured=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2 | head -1 | cut -d' ' -f1)
+
+# netdevsim starts out with None/None
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "$configured FEC encodings: None
+Active FEC encoding: None"
+
+# Test Auto
+$ETHTOOL --set-fec $NSIM_NETDEV encoding auto
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "$configured FEC encodings: Auto
+Active FEC encoding: Off"
+
+# Test case in-sensitivity
+for o in off Off OFF; do
+    $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+    check $?
+    s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+    check $? "$s" "$configured FEC encodings: Off
+Active FEC encoding: Off"
+done
+
+for o in BaseR baser BAser; do
+    $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+    check $?
+    s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+    check $? "$s" "$configured FEC encodings: BaseR
+Active FEC encoding: BaseR"
+done
+
+for o in llrs rs; do
+    $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+    check $?
+    s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+    check $? "$s" "$configured FEC encodings: ${o^^}
+Active FEC encoding: ${o^^}"
+done
+
+# Test multiple bits
+$ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "$configured FEC encodings: RS LLRS
+Active FEC encoding: LLRS"
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding rs off auto
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "$configured FEC encodings: Auto Off RS
+Active FEC encoding: RS"
+
+# Make sure other link modes are rejected
+$ETHTOOL --set-fec $NSIM_NETDEV encoding FIBRE 2>/dev/null
+check $? '' '' 1
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding bla-bla-bla 2>/dev/null
+check $? '' '' 1
+
+# Try JSON
+$ETHTOOL --json --show-fec $NSIM_NETDEV | jq empty >>/dev/null 2>&1
+if [ $? -eq 0 ]; then
+    $ETHTOOL --set-fec $NSIM_NETDEV encoding auto
+    check $?
+
+    s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]')
+    check $? "$s" '"Auto"'
+    s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]')
+    check $? "$s" '"Off"'
+
+    $ETHTOOL --set-fec $NSIM_NETDEV encoding auto RS
+    check $?
+
+    s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]')
+    check $? "$s" '"Auto"
+"RS"'
+    s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]')
+    check $? "$s" '"RS"'
+fi
+
+# Test error injection
+echo 11 > $NSIM_DEV_DFS/ethtool/get_err
+
+$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1
+check $? '' '' 1
+
+echo 0 > $NSIM_DEV_DFS/ethtool/get_err
+echo 11 > $NSIM_DEV_DFS/ethtool/set_err
+
+$ETHTOOL --show-fec $NSIM_NETDEV  >>/dev/null 2>&1
+check $?
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding RS 2>/dev/null
+check $? '' '' 1
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $((num_passes)) checks"
+    exit 0
+else
+    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+    exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
new file mode 100755
index 000000000000..b4a7abfe5454
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+# Bail if ethtool is too old
+if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then
+    echo "SKIP: No --include-statistics support in ethtool"
+    exit 4
+fi
+
+NSIM_NETDEV=$(make_netdev)
+
+set -o pipefail
+
+echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx
+echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx
+
+s=$(ethtool --json -a $NSIM_NETDEV | jq '.[].statistics')
+check $? "$s" "null"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics')
+check $? "$s" "{}"
+
+echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length')
+check $? "$s" "1"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames')
+check $? "$s" "2"
+
+echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length')
+check $? "$s" "2"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.rx_pause_frames')
+check $? "$s" "1"
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames')
+check $? "$s" "2"
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $((num_passes)) checks"
+    exit 0
+else
+    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+    exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
new file mode 100755
index 000000000000..6800de816e8b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
@@ -0,0 +1,402 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the FIB offload API. It makes use of netdevsim
+# which registers a listener to the FIB notification chain.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	ipv4_identical_routes
+	ipv4_tos
+	ipv4_metric
+	ipv4_replace
+	ipv4_delete
+	ipv4_plen
+	ipv4_replay
+	ipv4_flush
+	ipv4_error_path
+	ipv4_delete_fail
+	ipv6_add
+	ipv6_metric
+	ipv6_append_single
+	ipv6_replace_single
+	ipv6_metric_multipath
+	ipv6_append_multipath
+	ipv6_replace_multipath
+	ipv6_append_multipath_to_single
+	ipv6_delete_single
+	ipv6_delete_multipath
+	ipv6_replay_single
+	ipv6_replay_multipath
+	ipv6_error_path
+	ipv6_delete_fail
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/fib_offload_lib.sh
+
+DEVLINK_DEV=
+source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
+
+ipv4_identical_routes()
+{
+	fib_ipv4_identical_routes_test "testns1"
+}
+
+ipv4_tos()
+{
+	fib_ipv4_tos_test "testns1"
+}
+
+ipv4_metric()
+{
+	fib_ipv4_metric_test "testns1"
+}
+
+ipv4_replace()
+{
+	fib_ipv4_replace_test "testns1"
+}
+
+ipv4_delete()
+{
+	fib_ipv4_delete_test "testns1"
+}
+
+ipv4_plen()
+{
+	fib_ipv4_plen_test "testns1"
+}
+
+ipv4_replay_metric()
+{
+	fib_ipv4_replay_metric_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv4_replay_tos()
+{
+	fib_ipv4_replay_tos_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv4_replay_plen()
+{
+	fib_ipv4_replay_plen_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv4_replay()
+{
+	ipv4_replay_metric
+	ipv4_replay_tos
+	ipv4_replay_plen
+}
+
+ipv4_flush()
+{
+	fib_ipv4_flush_test "testns1"
+}
+
+ipv4_error_path_add()
+{
+	local lsb
+
+	RET=0
+
+	ip -n testns1 link add name dummy1 type dummy
+	ip -n testns1 link set dev dummy1 up
+
+	devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 10
+	devlink -N testns1 dev reload $DEVLINK_DEV
+
+	for lsb in $(seq 1 20); do
+		ip -n testns1 route add 192.0.2.${lsb}/32 dev dummy1 \
+			&> /dev/null
+	done
+
+	log_test "IPv4 error path - add"
+
+	ip -n testns1 link del dev dummy1
+}
+
+ipv4_error_path_replay()
+{
+	local lsb
+
+	RET=0
+
+	ip -n testns1 link add name dummy1 type dummy
+	ip -n testns1 link set dev dummy1 up
+
+	devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 100
+	devlink -N testns1 dev reload $DEVLINK_DEV
+
+	for lsb in $(seq 1 20); do
+		ip -n testns1 route add 192.0.2.${lsb}/32 dev dummy1
+	done
+
+	devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 10
+	devlink -N testns1 dev reload $DEVLINK_DEV &> /dev/null
+
+	log_test "IPv4 error path - replay"
+
+	ip -n testns1 link del dev dummy1
+
+	# Successfully reload after deleting all the routes.
+	devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 100
+	devlink -N testns1 dev reload $DEVLINK_DEV
+}
+
+ipv4_error_path()
+{
+	# Test the different error paths of the notifiers by limiting the size
+	# of the "IPv4/fib" resource.
+	ipv4_error_path_add
+	ipv4_error_path_replay
+}
+
+ipv4_delete_fail()
+{
+	RET=0
+
+	echo "y" > $DEBUGFS_DIR/fib/fail_route_delete
+
+	ip -n testns1 link add name dummy1 type dummy
+	ip -n testns1 link set dev dummy1 up
+
+	ip -n testns1 route add 192.0.2.0/24 dev dummy1
+	ip -n testns1 route del 192.0.2.0/24 dev dummy1 &> /dev/null
+
+	# We should not be able to delete the netdev if we are leaking a
+	# reference.
+	ip -n testns1 link del dev dummy1
+
+	log_test "IPv4 route delete failure"
+
+	echo "n" > $DEBUGFS_DIR/fib/fail_route_delete
+}
+
+ipv6_add()
+{
+	fib_ipv6_add_test "testns1"
+}
+
+ipv6_metric()
+{
+	fib_ipv6_metric_test "testns1"
+}
+
+ipv6_append_single()
+{
+	fib_ipv6_append_single_test "testns1"
+}
+
+ipv6_replace_single()
+{
+	fib_ipv6_replace_single_test "testns1"
+}
+
+ipv6_metric_multipath()
+{
+	fib_ipv6_metric_multipath_test "testns1"
+}
+
+ipv6_append_multipath()
+{
+	fib_ipv6_append_multipath_test "testns1"
+}
+
+ipv6_replace_multipath()
+{
+	fib_ipv6_replace_multipath_test "testns1"
+}
+
+ipv6_append_multipath_to_single()
+{
+	fib_ipv6_append_multipath_to_single_test "testns1"
+}
+
+ipv6_delete_single()
+{
+	fib_ipv6_delete_single_test "testns1"
+}
+
+ipv6_delete_multipath()
+{
+	fib_ipv6_delete_multipath_test "testns1"
+}
+
+ipv6_replay_single()
+{
+	fib_ipv6_replay_single_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv6_replay_multipath()
+{
+	fib_ipv6_replay_multipath_test "testns1" "$DEVLINK_DEV"
+}
+
+ipv6_error_path_add_single()
+{
+	local lsb
+
+	RET=0
+
+	ip -n testns1 link add name dummy1 type dummy
+	ip -n testns1 link set dev dummy1 up
+
+	devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10
+	devlink -N testns1 dev reload $DEVLINK_DEV
+
+	for lsb in $(seq 1 20); do
+		ip -n testns1 route add 2001:db8:1::${lsb}/128 dev dummy1 \
+			&> /dev/null
+	done
+
+	log_test "IPv6 error path - add single"
+
+	ip -n testns1 link del dev dummy1
+}
+
+ipv6_error_path_add_multipath()
+{
+	local lsb
+
+	RET=0
+
+	for i in $(seq 1 2); do
+		ip -n testns1 link add name dummy$i type dummy
+		ip -n testns1 link set dev dummy$i up
+		ip -n testns1 address add 2001:db8:$i::1/64 dev dummy$i
+	done
+
+	devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10
+	devlink -N testns1 dev reload $DEVLINK_DEV
+
+	for lsb in $(seq 1 20); do
+		ip -n testns1 route add 2001:db8:10::${lsb}/128 \
+			nexthop via 2001:db8:1::2 dev dummy1 \
+			nexthop via 2001:db8:2::2 dev dummy2 &> /dev/null
+	done
+
+	log_test "IPv6 error path - add multipath"
+
+	for i in $(seq 1 2); do
+		ip -n testns1 link del dev dummy$i
+	done
+}
+
+ipv6_error_path_replay()
+{
+	local lsb
+
+	RET=0
+
+	ip -n testns1 link add name dummy1 type dummy
+	ip -n testns1 link set dev dummy1 up
+
+	devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 100
+	devlink -N testns1 dev reload $DEVLINK_DEV
+
+	for lsb in $(seq 1 20); do
+		ip -n testns1 route add 2001:db8:1::${lsb}/128 dev dummy1
+	done
+
+	devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10
+	devlink -N testns1 dev reload $DEVLINK_DEV &> /dev/null
+
+	log_test "IPv6 error path - replay"
+
+	ip -n testns1 link del dev dummy1
+
+	# Successfully reload after deleting all the routes.
+	devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 100
+	devlink -N testns1 dev reload $DEVLINK_DEV
+}
+
+ipv6_error_path()
+{
+	# Test the different error paths of the notifiers by limiting the size
+	# of the "IPv6/fib" resource.
+	ipv6_error_path_add_single
+	ipv6_error_path_add_multipath
+	ipv6_error_path_replay
+}
+
+ipv6_delete_fail()
+{
+	RET=0
+
+	echo "y" > $DEBUGFS_DIR/fib/fail_route_delete
+
+	ip -n testns1 link add name dummy1 type dummy
+	ip -n testns1 link set dev dummy1 up
+
+	ip -n testns1 route add 2001:db8:1::/64 dev dummy1
+	ip -n testns1 route del 2001:db8:1::/64 dev dummy1 &> /dev/null
+
+	# We should not be able to delete the netdev if we are leaking a
+	# reference.
+	ip -n testns1 link del dev dummy1
+
+	log_test "IPv6 route delete failure"
+
+	echo "n" > $DEBUGFS_DIR/fib/fail_route_delete
+}
+
+fib_notify_on_flag_change_set()
+{
+	local notify=$1; shift
+
+	ip netns exec testns1 sysctl -qw net.ipv4.fib_notify_on_flag_change=$notify
+	ip netns exec testns1 sysctl -qw net.ipv6.fib_notify_on_flag_change=$notify
+
+	log_info "Set fib_notify_on_flag_change to $notify"
+}
+
+setup_prepare()
+{
+	local netdev
+
+	modprobe netdevsim &> /dev/null
+
+	echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+	while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+	ip netns add testns1
+	if [ $? -ne 0 ]; then
+		echo "Failed to add netns \"testns1\""
+		exit 1
+	fi
+
+	devlink dev reload $DEVLINK_DEV netns testns1
+	if [ $? -ne 0 ]; then
+		echo "Failed to reload into netns \"testns1\""
+		exit 1
+	fi
+}
+
+cleanup()
+{
+	pre_cleanup
+	ip netns del testns1
+	echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+	modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+fib_notify_on_flag_change_set 1
+tests_run
+
+fib_notify_on_flag_change_set 0
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh
new file mode 100755
index 000000000000..9896580c3d85
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh
@@ -0,0 +1,430 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	ipv4_route_addition_test
+	ipv4_route_deletion_test
+	ipv4_route_replacement_test
+	ipv4_route_offload_failed_test
+	ipv6_route_addition_test
+	ipv6_route_deletion_test
+	ipv6_route_replacement_test
+	ipv6_route_offload_failed_test
+"
+
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+check_rt_offload_failed()
+{
+	local outfile=$1; shift
+	local line
+
+	# Make sure that the first notification was emitted without
+	# RTM_F_OFFLOAD_FAILED flag and the second with RTM_F_OFFLOAD_FAILED
+	# flag
+	head -n 1 $outfile | grep -q "rt_offload_failed"
+	if [[ $? -eq 0 ]]; then
+		return 1
+	fi
+
+	head -n 2 $outfile | tail -n 1 | grep -q "rt_offload_failed"
+}
+
+check_rt_trap()
+{
+	local outfile=$1; shift
+	local line
+
+	# Make sure that the first notification was emitted without RTM_F_TRAP
+	# flag and the second with RTM_F_TRAP flag
+	head -n 1 $outfile | grep -q "rt_trap"
+	if [[ $? -eq 0 ]]; then
+		return 1
+	fi
+
+	head -n 2 $outfile | tail -n 1 | grep -q "rt_trap"
+}
+
+route_notify_check()
+{
+	local outfile=$1; shift
+	local expected_num_lines=$1; shift
+	local offload_failed=${1:-0}; shift
+
+	# check the monitor results
+	lines=`wc -l $outfile | cut "-d " -f1`
+	test $lines -eq $expected_num_lines
+	check_err $? "$expected_num_lines notifications were expected but $lines were received"
+
+	if [[ $expected_num_lines -eq 1 ]]; then
+		return
+	fi
+
+	if [[ $offload_failed -eq 0 ]]; then
+		check_rt_trap $outfile
+		check_err $? "Wrong RTM_F_TRAP flags in notifications"
+	else
+		check_rt_offload_failed $outfile
+		check_err $? "Wrong RTM_F_OFFLOAD_FAILED flags in notifications"
+	fi
+}
+
+route_addition_check()
+{
+	local ip=$1; shift
+	local notify=$1; shift
+	local route=$1; shift
+	local expected_num_notifications=$1; shift
+	local offload_failed=${1:-0}; shift
+
+	ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify
+
+	local outfile=$(mktemp)
+
+	$IP monitor route &> $outfile &
+	sleep 1
+	$IP route add $route dev dummy1
+	sleep 1
+	kill_process %%
+
+	route_notify_check $outfile $expected_num_notifications $offload_failed
+	rm -f $outfile
+
+	$IP route del $route dev dummy1
+}
+
+ipv4_route_addition_test()
+{
+	RET=0
+
+	local ip="ipv4"
+	local route=192.0.2.0/24
+
+	# Make sure a single notification will be emitted for the programmed
+	# route.
+	local notify=0
+	local expected_num_notifications=1
+	# route_addition_check will assign value to RET.
+	route_addition_check $ip $notify $route $expected_num_notifications
+
+	# Make sure two notifications will be emitted for the programmed route.
+	notify=1
+	expected_num_notifications=2
+	route_addition_check $ip $notify $route $expected_num_notifications
+
+	# notify=2 means emit notifications only for failed route installation,
+	# make sure a single notification will be emitted for the programmed
+	# route.
+	notify=2
+	expected_num_notifications=1
+	route_addition_check $ip $notify $route $expected_num_notifications
+
+	log_test "IPv4 route addition"
+}
+
+route_deletion_check()
+{
+	local ip=$1; shift
+	local notify=$1; shift
+	local route=$1; shift
+	local expected_num_notifications=$1; shift
+
+	ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify
+	$IP route add $route dev dummy1
+	sleep 1
+
+	local outfile=$(mktemp)
+
+	$IP monitor route &> $outfile &
+	sleep 1
+	$IP route del $route dev dummy1
+	sleep 1
+	kill_process %%
+
+	route_notify_check $outfile $expected_num_notifications
+	rm -f $outfile
+}
+
+ipv4_route_deletion_test()
+{
+	RET=0
+
+	local ip="ipv4"
+	local route=192.0.2.0/24
+	local expected_num_notifications=1
+
+	# Make sure a single notification will be emitted for the deleted route,
+	# regardless of fib_notify_on_flag_change value.
+	local notify=0
+	# route_deletion_check will assign value to RET.
+	route_deletion_check $ip $notify $route $expected_num_notifications
+
+	notify=1
+	route_deletion_check $ip $notify $route $expected_num_notifications
+
+	log_test "IPv4 route deletion"
+}
+
+route_replacement_check()
+{
+	local ip=$1; shift
+	local notify=$1; shift
+	local route=$1; shift
+	local expected_num_notifications=$1; shift
+
+	ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify
+	$IP route add $route dev dummy1
+	sleep 1
+
+	local outfile=$(mktemp)
+
+	$IP monitor route &> $outfile &
+	sleep 1
+	$IP route replace $route dev dummy2
+	sleep 1
+	kill_process %%
+
+	route_notify_check $outfile $expected_num_notifications
+	rm -f $outfile
+
+	$IP route del $route dev dummy2
+}
+
+ipv4_route_replacement_test()
+{
+	RET=0
+
+	local ip="ipv4"
+	local route=192.0.2.0/24
+
+	$IP link add name dummy2 type dummy
+	$IP link set dev dummy2 up
+
+	# Make sure a single notification will be emitted for the new route.
+	local notify=0
+	local expected_num_notifications=1
+	# route_replacement_check will assign value to RET.
+	route_replacement_check $ip $notify $route $expected_num_notifications
+
+	# Make sure two notifications will be emitted for the new route.
+	notify=1
+	expected_num_notifications=2
+	route_replacement_check $ip $notify $route $expected_num_notifications
+
+	# notify=2 means emit notifications only for failed route installation,
+	# make sure a single notification will be emitted for the new route.
+	notify=2
+	expected_num_notifications=1
+	route_replacement_check $ip $notify $route $expected_num_notifications
+
+	$IP link del name dummy2
+
+	log_test "IPv4 route replacement"
+}
+
+ipv4_route_offload_failed_test()
+{
+
+	RET=0
+
+	local ip="ipv4"
+	local route=192.0.2.0/24
+	local offload_failed=1
+
+	echo "y"> $DEBUGFS_DIR/fib/fail_route_offload
+	check_err $? "Failed to setup route offload to fail"
+
+	# Make sure a single notification will be emitted for the programmed
+	# route.
+	local notify=0
+	local expected_num_notifications=1
+	route_addition_check $ip $notify $route $expected_num_notifications \
+		$offload_failed
+
+	# Make sure two notifications will be emitted for the new route.
+	notify=1
+	expected_num_notifications=2
+	route_addition_check $ip $notify $route $expected_num_notifications \
+		$offload_failed
+
+	# notify=2 means emit notifications only for failed route installation,
+	# make sure two notifications will be emitted for the new route.
+	notify=2
+	expected_num_notifications=2
+	route_addition_check $ip $notify $route $expected_num_notifications \
+		$offload_failed
+
+	echo "n"> $DEBUGFS_DIR/fib/fail_route_offload
+	check_err $? "Failed to setup route offload not to fail"
+
+	log_test "IPv4 route offload failed"
+}
+
+ipv6_route_addition_test()
+{
+	RET=0
+
+	local ip="ipv6"
+	local route=2001:db8:1::/64
+
+	# Make sure a single notification will be emitted for the programmed
+	# route.
+	local notify=0
+	local expected_num_notifications=1
+	route_addition_check $ip $notify $route $expected_num_notifications
+
+	# Make sure two notifications will be emitted for the programmed route.
+	notify=1
+	expected_num_notifications=2
+	route_addition_check $ip $notify $route $expected_num_notifications
+
+	# notify=2 means emit notifications only for failed route installation,
+	# make sure a single notification will be emitted for the programmed
+	# route.
+	notify=2
+	expected_num_notifications=1
+	route_addition_check $ip $notify $route $expected_num_notifications
+
+	log_test "IPv6 route addition"
+}
+
+ipv6_route_deletion_test()
+{
+	RET=0
+
+	local ip="ipv6"
+	local route=2001:db8:1::/64
+	local expected_num_notifications=1
+
+	# Make sure a single notification will be emitted for the deleted route,
+	# regardless of fib_notify_on_flag_change value.
+	local notify=0
+	route_deletion_check $ip $notify $route $expected_num_notifications
+
+	notify=1
+	route_deletion_check $ip $notify $route $expected_num_notifications
+
+	log_test "IPv6 route deletion"
+}
+
+ipv6_route_replacement_test()
+{
+	RET=0
+
+	local ip="ipv6"
+	local route=2001:db8:1::/64
+
+	$IP link add name dummy2 type dummy
+	$IP link set dev dummy2 up
+
+	# Make sure a single notification will be emitted for the new route.
+	local notify=0
+	local expected_num_notifications=1
+	route_replacement_check $ip $notify $route $expected_num_notifications
+
+	# Make sure two notifications will be emitted for the new route.
+	notify=1
+	expected_num_notifications=2
+	route_replacement_check $ip $notify $route $expected_num_notifications
+
+	# notify=2 means emit notifications only for failed route installation,
+	# make sure a single notification will be emitted for the new route.
+	notify=2
+	expected_num_notifications=1
+	route_replacement_check $ip $notify $route $expected_num_notifications
+
+	$IP link del name dummy2
+
+	log_test "IPv6 route replacement"
+}
+
+ipv6_route_offload_failed_test()
+{
+
+	RET=0
+
+	local ip="ipv6"
+	local route=2001:db8:1::/64
+	local offload_failed=1
+
+	echo "y"> $DEBUGFS_DIR/fib/fail_route_offload
+	check_err $? "Failed to setup route offload to fail"
+
+	# Make sure a single notification will be emitted for the programmed
+	# route.
+	local notify=0
+	local expected_num_notifications=1
+	route_addition_check $ip $notify $route $expected_num_notifications \
+		$offload_failed
+
+	# Make sure two notifications will be emitted for the new route.
+	notify=1
+	expected_num_notifications=2
+	route_addition_check $ip $notify $route $expected_num_notifications \
+		$offload_failed
+
+	# notify=2 means emit notifications only for failed route installation,
+	# make sure two notifications will be emitted for the new route.
+	notify=2
+	expected_num_notifications=2
+	route_addition_check $ip $notify $route $expected_num_notifications \
+		$offload_failed
+
+	echo "n"> $DEBUGFS_DIR/fib/fail_route_offload
+	check_err $? "Failed to setup route offload not to fail"
+
+	log_test "IPv6 route offload failed"
+}
+
+setup_prepare()
+{
+	modprobe netdevsim &> /dev/null
+	echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+	while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+	ip netns add testns1
+
+	if [ $? -ne 0 ]; then
+		echo "Failed to add netns \"testns1\""
+		exit 1
+	fi
+
+	devlink dev reload $DEVLINK_DEV netns testns1
+
+	if [ $? -ne 0 ]; then
+		echo "Failed to reload into netns \"testns1\""
+		exit 1
+	fi
+
+	IP="ip -n testns1"
+
+	$IP link add name dummy1 type dummy
+	$IP link set dev dummy1 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	$IP link del name dummy1
+	ip netns del testns1
+	echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+	modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
new file mode 100755
index 000000000000..cba5ac08426b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
@@ -0,0 +1,421 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	l3_reporting_test
+	l3_fail_next_test
+	l3_counter_test
+	l3_rollback_test
+	l3_monitor_test
+"
+
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR_1=1337
+DEV_ADDR_2=1057
+DEV_ADDR_3=5417
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+DUMMY_IFINDEX=
+
+DEV_ADDR()
+{
+	local n=$1; shift
+	local var=DEV_ADDR_$n
+
+	echo ${!var}
+}
+
+DEV()
+{
+	echo netdevsim$(DEV_ADDR $1)
+}
+
+DEVLINK_DEV()
+{
+	echo netdevsim/$(DEV $1)
+}
+
+SYSFS_NET_DIR()
+{
+	echo /sys/bus/netdevsim/devices/$(DEV $1)/net/
+}
+
+DEBUGFS_DIR()
+{
+	echo /sys/kernel/debug/netdevsim/$(DEV $1)/
+}
+
+nsim_add()
+{
+	local n=$1; shift
+
+	echo "$(DEV_ADDR $n) 1" > ${NETDEVSIM_PATH}/new_device
+	while [ ! -d $(SYSFS_NET_DIR $n) ] ; do :; done
+}
+
+nsim_reload()
+{
+	local n=$1; shift
+	local ns=$1; shift
+
+	devlink dev reload $(DEVLINK_DEV $n) netns $ns
+
+	if [ $? -ne 0 ]; then
+		echo "Failed to reload $(DEV $n) into netns \"testns1\""
+		exit 1
+	fi
+
+}
+
+nsim_del()
+{
+	local n=$1; shift
+
+	echo "$(DEV_ADDR $n)" > ${NETDEVSIM_PATH}/del_device
+}
+
+nsim_hwstats_toggle()
+{
+	local action=$1; shift
+	local instance=$1; shift
+	local netdev=$1; shift
+	local type=$1; shift
+
+	local ifindex=$($IP -j link show dev $netdev | jq '.[].ifindex')
+
+	echo $ifindex > $(DEBUGFS_DIR $instance)/hwstats/$type/$action
+}
+
+nsim_hwstats_enable()
+{
+	nsim_hwstats_toggle enable_ifindex "$@"
+}
+
+nsim_hwstats_disable()
+{
+	nsim_hwstats_toggle disable_ifindex "$@"
+}
+
+nsim_hwstats_fail_next_enable()
+{
+	nsim_hwstats_toggle fail_next_enable "$@"
+}
+
+setup_prepare()
+{
+	modprobe netdevsim &> /dev/null
+	nsim_add 1
+	nsim_add 2
+	nsim_add 3
+
+	ip netns add testns1
+
+	if [ $? -ne 0 ]; then
+		echo "Failed to add netns \"testns1\""
+		exit 1
+	fi
+
+	nsim_reload 1 testns1
+	nsim_reload 2 testns1
+	nsim_reload 3 testns1
+
+	IP="ip -n testns1"
+
+	$IP link add name dummy1 type dummy
+	$IP link set dev dummy1 up
+	DUMMY_IFINDEX=$($IP -j link show dev dummy1 | jq '.[].ifindex')
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	$IP link del name dummy1
+	ip netns del testns1
+	nsim_del 3
+	nsim_del 2
+	nsim_del 1
+	modprobe -r netdevsim &> /dev/null
+}
+
+netdev_hwstats_used()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+
+	$IP -j stats show dev "$netdev" group offload subgroup hw_stats_info |
+	    jq '.[].info.l3_stats.used'
+}
+
+netdev_check_used()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+
+	[[ $(netdev_hwstats_used $netdev $type) == "true" ]]
+}
+
+netdev_check_unused()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+
+	[[ $(netdev_hwstats_used $netdev $type) == "false" ]]
+}
+
+netdev_hwstats_request()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+
+	$IP -j stats show dev "$netdev" group offload subgroup hw_stats_info |
+	    jq ".[].info.${type}_stats.request"
+}
+
+netdev_check_requested()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+
+	[[ $(netdev_hwstats_request $netdev $type) == "true" ]]
+}
+
+netdev_check_unrequested()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+
+	[[ $(netdev_hwstats_request $netdev $type) == "false" ]]
+}
+
+reporting_test()
+{
+	local type=$1; shift
+	local instance=1
+
+	RET=0
+
+	[[ -n $(netdev_hwstats_used dummy1 $type) ]]
+	check_err $? "$type stats not reported"
+
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used before either device or netdevsim request"
+
+	nsim_hwstats_enable $instance dummy1 $type
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used before device request"
+	netdev_check_unrequested dummy1 $type
+	check_err $? "$type stats reported as requested before device request"
+
+	$IP stats set dev dummy1 ${type}_stats on
+	netdev_check_used dummy1 $type
+	check_err $? "$type stats reported as not used after both device and netdevsim request"
+	netdev_check_requested dummy1 $type
+	check_err $? "$type stats reported as not requested after device request"
+
+	nsim_hwstats_disable $instance dummy1 $type
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used after netdevsim request withdrawn"
+
+	nsim_hwstats_enable $instance dummy1 $type
+	netdev_check_used dummy1 $type
+	check_err $? "$type stats reported as not used after netdevsim request reenabled"
+
+	$IP stats set dev dummy1 ${type}_stats off
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used after device request withdrawn"
+	netdev_check_unrequested dummy1 $type
+	check_err $? "$type stats reported as requested after device request withdrawn"
+
+	nsim_hwstats_disable $instance dummy1 $type
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used after both requests withdrawn"
+
+	log_test "Reporting of $type stats usage"
+}
+
+l3_reporting_test()
+{
+	reporting_test l3
+}
+
+__fail_next_test()
+{
+	local instance=$1; shift
+	local type=$1; shift
+
+	RET=0
+
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used before either device or netdevsim request"
+
+	nsim_hwstats_enable $instance dummy1 $type
+	nsim_hwstats_fail_next_enable $instance dummy1 $type
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used before device request"
+	netdev_check_unrequested dummy1 $type
+	check_err $? "$type stats reported as requested before device request"
+
+	$IP stats set dev dummy1 ${type}_stats on 2>/dev/null
+	check_fail $? "$type stats request not bounced as it should have been"
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used after bounce"
+	netdev_check_unrequested dummy1 $type
+	check_err $? "$type stats reported as requested after bounce"
+
+	$IP stats set dev dummy1 ${type}_stats on
+	check_err $? "$type stats request failed when it shouldn't have"
+	netdev_check_used dummy1 $type
+	check_err $? "$type stats reported as not used after both device and netdevsim request"
+	netdev_check_requested dummy1 $type
+	check_err $? "$type stats reported as not requested after device request"
+
+	$IP stats set dev dummy1 ${type}_stats off
+	nsim_hwstats_disable $instance dummy1 $type
+
+	log_test "Injected failure of $type stats enablement (netdevsim #$instance)"
+}
+
+fail_next_test()
+{
+	__fail_next_test 1 "$@"
+	__fail_next_test 2 "$@"
+	__fail_next_test 3 "$@"
+}
+
+l3_fail_next_test()
+{
+	fail_next_test l3
+}
+
+get_hwstat()
+{
+	local netdev=$1; shift
+	local type=$1; shift
+	local selector=$1; shift
+
+	$IP -j stats show dev $netdev group offload subgroup ${type}_stats |
+		  jq ".[0].stats64.${selector}"
+}
+
+counter_test()
+{
+	local type=$1; shift
+	local instance=1
+
+	RET=0
+
+	nsim_hwstats_enable $instance dummy1 $type
+	$IP stats set dev dummy1 ${type}_stats on
+	netdev_check_used dummy1 $type
+	check_err $? "$type stats reported as not used after both device and netdevsim request"
+
+	# Netdevsim counts 10pps on ingress. We should see maybe a couple
+	# packets, unless things take a reeealy long time.
+	local pkts=$(get_hwstat dummy1 l3 rx.packets)
+	((pkts < 10))
+	check_err $? "$type stats show >= 10 packets after first enablement"
+
+	sleep 2.5
+
+	local pkts=$(get_hwstat dummy1 l3 rx.packets)
+	((pkts >= 20))
+	check_err $? "$type stats show < 20 packets after 2.5s passed"
+
+	$IP stats set dev dummy1 ${type}_stats off
+
+	sleep 2
+
+	$IP stats set dev dummy1 ${type}_stats on
+	local pkts=$(get_hwstat dummy1 l3 rx.packets)
+	((pkts < 10))
+	check_err $? "$type stats show >= 10 packets after second enablement"
+
+	$IP stats set dev dummy1 ${type}_stats off
+	nsim_hwstats_fail_next_enable $instance dummy1 $type
+	$IP stats set dev dummy1 ${type}_stats on 2>/dev/null
+	check_fail $? "$type stats request not bounced as it should have been"
+
+	sleep 2
+
+	$IP stats set dev dummy1 ${type}_stats on
+	local pkts=$(get_hwstat dummy1 l3 rx.packets)
+	((pkts < 10))
+	check_err $? "$type stats show >= 10 packets after post-fail enablement"
+
+	$IP stats set dev dummy1 ${type}_stats off
+
+	log_test "Counter values in $type stats"
+}
+
+l3_counter_test()
+{
+	counter_test l3
+}
+
+rollback_test()
+{
+	local type=$1; shift
+
+	RET=0
+
+	nsim_hwstats_enable 1 dummy1 l3
+	nsim_hwstats_enable 2 dummy1 l3
+	nsim_hwstats_enable 3 dummy1 l3
+
+	# The three netdevsim instances are registered in order of their number
+	# one after another. It is reasonable to expect that whatever
+	# notifications take place hit no. 2 in between hitting nos. 1 and 3,
+	# whatever the actual order. This allows us to test that a fail caused
+	# by no. 2 does not leave the system in a partial state, and rolls
+	# everything back.
+
+	nsim_hwstats_fail_next_enable 2 dummy1 l3
+	$IP stats set dev dummy1 ${type}_stats on 2>/dev/null
+	check_fail $? "$type stats request not bounced as it should have been"
+
+	netdev_check_unused dummy1 $type
+	check_err $? "$type stats reported as used after bounce"
+	netdev_check_unrequested dummy1 $type
+	check_err $? "$type stats reported as requested after bounce"
+
+	sleep 2
+
+	$IP stats set dev dummy1 ${type}_stats on
+	check_err $? "$type stats request not upheld as it should have been"
+
+	local pkts=$(get_hwstat dummy1 l3 rx.packets)
+	((pkts < 10))
+	check_err $? "$type stats show $pkts packets after post-fail enablement"
+
+	$IP stats set dev dummy1 ${type}_stats off
+
+	nsim_hwstats_disable 3 dummy1 l3
+	nsim_hwstats_disable 2 dummy1 l3
+	nsim_hwstats_disable 1 dummy1 l3
+
+	log_test "Failure in $type stats enablement rolled back"
+}
+
+l3_rollback_test()
+{
+	rollback_test l3
+}
+
+l3_monitor_test()
+{
+	hw_stats_monitor_test dummy1 l3		   \
+		"nsim_hwstats_enable 1 dummy1 l3"  \
+		"nsim_hwstats_disable 1 dummy1 l3" \
+		"$IP"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh b/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh
new file mode 100755
index 000000000000..98033e6667d2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+NSIM_NETDEV=$(make_netdev)
+MACSEC_NETDEV=macsec_nsim
+
+set -o pipefail
+
+if ! ethtool -k $NSIM_NETDEV | grep -q 'macsec-hw-offload: on'; then
+    echo "SKIP: netdevsim doesn't support MACsec offload"
+    exit 4
+fi
+
+if ! ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac 2>/dev/null; then
+    echo "SKIP: couldn't create macsec device"
+    exit 4
+fi
+ip link del $MACSEC_NETDEV
+
+#
+# test macsec offload API
+#
+
+ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}" type macsec port 4 offload mac
+check $?
+
+ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}2" type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac
+check $?
+
+ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}3" type macsec sci abbacdde01020304 offload mac
+check $?
+
+ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}4" type macsec port 8 offload mac 2> /dev/null
+check $? '' '' 1
+
+ip macsec add "${MACSEC_NETDEV}" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+check $?
+
+ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef"
+check $?
+
+ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \
+    key 00 0123456789abcdef0123456789abcdef
+check $?
+
+ip macsec add "${MACSEC_NETDEV}" rx port 1235 address "1c:ed:de:ad:be:ef" 2> /dev/null
+check $? '' '' 1
+
+# can't disable macsec offload when SAs are configured
+ip link set "${MACSEC_NETDEV}" type macsec offload off 2> /dev/null
+check $? '' '' 1
+
+ip macsec offload "${MACSEC_NETDEV}" off 2> /dev/null
+check $? '' '' 1
+
+# toggle macsec offload via rtnetlink
+ip link set "${MACSEC_NETDEV}2" type macsec offload off
+check $?
+
+ip link set "${MACSEC_NETDEV}2" type macsec offload mac
+check $?
+
+# toggle macsec offload via genetlink
+ip macsec offload "${MACSEC_NETDEV}2" off
+check $?
+
+ip macsec offload "${MACSEC_NETDEV}2" mac
+check $?
+
+for dev in ${MACSEC_NETDEV}{,2,3} ; do
+    ip link del $dev
+    check $?
+done
+
+
+#
+# test ethtool features when toggling offload
+#
+
+ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac
+TMP_FEATS_ON_1="$(ethtool -k $MACSEC_NETDEV)"
+
+ip link set $MACSEC_NETDEV type macsec offload off
+TMP_FEATS_OFF_1="$(ethtool -k $MACSEC_NETDEV)"
+
+ip link set $MACSEC_NETDEV type macsec offload mac
+TMP_FEATS_ON_2="$(ethtool -k $MACSEC_NETDEV)"
+
+[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_2" ]
+check $?
+
+ip link del $MACSEC_NETDEV
+
+ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec
+check $?
+
+TMP_FEATS_OFF_2="$(ethtool -k $MACSEC_NETDEV)"
+[ "$TMP_FEATS_OFF_1" = "$TMP_FEATS_OFF_2" ]
+check $?
+
+ip link set $MACSEC_NETDEV type macsec offload mac
+check $?
+
+TMP_FEATS_ON_3="$(ethtool -k $MACSEC_NETDEV)"
+[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_3" ]
+check $?
+
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $((num_passes)) checks"
+    exit 0
+else
+    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+    exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
new file mode 100755
index 000000000000..01d0c044a5fc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -0,0 +1,1058 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the nexthop offload API. It makes use of netdevsim
+# which registers a listener to the nexthop notification chain.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	nexthop_single_add_test
+	nexthop_single_add_err_test
+	nexthop_group_add_test
+	nexthop_group_add_err_test
+	nexthop_res_group_add_test
+	nexthop_res_group_add_err_test
+	nexthop_group_replace_test
+	nexthop_group_replace_err_test
+	nexthop_res_group_replace_test
+	nexthop_res_group_replace_err_test
+	nexthop_res_group_idle_timer_test
+	nexthop_res_group_idle_timer_del_test
+	nexthop_res_group_increase_idle_timer_test
+	nexthop_res_group_decrease_idle_timer_test
+	nexthop_res_group_unbalanced_timer_test
+	nexthop_res_group_unbalanced_timer_del_test
+	nexthop_res_group_no_unbalanced_timer_test
+	nexthop_res_group_short_unbalanced_timer_test
+	nexthop_res_group_increase_unbalanced_timer_test
+	nexthop_res_group_decrease_unbalanced_timer_test
+	nexthop_res_group_force_migrate_busy_test
+	nexthop_single_replace_test
+	nexthop_single_replace_err_test
+	nexthop_single_in_group_replace_test
+	nexthop_single_in_group_replace_err_test
+	nexthop_single_in_res_group_replace_test
+	nexthop_single_in_res_group_replace_err_test
+	nexthop_single_in_group_delete_test
+	nexthop_single_in_group_delete_err_test
+	nexthop_single_in_res_group_delete_test
+	nexthop_single_in_res_group_delete_err_test
+	nexthop_replay_test
+	nexthop_replay_err_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+DEVLINK_DEV=
+source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
+
+nexthop_check()
+{
+	local nharg="$1"; shift
+	local expected="$1"; shift
+
+	out=$($IP nexthop show ${nharg} | sed -e 's/ *$//')
+	if [[ "$out" != "$expected" ]]; then
+		return 1
+	fi
+
+	return 0
+}
+
+nexthop_bucket_nhid_count_check()
+{
+	local group_id=$1; shift
+	local expected
+	local count
+	local nhid
+	local ret
+
+	while (($# > 0)); do
+		nhid=$1; shift
+		expected=$1; shift
+
+		count=$($IP nexthop bucket show id $group_id nhid $nhid |
+			grep "trap" | wc -l)
+		if ((expected != count)); then
+			return 1
+		fi
+	done
+
+	return 0
+}
+
+nexthop_resource_check()
+{
+	local expected_occ=$1; shift
+
+	occ=$($DEVLINK -jp resource show $DEVLINK_DEV \
+		| jq '.[][][] | select(.name=="nexthops") | .["occ"]')
+
+	if [ $expected_occ -ne $occ ]; then
+		return 1
+	fi
+
+	return 0
+}
+
+nexthop_resource_set()
+{
+	local size=$1; shift
+
+	$DEVLINK resource set $DEVLINK_DEV path nexthops size $size
+	$DEVLINK dev reload $DEVLINK_DEV
+}
+
+nexthop_single_add_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry"
+
+	nexthop_resource_check 1
+	check_err $? "Wrong nexthop occupancy"
+
+	$IP nexthop del id 1
+	nexthop_resource_check 0
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	log_test "Single nexthop add and delete"
+}
+
+nexthop_single_add_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 1
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1 &> /dev/null
+	check_fail $? "Nexthop addition succeeded when should fail"
+
+	nexthop_resource_check 1
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop add failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_group_add_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	$IP nexthop add id 10 group 1/2
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_resource_check 4
+	check_err $? "Wrong nexthop occupancy"
+
+	$IP nexthop del id 10
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	$IP nexthop add id 10 group 1,20/2,39
+	nexthop_check "id 10" "id 10 group 1,20/2,39 trap"
+	check_err $? "Unexpected weighted nexthop group entry"
+
+	nexthop_resource_check 61
+	check_err $? "Wrong weighted nexthop occupancy"
+
+	$IP nexthop del id 10
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	log_test "Nexthop group add and delete"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_group_add_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 2
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	$IP nexthop add id 10 group 1/2 &> /dev/null
+	check_fail $? "Nexthop group addition succeeded when should fail"
+
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Nexthop group add failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_res_group_add_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4
+	nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10 1 2
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 2 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 6
+	check_err $? "Wrong nexthop occupancy"
+
+	$IP nexthop del id 10
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	$IP nexthop add id 10 group 1,3/2,2 type resilient buckets 5
+	nexthop_check "id 10" "id 10 group 1,3/2,2 type resilient buckets 5 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected weighted nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10 1 3
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 2 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 7
+	check_err $? "Wrong weighted nexthop occupancy"
+
+	$IP nexthop del id 10
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	log_test "Resilient nexthop group add and delete"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_add_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 2
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4 &> /dev/null
+	check_fail $? "Nexthop group addition succeeded when should fail"
+
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Resilient nexthop group add failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_group_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop replace id 10 group 1/2/3
+	nexthop_check "id 10" "id 10 group 1/2/3 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_resource_check 6
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Nexthop group replace"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_group_replace_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 5
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop replace id 10 group 1/2/3 &> /dev/null
+	check_fail $? "Nexthop group replacement succeeded when should fail"
+
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry after failure"
+
+	nexthop_resource_check 5
+	check_err $? "Wrong nexthop occupancy after failure"
+
+	log_test "Nexthop group replace failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_res_group_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+	$IP nexthop replace id 10 group 1/2/3 type resilient
+	nexthop_check "id 10" "id 10 group 1/2/3 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10 1 2
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 2 2
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 3 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 9
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Resilient nexthop group replace"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_replace_err_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+	ip netns exec testns1 \
+		echo 1 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+	$IP nexthop replace id 10 group 1/2/3 type resilient &> /dev/null
+	check_fail $? "Nexthop group replacement succeeded when should fail"
+
+	nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry after failure"
+
+	nexthop_bucket_nhid_count_check 10 1 3
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 2 3
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 9
+	check_err $? "Wrong nexthop occupancy after failure"
+
+	log_test "Resilient nexthop group replace failure"
+
+	$IP nexthop flush &> /dev/null
+	ip netns exec testns1 \
+		echo 0 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+}
+
+nexthop_res_mark_buckets_busy()
+{
+	local group_id=$1; shift
+	local nhid=$1; shift
+	local count=$1; shift
+	local index
+
+	for index in $($IP -j nexthop bucket show id $group_id nhid $nhid |
+		       jq '.[].bucket.index' | head -n ${count:--0})
+	do
+		echo $group_id $index \
+			> $DEBUGFS_NET_DIR/fib/nexthop_bucket_activity
+	done
+}
+
+nexthop_res_num_nhid_buckets()
+{
+	local group_id=$1; shift
+	local nhid=$1; shift
+
+	$IP -j nexthop bucket show id $group_id nhid $nhid | jq length
+}
+
+nexthop_res_group_idle_timer_test()
+{
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 8 idle_timer 4
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10  1 4  2 4
+	check_err $? "Group expected to be unbalanced"
+
+	sleep 6
+
+	nexthop_bucket_nhid_count_check 10  1 2  2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after idle timer"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_idle_timer_del_test()
+{
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1,50/2,50/3,1 \
+	    type resilient buckets 8 idle_timer 6
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+	nexthop_bucket_nhid_count_check 10  1 4  2 4  3 0
+	check_err $? "Group expected to be unbalanced"
+
+	sleep 4
+
+	# Deletion prompts group replacement. Check that the bucket timers
+	# are kept.
+	$IP nexthop delete id 3
+
+	nexthop_bucket_nhid_count_check 10  1 4  2 4
+	check_err $? "Group expected to still be unbalanced"
+
+	sleep 4
+
+	nexthop_bucket_nhid_count_check 10  1 2  2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after idle timer (with delete)"
+
+	$IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_test()
+{
+	local timer=$1; shift
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 4
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	sleep 2
+	$IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+	sleep 4
+
+	# 6 seconds, past the original timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group still expected to be unbalanced"
+
+	sleep 4
+
+	# 10 seconds, past the new timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after $timer increase"
+
+	$IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_decrease_timer_test()
+{
+	local timer=$1; shift
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 8
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	sleep 2
+	$IP nexthop replace id 10 group 1/2,3 type resilient $timer 4
+	sleep 4
+
+	# 6 seconds, past the new timer, before the old timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after $timer decrease"
+
+	$IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_del_test()
+{
+	local timer=$1; shift
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1,100/2,100/3,1 \
+	    type resilient buckets 8 $timer 4
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1,100/2,300/3,1 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	sleep 2
+	$IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+	sleep 4
+
+	# 6 seconds, past the original timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group still expected to be unbalanced"
+
+	sleep 4
+
+	# 10 seconds, past the new timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after $timer increase"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_idle_timer_test()
+{
+	__nexthop_res_group_increase_timer_test idle_timer
+}
+
+nexthop_res_group_decrease_idle_timer_test()
+{
+	__nexthop_res_group_decrease_timer_test idle_timer
+}
+
+nexthop_res_group_unbalanced_timer_test()
+{
+	local i
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient \
+	    buckets 8 idle_timer 6 unbalanced_timer 10
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	for i in 1 2; do
+		sleep 4
+		nexthop_bucket_nhid_count_check 10  1 4  2 4
+		check_err $? "$i: Group expected to be unbalanced"
+		nexthop_res_mark_buckets_busy 10 1
+	done
+
+	# 3 x sleep 4 > unbalanced timer 10
+	sleep 4
+	nexthop_bucket_nhid_count_check 10  1 2  2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after unbalanced timer"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_unbalanced_timer_del_test()
+{
+	local i
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1,50/2,50/3,1 type resilient \
+	    buckets 8 idle_timer 6 unbalanced_timer 10
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+	# Check that NH delete does not reset unbalanced time.
+	sleep 4
+	$IP nexthop delete id 3
+	nexthop_bucket_nhid_count_check 10  1 4  2 4
+	check_err $? "1: Group expected to be unbalanced"
+	nexthop_res_mark_buckets_busy 10 1
+
+	sleep 4
+	nexthop_bucket_nhid_count_check 10  1 4  2 4
+	check_err $? "2: Group expected to be unbalanced"
+	nexthop_res_mark_buckets_busy 10 1
+
+	# 3 x sleep 4 > unbalanced timer 10
+	sleep 4
+	nexthop_bucket_nhid_count_check 10  1 2  2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after unbalanced timer (with delete)"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_no_unbalanced_timer_test()
+{
+	local i
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 8
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	for i in $(seq 3); do
+		sleep 60
+		nexthop_bucket_nhid_count_check 10 2 6
+		check_fail $? "$i: Group expected to be unbalanced"
+		nexthop_res_mark_buckets_busy 10 1
+	done
+
+	log_test "Buckets never force-migrated without unbalanced timer"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_short_unbalanced_timer_test()
+{
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient \
+	    buckets 8 idle_timer 120 unbalanced_timer 4
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	sleep 5
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after unbalanced < idle timer"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_unbalanced_timer_test()
+{
+	__nexthop_res_group_increase_timer_test unbalanced_timer
+}
+
+nexthop_res_group_decrease_unbalanced_timer_test()
+{
+	__nexthop_res_group_decrease_timer_test unbalanced_timer
+}
+
+nexthop_res_group_force_migrate_busy_test()
+{
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient \
+	    buckets 8 idle_timer 120
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	$IP nexthop replace id 10 group 2 type resilient
+	nexthop_bucket_nhid_count_check 10 2 8
+	check_err $? "All buckets expected to have migrated"
+
+	log_test "Busy buckets force-migrated when NH removed"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+	$IP nexthop replace id 1 via 192.0.2.3 dev dummy1
+	nexthop_check "id 1" "id 1 via 192.0.2.3 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry"
+
+	nexthop_resource_check 1
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_replace_err_test()
+{
+	RET=0
+
+	# This is supposed to cause the replace to fail because the new nexthop
+	# is programmed before deleting the replaced one.
+	nexthop_resource_set 1
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+	$IP nexthop replace id 1 via 192.0.2.3 dev dummy1 &> /dev/null
+	check_fail $? "Nexthop replace succeeded when should fail"
+
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after failure"
+
+	nexthop_resource_check 1
+	check_err $? "Wrong nexthop occupancy after failure"
+
+	log_test "Single nexthop replace failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_single_in_group_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1
+	check_err $? "Failed to replace nexthop when should not"
+
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_resource_check 4
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace while in group"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_group_replace_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 5
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null
+	check_fail $? "Nexthop replacement succeeded when should fail"
+
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after failure"
+
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry after failure"
+
+	nexthop_resource_check 4
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace while in group failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_single_in_res_group_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1
+	check_err $? "Failed to replace nexthop when should not"
+
+	nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10  1 2  2 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 6
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace while in resilient group"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_replace_err_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+	ip netns exec testns1 \
+		echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null
+	check_fail $? "Nexthop replacement succeeded when should fail"
+
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after failure"
+
+	nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry after failure"
+
+	nexthop_bucket_nhid_count_check 10  1 2  2 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 6
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace while in resilient group failure"
+
+	$IP nexthop flush &> /dev/null
+	ip netns exec testns1 \
+		echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
+nexthop_single_in_group_delete_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop del id 1
+	nexthop_check "id 10" "id 10 group 2 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop delete while in group"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_group_delete_err_test()
+{
+	RET=0
+
+	# First, nexthop 1 will be deleted, which will reduce the occupancy to
+	# 5. Afterwards, a replace notification will be sent for nexthop group
+	# 10 with only two nexthops. Since the new group is allocated before
+	# the old is deleted, the replacement will fail as it will result in an
+	# occupancy of 7.
+	nexthop_resource_set 6
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2/3
+
+	$IP nexthop del id 1
+
+	nexthop_resource_check 5
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop delete while in group failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_single_in_res_group_delete_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+	$IP nexthop del id 1
+	nexthop_check "id 10" "id 10 group 2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10 2 4
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 5
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop delete while in resilient group"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_delete_err_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2/3 type resilient buckets 6
+
+	ip netns exec testns1 \
+		echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+	$IP nexthop del id 1
+
+	# We failed to replace the two nexthop buckets that were originally
+	# assigned to nhid 1.
+	nexthop_bucket_nhid_count_check 10  2 2  3 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 8
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop delete while in resilient group failure"
+
+	$IP nexthop flush &> /dev/null
+	ip netns exec testns1 \
+		echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
+nexthop_replay_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$DEVLINK dev reload $DEVLINK_DEV
+	check_err $? "Failed to reload when should not"
+
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after reload"
+
+	nexthop_check "id 2" "id 2 via 192.0.2.3 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after reload"
+
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry after reload"
+
+	nexthop_resource_check 4
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Nexthop replay"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_replay_err_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	# Reduce size of nexthop resource so that reload will fail.
+	$DEVLINK resource set $DEVLINK_DEV path nexthops size 3
+	$DEVLINK dev reload $DEVLINK_DEV &> /dev/null
+	check_fail $? "Reload succeeded when should fail"
+
+	$DEVLINK resource set $DEVLINK_DEV path nexthops size 9999
+	$DEVLINK dev reload $DEVLINK_DEV
+	check_err $? "Failed to reload when should not"
+
+	log_test "Nexthop replay failure"
+
+	$IP nexthop flush &> /dev/null
+}
+
+setup_prepare()
+{
+	local netdev
+
+	modprobe netdevsim &> /dev/null
+
+	echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+	while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+	set -e
+
+	ip netns add testns1
+	devlink dev reload $DEVLINK_DEV netns testns1
+
+	IP="ip -netns testns1"
+	DEVLINK="devlink -N testns1"
+
+	$IP link add name dummy1 up type dummy
+	$IP address add 192.0.2.1/24 dev dummy1
+
+	set +e
+}
+
+cleanup()
+{
+	pre_cleanup
+	ip netns del testns1
+	echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+	modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+xfail_on_slow tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
new file mode 100755
index 000000000000..7f32b5600925
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+lib_dir=$(dirname $0)/../../../net
+source $lib_dir/lib.sh
+
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+socat_check()
+{
+	if [ ! -x "$(command -v socat)" ]; then
+		echo "socat command not found. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+setup_ns()
+{
+	set -e
+	ip netns add nssv
+	ip netns add nscl
+
+	NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \
+		-path $NSIM_DEV_1_SYS/net -exec basename {} \;)
+	NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \
+		-path $NSIM_DEV_2_SYS/net -exec basename {} \;)
+
+	ip link set $NSIM_DEV_1_NAME netns nssv
+	ip link set $NSIM_DEV_2_NAME netns nscl
+
+	ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME
+	ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME
+
+	ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up
+	ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up
+	set +e
+}
+
+cleanup_ns()
+{
+	ip netns del nscl
+	ip netns del nssv
+}
+
+###
+### Code start
+###
+
+socat_check || exit 4
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+NSIM_DEV_1_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_1_FD}</var/run/netns/nssv
+NSIM_DEV_1_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_DEV_1_NAME/ifindex)
+
+NSIM_DEV_2_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_2_FD}</var/run/netns/nscl
+NSIM_DEV_2_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_DEV_2_NAME/ifindex)
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:2000" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+	echo "linking with non-existent netdevsim should fail"
+	cleanup_ns
+	exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX 2000:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+	echo "linking with non-existent netnsid should fail"
+	cleanup_ns
+	exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+	echo "linking with self should fail"
+	cleanup_ns
+	exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK
+if [ $? -ne 0 ]; then
+	echo "linking netdevsim1 with netdevsim2 should succeed"
+	cleanup_ns
+	exit 1
+fi
+
+# argument error checking
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:a" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+	echo "invalid arg should fail"
+	cleanup_ns
+	exit 1
+fi
+
+# send/recv packets
+
+tmp_file=$(mktemp)
+ip netns exec nssv socat TCP-LISTEN:1234,fork $tmp_file &
+pid=$!
+res=0
+
+wait_local_port_listen nssv 1234 tcp
+
+echo "HI" | ip netns exec nscl socat STDIN TCP:192.168.1.1:1234
+
+count=$(cat $tmp_file | wc -c)
+if [[ $count -ne 3 ]]; then
+	echo "expected 3 bytes, got $count"
+	res=1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL
+
+kill $pid
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit $res
diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
new file mode 100755
index 000000000000..e689ff7a0b12
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the psample module. It makes use of netdevsim
+# which periodically generates "sampled" packets.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	psample_enable_test
+	psample_group_num_test
+	psample_md_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/
+CAPTURE_FILE=$(mktemp)
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+DEVLINK_DEV=
+source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+psample_capture()
+{
+	rm -f $CAPTURE_FILE
+
+	timeout 2 ip netns exec testns1 psample &> $CAPTURE_FILE
+}
+
+psample_enable_test()
+{
+	RET=0
+
+	echo 1 > $PSAMPLE_DIR/enable
+	check_err $? "Failed to enable sampling when should not"
+
+	echo 1 > $PSAMPLE_DIR/enable 2>/dev/null
+	check_fail $? "Sampling enablement succeeded when should fail"
+
+	psample_capture
+	if [ $(cat $CAPTURE_FILE | wc -l) -eq 0 ]; then
+		check_err 1 "Failed to capture sampled packets"
+	fi
+
+	echo 0 > $PSAMPLE_DIR/enable
+	check_err $? "Failed to disable sampling when should not"
+
+	echo 0 > $PSAMPLE_DIR/enable 2>/dev/null
+	check_fail $? "Sampling disablement succeeded when should fail"
+
+	psample_capture
+	if [ $(cat $CAPTURE_FILE | wc -l) -ne 0 ]; then
+		check_err 1 "Captured sampled packets when should not"
+	fi
+
+	log_test "psample enable / disable"
+}
+
+psample_group_num_test()
+{
+	RET=0
+
+	echo 1234 > $PSAMPLE_DIR/group_num
+	echo 1 > $PSAMPLE_DIR/enable
+
+	psample_capture
+	grep -q -e "group 1234" $CAPTURE_FILE
+	check_err $? "Sampled packets reported with wrong group number"
+
+	# New group number should only be used after disable / enable.
+	echo 4321 > $PSAMPLE_DIR/group_num
+
+	psample_capture
+	grep -q -e "group 4321" $CAPTURE_FILE
+	check_fail $? "Group number changed while sampling is active"
+
+	echo 0 > $PSAMPLE_DIR/enable && echo 1 > $PSAMPLE_DIR/enable
+
+	psample_capture
+	grep -q -e "group 4321" $CAPTURE_FILE
+	check_err $? "Group number did not change after restarting sampling"
+
+	log_test "psample group number"
+
+	echo 0 > $PSAMPLE_DIR/enable
+}
+
+psample_md_test()
+{
+	RET=0
+
+	echo 1 > $PSAMPLE_DIR/enable
+
+	echo 1234 > $PSAMPLE_DIR/in_ifindex
+	echo 4321 > $PSAMPLE_DIR/out_ifindex
+	psample_capture
+
+	grep -q -e "in-ifindex 1234" $CAPTURE_FILE
+	check_err $? "Sampled packets reported with wrong in-ifindex"
+
+	grep -q -e "out-ifindex 4321" $CAPTURE_FILE
+	check_err $? "Sampled packets reported with wrong out-ifindex"
+
+	echo 5 > $PSAMPLE_DIR/out_tc
+	psample_capture
+
+	grep -q -e "out-tc 5" $CAPTURE_FILE
+	check_err $? "Sampled packets reported with wrong out-tc"
+
+	echo $((2**16 - 1)) > $PSAMPLE_DIR/out_tc
+	psample_capture
+
+	grep -q -e "out-tc " $CAPTURE_FILE
+	check_fail $? "Sampled packets reported with out-tc when should not"
+
+	echo 1 > $PSAMPLE_DIR/out_tc
+	echo 10000 > $PSAMPLE_DIR/out_tc_occ_max
+	psample_capture
+
+	grep -q -e "out-tc-occ " $CAPTURE_FILE
+	check_err $? "Sampled packets not reported with out-tc-occ when should"
+
+	echo 0 > $PSAMPLE_DIR/out_tc_occ_max
+	psample_capture
+
+	grep -q -e "out-tc-occ " $CAPTURE_FILE
+	check_fail $? "Sampled packets reported with out-tc-occ when should not"
+
+	echo 10000 > $PSAMPLE_DIR/latency_max
+	psample_capture
+
+	grep -q -e "latency " $CAPTURE_FILE
+	check_err $? "Sampled packets not reported with latency when should"
+
+	echo 0 > $PSAMPLE_DIR/latency_max
+	psample_capture
+
+	grep -q -e "latency " $CAPTURE_FILE
+	check_fail $? "Sampled packets reported with latency when should not"
+
+	log_test "psample metadata"
+
+	echo 0 > $PSAMPLE_DIR/enable
+}
+
+setup_prepare()
+{
+	modprobe netdevsim &> /dev/null
+
+	echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+	while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+	set -e
+
+	ip netns add testns1
+	devlink dev reload $DEVLINK_DEV netns testns1
+
+	set +e
+}
+
+cleanup()
+{
+	pre_cleanup
+	rm -f $CAPTURE_FILE
+	ip netns del testns1
+	echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+	modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/settings b/tools/testing/selftests/drivers/net/netdevsim/settings
new file mode 100644
index 000000000000..a62d2fa1275c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/settings
@@ -0,0 +1 @@
+timeout=600
diff --git a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
new file mode 100755
index 000000000000..b411fe66510f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+set -o pipefail
+
+n_children() {
+    n=$(tc qdisc show dev $NDEV | grep '^qdisc' | wc -l)
+    echo $((n - 1))
+}
+
+tcq() {
+    tc qdisc $1 dev $NDEV ${@:2}
+}
+
+n_child_assert() {
+    n=$(n_children)
+    if [ $n -ne $1 ]; then
+	echo "ERROR ($root): ${@:2}, expected $1 have $n"
+	((num_errors++))
+    else
+	((num_passes++))
+    fi
+}
+
+
+for root in mq mqprio; do
+    NDEV=$(make_netdev 1 4)
+
+    opts=
+    [ $root == "mqprio" ] && opts='hw 0 num_tc 1 map 0 0 0 0  queues 1@0'
+
+    tcq add root handle 100: $root $opts
+    n_child_assert 4 'Init'
+
+    # All defaults
+
+    for n in 3 2 1 2 3 4 1 4; do
+	ethtool -L $NDEV combined $n
+	n_child_assert $n "Change queues to $n while down"
+    done
+
+    ip link set dev $NDEV up
+
+    for n in 3 2 1 2 3 4 1 4; do
+	ethtool -L $NDEV combined $n
+	n_child_assert $n "Change queues to $n while up"
+    done
+
+    # One real one
+    tcq replace parent 100:4 handle 204: pfifo_fast
+    n_child_assert 4 "One real queue"
+
+    ethtool -L $NDEV combined 1
+    n_child_assert 2 "One real queue, one default"
+
+    ethtool -L $NDEV combined 4
+    n_child_assert 4 "One real queue, rest default"
+
+    # Remove real one
+    tcq del parent 100:4 handle 204:
+
+    # Replace default with pfifo
+    tcq replace parent 100:1 handle 205: pfifo limit 1000
+    n_child_assert 3 "Deleting real one, replacing default one with pfifo"
+
+    ethtool -L $NDEV combined 1
+    n_child_assert 1 "Grafted, one"
+
+    cleanup_nsim
+done
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $((num_passes)) checks"
+    exit 0
+else
+    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+    exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
new file mode 100755
index 000000000000..4c859ecdad94
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -0,0 +1,942 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+VNI_GEN=$RANDOM
+NSIM_ID=$((RANDOM % 1024))
+NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
+NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID
+NSIM_NETDEV=
+HAS_ETHTOOL=
+STATIC_ENTRIES=
+EXIT_STATUS=0
+num_cases=0
+num_errors=0
+
+clean_up_devs=( )
+
+function err_cnt {
+    echo "ERROR:" $@
+    EXIT_STATUS=1
+    ((num_errors++))
+    ((num_cases++))
+}
+
+function pass_cnt {
+    ((num_cases++))
+}
+
+function cleanup_tuns {
+    for dev in "${clean_up_devs[@]}"; do
+	[ -e /sys/class/net/$dev ] && ip link del dev $dev
+    done
+    clean_up_devs=( )
+}
+
+function cleanup_nsim {
+    if [ -e $NSIM_DEV_SYS ]; then
+	echo $NSIM_ID > /sys/bus/netdevsim/del_device
+    fi
+}
+
+function cleanup {
+    cleanup_tuns
+    cleanup_nsim
+}
+
+trap cleanup EXIT
+
+function new_vxlan {
+    local dev=$1
+    local dstport=$2
+    local lower=$3
+    local ipver=$4
+    local flags=$5
+
+    local group ipfl
+
+    [ "$ipver" != '6' ] && group=239.1.1.1 || group=fff1::1
+    [ "$ipver" != '6' ] || ipfl="-6"
+
+    [[ ! "$flags" =~ "external" ]] && flags="$flags id $((VNI_GEN++))"
+
+    ip $ipfl link add $dev type vxlan \
+       group $group \
+       dev $lower \
+       dstport $dstport \
+       $flags
+
+    ip link set dev $dev up
+
+    clean_up_devs=("${clean_up_devs[@]}" $dev)
+
+    check_tables
+}
+
+function new_geneve {
+    local dev=$1
+    local dstport=$2
+    local ipver=$3
+    local flags=$4
+
+    local group ipfl
+
+    [ "$ipver" != '6' ] && remote=1.1.1.2 || group=::2
+    [ "$ipver" != '6' ] || ipfl="-6"
+
+    [[ ! "$flags" =~ "external" ]] && flags="$flags vni $((VNI_GEN++))"
+
+    ip $ipfl link add $dev type geneve \
+       remote $remote  \
+       dstport $dstport \
+       $flags
+
+    ip link set dev $dev up
+
+    clean_up_devs=("${clean_up_devs[@]}" $dev)
+
+    check_tables
+}
+
+function del_dev {
+    local dev=$1
+
+    ip link del dev $dev
+    check_tables
+}
+
+# Helpers for netdevsim port/type encoding
+function mke {
+    local port=$1
+    local type=$2
+
+    echo $((port << 16 | type))
+}
+
+function pre {
+    local val=$1
+
+    echo -e "port: $((val >> 16))\ttype: $((val & 0xffff))"
+}
+
+function pre_ethtool {
+    local val=$1
+    local port=$((val >> 16))
+    local type=$((val & 0xffff))
+
+    case $type in
+	1)
+	    type_name="vxlan"
+	    ;;
+	2)
+	    type_name="geneve"
+	    ;;
+	4)
+	    type_name="vxlan-gpe"
+	    ;;
+	*)
+	    type_name="bit X"
+	    ;;
+    esac
+
+    echo "port $port, $type_name"
+}
+
+function check_table {
+    local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1
+    local -n expected=$2
+    local last=$3
+
+    read -a have < $path
+
+    if [ ${#expected[@]} -ne ${#have[@]} ]; then
+	echo "check_table: BAD NUMBER OF ITEMS"
+	return 0
+    fi
+
+    for i in "${!expected[@]}"; do
+	if [ -n "$HAS_ETHTOOL" -a ${expected[i]} -ne 0 ]; then
+	    pp_expected=`pre_ethtool ${expected[i]}`
+	    ethtool --show-tunnels $NSIM_NETDEV | grep "$pp_expected" >/dev/null
+	    if [ $? -ne 0 -a $last -ne 0 ]; then
+		err_cnt "ethtool table $1 on port $port: $pfx - $msg"
+		echo "       check_table: ethtool does not contain '$pp_expected'"
+		ethtool --show-tunnels $NSIM_NETDEV
+		return 0
+
+	    fi
+	fi
+
+	if [ ${expected[i]} != ${have[i]} ]; then
+	    if [ $last -ne 0 ]; then
+		err_cnt "table $1 on port $port: $pfx - $msg"
+		echo "       check_table: wrong entry $i"
+		echo "       expected: `pre ${expected[i]}`"
+		echo "       have:     `pre ${have[i]}`"
+		return 0
+	    fi
+	    return 1
+	fi
+    done
+
+    pass_cnt
+    return 0
+}
+
+function check_tables {
+    # Need retries in case we have workqueue making the changes
+    local retries=10
+
+    while ! check_table 0 exp0 $((retries == 0)); do
+	sleep 0.02
+	((retries--))
+    done
+    while ! check_table 1 exp1 $((retries == 0)); do
+	sleep 0.02
+	((retries--))
+    done
+
+    if [ -n "$HAS_ETHTOOL" -a -n "${STATIC_ENTRIES[0]}" ]; then
+	fail=0
+	for i in "${!STATIC_ENTRIES[@]}"; do
+	    pp_expected=`pre_ethtool ${STATIC_ENTRIES[i]}`
+	    cnt=$(ethtool --show-tunnels $NSIM_NETDEV | grep -c "$pp_expected")
+	    if [ $cnt -ne 1 ]; then
+		err_cnt "ethtool static entry: $pfx - $msg"
+		echo "       check_table: ethtool does not contain '$pp_expected'"
+		ethtool --show-tunnels $NSIM_NETDEV
+		fail=1
+	    fi
+	done
+	[ $fail == 0 ] && pass_cnt
+    fi
+}
+
+function print_table {
+    local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1
+    read -a have < $path
+
+    tree $NSIM_DEV_DFS/
+
+    echo "Port $port table $1:"
+
+    for i in "${!have[@]}"; do
+	echo "    `pre ${have[i]}`"
+    done
+
+}
+
+function print_tables {
+    print_table 0
+    print_table 1
+}
+
+function get_netdev_name {
+    local -n old=$1
+
+    udevadm settle
+    new=$(ls /sys/class/net)
+
+    for netdev in $new; do
+	for check in $old; do
+            [ $netdev == $check ] && break
+	done
+
+	if [ $netdev != $check ]; then
+	    echo $netdev
+	    break
+	fi
+    done
+}
+
+###
+### Code start
+###
+
+# Probe ethtool support
+ethtool -h | grep show-tunnels 2>&1 >/dev/null && HAS_ETHTOOL=y
+
+modprobe netdevsim
+
+# Basic test
+pfx="basic"
+
+for port in 0 1; do
+    old_netdevs=$(ls /sys/class/net)
+    if [ $port -eq 0 ]; then
+	echo $NSIM_ID > /sys/bus/netdevsim/new_device
+    else
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+	echo 1 > $NSIM_DEV_SYS/new_port
+    fi
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
+    ip link set dev $NSIM_NETDEV up
+
+    msg="new NIC device created"
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+    check_tables
+
+    msg="VxLAN v4 devices"
+    exp0=( `mke 4789 1` 0 0 0 )
+    new_vxlan vxlan0 4789 $NSIM_NETDEV
+    new_vxlan vxlan1 4789 $NSIM_NETDEV
+
+    msg="VxLAN v4 devices go down"
+    exp0=( 0 0 0 0 )
+    ip link set dev vxlan1 down
+    ip link set dev vxlan0 down
+    check_tables
+
+    msg="VxLAN v6 devices"
+    exp0=( `mke 4789 1` 0 0 0 )
+    new_vxlan vxlanA 4789 $NSIM_NETDEV 6
+
+    for ifc in vxlan0 vxlan1; do
+	ip link set dev $ifc up
+    done
+
+    new_vxlan vxlanB 4789 $NSIM_NETDEV 6
+
+    msg="another VxLAN v6 devices"
+    exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+    new_vxlan vxlanC 4790 $NSIM_NETDEV 6
+
+    msg="Geneve device"
+    exp1=( `mke 6081 2` 0 0 0 )
+    new_geneve gnv0 6081
+
+    msg="NIC device goes down"
+    ip link set dev $NSIM_NETDEV down
+    if [ $port -eq 1 ]; then
+	exp0=( 0 0 0 0 )
+	exp1=( 0 0 0 0 )
+    fi
+    check_tables
+    msg="NIC device goes up again"
+    ip link set dev $NSIM_NETDEV up
+    exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+    exp1=( `mke 6081 2` 0 0 0 )
+    check_tables
+
+    cleanup_tuns
+
+    msg="tunnels destroyed"
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+    check_tables
+
+    modprobe -r geneve
+    modprobe -r vxlan
+    modprobe -r udp_tunnel
+
+    check_tables
+done
+
+modprobe -r netdevsim
+
+# Module tests
+pfx="module tests"
+
+if modinfo netdevsim | grep udp_tunnel >/dev/null; then
+    err_cnt "netdevsim depends on udp_tunnel"
+else
+    pass_cnt
+fi
+
+modprobe netdevsim
+
+old_netdevs=$(ls /sys/class/net)
+port=0
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 0 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+msg="create VxLANs"
+exp0=( `mke 10000 1` 0 0 0 )
+new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+exp0=( 0 0 0 0 )
+
+modprobe -r netdevsim
+modprobe netdevsim
+
+# Overflow the table
+
+function overflow_table0 {
+    local pfx=$1
+
+    msg="create VxLANs 1/5"
+    exp0=( `mke 10000 1` 0 0 0 )
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    msg="create VxLANs 2/5"
+    exp0=( `mke 10000 1` `mke 10001 1` 0 0 )
+    new_vxlan vxlan1 10001 $NSIM_NETDEV
+
+    msg="create VxLANs 3/5"
+    exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` 0 )
+    new_vxlan vxlan2 10002 $NSIM_NETDEV
+
+    msg="create VxLANs 4/5"
+    exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` `mke 10003 1` )
+    new_vxlan vxlan3 10003 $NSIM_NETDEV
+
+    msg="create VxLANs 5/5"
+    new_vxlan vxlan4 10004 $NSIM_NETDEV
+}
+
+function overflow_table1 {
+    local pfx=$1
+
+    msg="create GENEVE 1/5"
+    exp1=( `mke 20000 2` 0 0 0 )
+    new_geneve gnv0 20000
+
+    msg="create GENEVE 2/5"
+    exp1=( `mke 20000 2` `mke 20001 2` 0 0 )
+    new_geneve gnv1 20001
+
+    msg="create GENEVE 3/5"
+    exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` 0 )
+    new_geneve gnv2 20002
+
+    msg="create GENEVE 4/5"
+    exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` `mke 20003 2` )
+    new_geneve gnv3 20003
+
+    msg="create GENEVE 5/5"
+    new_geneve gnv4 20004
+}
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
+    ip link set dev $NSIM_NETDEV up
+
+    overflow_table0 "overflow NIC table"
+    overflow_table1 "overflow NIC table"
+
+    msg="replace VxLAN in overflow table"
+    exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+    del_dev vxlan1
+
+    msg="vacate VxLAN in overflow table"
+    exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+    del_dev vxlan2
+
+    msg="replace GENEVE in overflow table"
+    exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` )
+    del_dev gnv1
+
+    msg="vacate GENEVE in overflow table"
+    exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` )
+    del_dev gnv2
+
+    msg="table sharing - share"
+    exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` )
+    new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external"
+
+    msg="table sharing - overflow"
+    new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external"
+    msg="table sharing - overflow v6"
+    new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external"
+
+    exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` )
+    del_dev gnv4
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Sync all
+pfx="sync all"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
+    ip link set dev $NSIM_NETDEV up
+
+    overflow_table0 "overflow NIC table"
+    overflow_table1 "overflow NIC table"
+
+    msg="replace VxLAN in overflow table"
+    exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+    del_dev vxlan1
+
+    msg="vacate VxLAN in overflow table"
+    exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+    del_dev vxlan2
+
+    msg="replace GENEVE in overflow table"
+    exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` )
+    del_dev gnv1
+
+    msg="vacate GENEVE in overflow table"
+    exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` )
+    del_dev gnv2
+
+    msg="table sharing - share"
+    exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` )
+    new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external"
+
+    msg="table sharing - overflow"
+    new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external"
+    msg="table sharing - overflow v6"
+    new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external"
+
+    exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` )
+    del_dev gnv4
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Destroy full NIC
+pfx="destroy full"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
+    ip link set dev $NSIM_NETDEV up
+
+    overflow_table0 "destroy NIC"
+    overflow_table1 "destroy NIC"
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# IPv4 only
+pfx="IPv4 only"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
+    ip link set dev $NSIM_NETDEV up
+
+    msg="create VxLANs v6"
+    new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+    msg="create VxLANs v6"
+    new_vxlan vxlanA1 10000 $NSIM_NETDEV 6
+
+    ip link set dev vxlanA0 down
+    ip link set dev vxlanA0 up
+    check_tables
+
+    msg="create VxLANs v4"
+    exp0=( `mke 10000 1` 0 0 0 )
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    msg="down VxLANs v4"
+    exp0=( 0 0 0 0 )
+    ip link set dev vxlan0 down
+    check_tables
+
+    msg="up VxLANs v4"
+    exp0=( `mke 10000 1` 0 0 0 )
+    ip link set dev vxlan0 up
+    check_tables
+
+    msg="destroy VxLANs v4"
+    exp0=( 0 0 0 0 )
+    del_dev vxlan0
+
+    msg="recreate VxLANs v4"
+    exp0=( `mke 10000 1` 0 0 0 )
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    del_dev vxlanA0
+    del_dev vxlanA1
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Failures
+pfx="error injection"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
+    ip link set dev $NSIM_NETDEV up
+
+    echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error
+
+    msg="1 - create VxLANs v6"
+    exp0=( 0 0 0 0 )
+    new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+    msg="1 - create VxLANs v4"
+    exp0=( `mke 10000 1` 0 0 0 )
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    msg="1 - remove VxLANs v4"
+    del_dev vxlan0
+
+    msg="1 - remove VxLANs v6"
+    exp0=( 0 0 0 0 )
+    del_dev vxlanA0
+
+    msg="2 - create GENEVE"
+    exp1=( `mke 20000 2` 0 0 0 )
+    new_geneve gnv0 20000
+
+    msg="2 - destroy GENEVE"
+    echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error
+    exp1=( `mke 20000 2` 0 0 0 )
+    del_dev gnv0
+
+    msg="2 - create second GENEVE"
+    exp1=( 0 `mke 20001 2` 0 0 )
+    new_geneve gnv0 20001
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# netdev flags
+pfx="netdev flags"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
+    ip link set dev $NSIM_NETDEV up
+
+    msg="create VxLANs v6"
+    exp0=( `mke 10000 1` 0 0 0 )
+    new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+    msg="create VxLANs v4"
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    msg="turn off"
+    exp0=( 0 0 0 0 )
+    ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+    check_tables
+
+    msg="turn on"
+    exp0=( `mke 10000 1` 0 0 0 )
+    ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+    check_tables
+
+    msg="remove both"
+    del_dev vxlanA0
+    exp0=( 0 0 0 0 )
+    del_dev vxlan0
+    check_tables
+
+    ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+
+    msg="create VxLANs v4 - off"
+    exp0=( 0 0 0 0 )
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    msg="created off - turn on"
+    exp0=( `mke 10000 1` 0 0 0 )
+    ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+    check_tables
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# device initiated reset
+pfx="reset notification"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+    if [ $port -ne 0 ]; then
+	echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+    fi
+
+    echo $port > $NSIM_DEV_SYS/new_port
+    NSIM_NETDEV=`get_netdev_name old_netdevs`
+    ip link set dev $NSIM_NETDEV up
+
+    msg="create VxLANs v6"
+    exp0=( `mke 10000 1` 0 0 0 )
+    new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+    msg="create VxLANs v4"
+    new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+    echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
+    check_tables
+
+    msg="NIC device goes down"
+    ip link set dev $NSIM_NETDEV down
+    if [ $port -eq 1 ]; then
+	exp0=( 0 0 0 0 )
+	exp1=( 0 0 0 0 )
+    fi
+    check_tables
+
+    echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
+    check_tables
+
+    msg="NIC device goes up again"
+    ip link set dev $NSIM_NETDEV up
+    exp0=( `mke 10000 1` 0 0 0 )
+    check_tables
+
+    msg="remove both"
+    del_dev vxlanA0
+    exp0=( 0 0 0 0 )
+    del_dev vxlan0
+    check_tables
+
+    echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
+    check_tables
+
+    msg="destroy NIC"
+    echo $port > $NSIM_DEV_SYS/del_port
+
+    cleanup_tuns
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# shared port tables
+pfx="table sharing"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+echo 0 > $NSIM_DEV_DFS/udp_ports_open_only
+echo 1 > $NSIM_DEV_DFS/udp_ports_shared
+
+old_netdevs=$(ls /sys/class/net)
+echo 1 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+old_netdevs=$(ls /sys/class/net)
+echo 2 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV2=`get_netdev_name old_netdevs`
+
+msg="VxLAN v4 devices"
+exp0=( `mke 4789 1` 0 0 0 )
+exp1=( 0 0 0 0 )
+new_vxlan vxlan0 4789 $NSIM_NETDEV
+new_vxlan vxlan1 4789 $NSIM_NETDEV2
+
+msg="VxLAN v4 devices go down"
+exp0=( 0 0 0 0 )
+ip link set dev vxlan1 down
+ip link set dev vxlan0 down
+check_tables
+
+for ifc in vxlan0 vxlan1; do
+    ip link set dev $ifc up
+done
+
+msg="VxLAN v6 device"
+exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+new_vxlan vxlanC 4790 $NSIM_NETDEV 6
+
+msg="Geneve device"
+exp1=( `mke 6081 2` 0 0 0 )
+new_geneve gnv0 6081
+
+msg="NIC device goes down"
+ip link set dev $NSIM_NETDEV down
+check_tables
+
+msg="NIC device goes up again"
+ip link set dev $NSIM_NETDEV up
+check_tables
+
+for i in `seq 2`; do
+    msg="turn feature off - 1, rep $i"
+    ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+    check_tables
+
+    msg="turn feature off - 2, rep $i"
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+    ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload off
+    check_tables
+
+    msg="turn feature on - 1, rep $i"
+    exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+    exp1=( `mke 6081 2` 0 0 0 )
+    ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+    check_tables
+
+    msg="turn feature on - 2, rep $i"
+    ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload on
+    check_tables
+done
+
+msg="tunnels destroyed 1"
+cleanup_tuns
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+overflow_table0 "overflow NIC table"
+
+msg="re-add a port"
+
+echo 2 > $NSIM_DEV_SYS/del_port
+echo 2 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+check_tables
+
+msg="replace VxLAN in overflow table"
+exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+del_dev vxlan1
+
+msg="vacate VxLAN in overflow table"
+exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+del_dev vxlan2
+
+echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
+check_tables
+
+msg="tunnels destroyed 2"
+cleanup_tuns
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+echo 1 > $NSIM_DEV_SYS/del_port
+echo 2 > $NSIM_DEV_SYS/del_port
+
+cleanup_nsim
+
+# Static IANA port
+pfx="static IANA vxlan"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+echo 1 > $NSIM_DEV_DFS/udp_ports_static_iana_vxlan
+STATIC_ENTRIES=( `mke 4789 1` )
+
+port=1
+old_netdevs=$(ls /sys/class/net)
+echo $port > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+msg="check empty"
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+msg="add on static port"
+new_vxlan vxlan0 4789 $NSIM_NETDEV
+new_vxlan vxlan1 4789 $NSIM_NETDEV
+
+msg="add on different port"
+exp0=( `mke 4790 1` 0 0 0 )
+new_vxlan vxlan2 4790 $NSIM_NETDEV
+
+cleanup_tuns
+
+msg="tunnels destroyed"
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+msg="different type"
+new_geneve gnv0	4789
+
+cleanup_tuns
+cleanup_nsim
+
+# END
+
+modprobe -r netdevsim
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $num_cases checks"
+else
+    echo "FAILED $num_errors/$num_cases checks"
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netpoll_basic.py b/tools/testing/selftests/drivers/net/netpoll_basic.py
new file mode 100755
index 000000000000..408bd54d6779
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netpoll_basic.py
@@ -0,0 +1,396 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Author: Breno Leitao <leitao@debian.org>
+"""
+ This test aims to evaluate the netpoll polling mechanism (as in
+ netpoll_poll_dev()). It presents a complex scenario where the network
+ attempts to send a packet but fails, prompting it to poll the NIC from within
+ the netpoll TX side.
+
+ This has been a crucial path in netpoll that was previously untested. Jakub
+ suggested using a single RX/TX queue, pushing traffic to the NIC, and then
+ sending netpoll messages (via netconsole) to trigger the poll.
+
+ In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If
+ so, the test passes, otherwise it will be skipped. This test is very dependent on
+ the driver and environment, given we are trying to trigger a tricky scenario.
+"""
+
+import errno
+import logging
+import os
+import random
+import string
+import threading
+import time
+from typing import Optional
+
+from lib.py import (
+    bpftrace,
+    CmdExitFailure,
+    defer,
+    ethtool,
+    GenerateTraffic,
+    ksft_exit,
+    ksft_pr,
+    ksft_run,
+    KsftFailEx,
+    KsftSkipEx,
+    NetDrvEpEnv,
+    KsftXfailEx,
+)
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
+NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole"
+NETCONS_REMOTE_PORT: int = 6666
+NETCONS_LOCAL_PORT: int = 1514
+
+# Max number of netcons messages to send. Each iteration will setup
+# netconsole and send MAX_WRITES messages
+ITERATIONS: int = 20
+# Number of writes to /dev/kmsg per iteration
+MAX_WRITES: int = 40
+# MAPS contains the information coming from bpftrace it will have only one
+# key: "hits", which tells the number of times netpoll_poll_dev() was called
+MAPS: dict[str, int] = {}
+# Thread to run bpftrace in parallel
+BPF_THREAD: Optional[threading.Thread] = None
+# Time bpftrace will be running in parallel.
+BPFTRACE_TIMEOUT: int = 10
+
+
+def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]:
+    """
+    Read the ringsize using ethtool. This will be used to restore it after the test
+    """
+    try:
+        ethtool_result = ethtool(f"-g {interface_name}", json=True)[0]
+        rxs = ethtool_result["rx"]
+        txs = ethtool_result["tx"]
+    except (KeyError, IndexError) as exception:
+        raise KsftSkipEx(
+            f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them."
+        ) from exception
+
+    return rxs, txs
+
+
+def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool:
+    """Try to the number of RX and TX ringsize."""
+    rxs = ring_size[0]
+    txs = ring_size[1]
+
+    logging.debug("Setting ring size to %d/%d", rxs, txs)
+    try:
+        ethtool(f"-G {interface_name} rx {rxs} tx {txs}")
+    except CmdExitFailure:
+        # This might fail on real device, retry with a higher value,
+        # worst case, keep it as it is.
+        return False
+
+    return True
+
+
+def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]:
+    """Read the number of RX, TX and combined queues using ethtool"""
+
+    try:
+        ethtool_result = ethtool(f"-l {interface_name}", json=True)[0]
+        rxq = ethtool_result.get("rx", -1)
+        txq = ethtool_result.get("tx", -1)
+        combined = ethtool_result.get("combined", -1)
+
+    except IndexError as exception:
+        raise KsftSkipEx(
+            f"Failed to read queues numbers: {exception}. Not going to mess with them."
+        ) from exception
+
+    return rxq, txq, combined
+
+
+def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None:
+    """Set the number of RX, TX and combined queues using ethtool"""
+    rxq, txq, combined = queues
+
+    cmdline = f"-L {interface_name}"
+
+    if rxq != -1:
+        cmdline += f" rx {rxq}"
+    if txq != -1:
+        cmdline += f" tx {txq}"
+    if combined != -1:
+        cmdline += f" combined {combined}"
+
+    logging.debug("calling: ethtool %s", cmdline)
+
+    try:
+        ethtool(cmdline)
+    except CmdExitFailure as exception:
+        raise KsftSkipEx(
+            f"Failed to configure RX/TX queues: {exception}. Ethtool not available?"
+        ) from exception
+
+
+def netcons_generate_random_target_name() -> str:
+    """Generate a random target name starting with 'netcons'"""
+    random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
+    return f"netcons_{random_suffix}"
+
+
+def netcons_create_target(
+    config_data: dict[str, str],
+    target_name: str,
+) -> None:
+    """Create a netconsole dynamic target against the interfaces"""
+    logging.debug("Using netconsole name: %s", target_name)
+    try:
+        os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True)
+        logging.debug(
+            "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name
+        )
+    except OSError as exception:
+        if exception.errno != errno.EEXIST:
+            raise KsftFailEx(
+                f"Failed to create netconsole target directory: {exception}"
+            ) from exception
+
+    try:
+        for key, value in config_data.items():
+            path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}"
+            logging.debug("Writing %s to %s", key, path)
+            with open(path, "w", encoding="utf-8") as file:
+                # Always convert to string to write to file
+                file.write(str(value))
+
+        # Read all configuration values for debugging purposes
+        for debug_key in config_data.keys():
+            with open(
+                f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}",
+                "r",
+                encoding="utf-8",
+            ) as file:
+                content = file.read()
+                logging.debug(
+                    "%s/%s/%s : %s",
+                    NETCONSOLE_CONFIGFS_PATH,
+                    target_name,
+                    debug_key,
+                    content.strip(),
+                )
+
+    except Exception as exception:
+        raise KsftFailEx(
+            f"Failed to configure netconsole target: {exception}"
+        ) from exception
+
+
+def netcons_configure_target(
+    cfg: NetDrvEpEnv, interface_name: str, target_name: str
+) -> None:
+    """Configure netconsole on the interface with the given target name"""
+    config_data = {
+        "extended": "1",
+        "dev_name": interface_name,
+        "local_port": NETCONS_LOCAL_PORT,
+        "remote_port": NETCONS_REMOTE_PORT,
+        "local_ip": cfg.addr,
+        "remote_ip": cfg.remote_addr,
+        "remote_mac": "00:00:00:00:00:00",  # Not important for this test
+        "enabled": "1",
+    }
+
+    netcons_create_target(config_data, target_name)
+    logging.debug(
+        "Created netconsole target: %s on interface %s", target_name, interface_name
+    )
+
+
+def netcons_delete_target(name: str) -> None:
+    """Delete a netconsole dynamic target"""
+    target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}"
+    try:
+        if os.path.exists(target_path):
+            os.rmdir(target_path)
+    except OSError as exception:
+        raise KsftFailEx(
+            f"Failed to delete netconsole target: {exception}"
+        ) from exception
+
+
+def netcons_load_module() -> None:
+    """Try to load the netconsole module"""
+    os.system("modprobe netconsole")
+
+
+def bpftrace_call() -> None:
+    """Call bpftrace to find how many times netpoll_poll_dev() is called.
+    Output is saved in the global variable `maps`"""
+
+    # This is going to update the global variable, that will be seen by the
+    # main function
+    global MAPS  # pylint: disable=W0603
+
+    # This will be passed to bpftrace as in bpftrace -e "expr"
+    expr = "kprobe:netpoll_poll_dev { @hits = count(); }"
+
+    MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True)
+    logging.debug("BPFtrace output: %s", MAPS)
+
+
+def bpftrace_start():
+    """Start a thread to call `call_bpf` in a parallel thread"""
+    global BPF_THREAD  # pylint: disable=W0603
+
+    BPF_THREAD = threading.Thread(target=bpftrace_call)
+    BPF_THREAD.start()
+    if not BPF_THREAD.is_alive():
+        raise KsftSkipEx("BPFtrace thread is not alive. Skipping test")
+
+
+def bpftrace_stop() -> None:
+    """Stop the bpftrace thread"""
+    if BPF_THREAD:
+        BPF_THREAD.join()
+
+
+def bpftrace_any_hit(join: bool) -> bool:
+    """Check if netpoll_poll_dev() was called by checking the global variable `maps`"""
+    if not BPF_THREAD:
+        raise KsftFailEx("BPFtrace didn't start")
+
+    if BPF_THREAD.is_alive():
+        if join:
+            # Wait for bpftrace to finish
+            BPF_THREAD.join()
+        else:
+            # bpftrace is still running, so, we will not check the result yet
+            return False
+
+    logging.debug("MAPS coming from bpftrace = %s", MAPS)
+    if "hits" not in MAPS.keys():
+        raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}")
+
+    logging.debug("Got a total of %d hits", MAPS["hits"])
+    return MAPS["hits"] > 0
+
+
+def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
+    """Print messages to the console, trying to trigger a netpoll poll"""
+    # Start bpftrace in parallel, so, it is watching
+    # netpoll_poll_dev() while we are sending netconsole messages
+    bpftrace_start()
+    defer(bpftrace_stop)
+
+    do_netpoll_flush(cfg, ifname, target_name)
+
+    if bpftrace_any_hit(join=True):
+        ksft_pr("netpoll_poll_dev() was called. Success")
+        return
+
+    raise KsftXfailEx("netpoll_poll_dev() was not called during the test...")
+
+
+def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
+    """Print messages to the console, trying to trigger a netpoll poll"""
+    netcons_configure_target(cfg, ifname, target_name)
+    retry = 0
+
+    for i in range(int(ITERATIONS)):
+        if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False):
+            # bpftrace is done, stop sending messages
+            break
+
+        msg = f"netcons test #{i}"
+        with open("/dev/kmsg", "w", encoding="utf-8") as kmsg:
+            for j in range(MAX_WRITES):
+                try:
+                    kmsg.write(f"{msg}-{j}\n")
+                except OSError as exception:
+                    # in some cases, kmsg can be busy, so, we will retry
+                    time.sleep(1)
+                    retry += 1
+                    if retry < 5:
+                        logging.info("Failed to write to kmsg. Retrying")
+                        # Just retry a few times
+                        continue
+                    raise KsftFailEx(
+                        f"Failed to write to kmsg: {exception}"
+                    ) from exception
+
+        netcons_delete_target(target_name)
+        netcons_configure_target(cfg, ifname, target_name)
+        # If we sleep here, we will have a better chance of triggering
+        # This number is based on a few tests I ran while developing this test
+        time.sleep(0.4)
+
+
+def configure_network(ifname: str) -> None:
+    """Configure ring size and queue numbers"""
+
+    # Set defined queues to 1 to force congestion
+    prev_queues = ethtool_get_queues_cnt(ifname)
+    logging.debug("RX/TX/combined queues: %s", prev_queues)
+    # Only set the queues to 1 if they exists in the device. I.e, they are > 0
+    ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues))
+    defer(ethtool_set_queues_cnt, ifname, prev_queues)
+
+    # Try to set the ring size to some low value.
+    # Do not fail if the hardware do not accepted desired values
+    prev_ring_size = ethtool_get_ringsize(ifname)
+    for size in [(1, 1), (128, 128), (256, 256)]:
+        if ethtool_set_ringsize(ifname, size):
+            # hardware accepted the desired ringsize
+            logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size)
+            break
+    defer(ethtool_set_ringsize, ifname, prev_ring_size)
+
+
+def test_netpoll(cfg: NetDrvEpEnv) -> None:
+    """
+    Test netpoll by sending traffic to the interface and then sending
+    netconsole messages to trigger a poll
+    """
+
+    ifname = cfg.ifname
+    configure_network(ifname)
+    target_name = netcons_generate_random_target_name()
+    traffic = None
+
+    try:
+        traffic = GenerateTraffic(cfg)
+        do_netpoll_flush_monitored(cfg, ifname, target_name)
+    finally:
+        if traffic:
+            traffic.stop()
+
+        # Revert RX/TX queues
+        netcons_delete_target(target_name)
+
+
+def test_check_dependencies() -> None:
+    """Check if the dependencies are met"""
+    if not os.path.exists(NETCONSOLE_CONFIGFS_PATH):
+        raise KsftSkipEx(
+            f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set."  # pylint: disable=C0301
+        )
+
+
+def main() -> None:
+    """Main function to run the test"""
+    netcons_load_module()
+    test_check_dependencies()
+    with NetDrvEpEnv(__file__) as cfg:
+        ksft_run(
+            [test_netpoll],
+            args=(cfg,),
+        )
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh
new file mode 100755
index 000000000000..c51c83421c61
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh
@@ -0,0 +1,253 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2022 NXP
+
+# The script is mostly generic, with the exception of the
+# ethtool per-TC counter names ("rx_green_prio_${tc}")
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command dcb
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h1_vlan_create()
+{
+	local vid=$1
+
+	vlan_create $h1 $vid
+	simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+	ip link set $h1.$vid type vlan \
+		egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \
+		ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+h1_vlan_destroy()
+{
+	local vid=$1
+
+	simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+	vlan_destroy $h1 $vid
+}
+
+h2_vlan_create()
+{
+	local vid=$1
+
+	vlan_create $h2 $vid
+	simple_if_init $h2.$vid $H2_IPV4/24 $H2_IPV6/64
+	ip link set $h2.$vid type vlan \
+		egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \
+		ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+h2_vlan_destroy()
+{
+	local vid=$1
+
+	simple_if_fini $h2.$vid $H2_IPV4/24 $H2_IPV6/64
+	vlan_destroy $h2 $vid
+}
+
+vlans_prepare()
+{
+	h1_vlan_create 100
+	h2_vlan_create 100
+
+	tc qdisc add dev ${h1}.100 clsact
+	tc filter add dev ${h1}.100 egress protocol ipv4 \
+		flower ip_proto icmp action skbedit priority 3
+	tc filter add dev ${h1}.100 egress protocol ipv6 \
+		flower ip_proto icmpv6 action skbedit priority 3
+}
+
+vlans_destroy()
+{
+	tc qdisc del dev ${h1}.100 clsact
+
+	h1_vlan_destroy 100
+	h2_vlan_destroy 100
+}
+
+switch_create()
+{
+	ip link set ${swp1} up
+	ip link set ${swp2} up
+
+	# Ports should trust VLAN PCP even with vlan_filtering=0
+	ip link add br0 type bridge
+	ip link set ${swp1} master br0
+	ip link set ${swp2} master br0
+	ip link set br0 up
+}
+
+switch_destroy()
+{
+	ip link del br0
+}
+
+setup_prepare()
+{
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+	switch_destroy
+
+	vrf_cleanup
+}
+
+dscp_cs_to_tos()
+{
+	local dscp_cs=$1
+
+	# https://datatracker.ietf.org/doc/html/rfc2474
+	# 4.2.2.1  The Class Selector Codepoints
+	echo $((${dscp_cs} << 5))
+}
+
+run_test()
+{
+	local test_name=$1; shift
+	local if_name=$1; shift
+	local tc=$1; shift
+	local tos=$1; shift
+	local counter_name="rx_green_prio_${tc}"
+	local ipv4_before
+	local ipv4_after
+	local ipv6_before
+	local ipv6_after
+
+	ipv4_before=$(ethtool_stats_get ${swp1} "${counter_name}")
+	ping_do ${if_name} $H2_IPV4 "-Q ${tos}"
+	ipv4_after=$(ethtool_stats_get ${swp1} "${counter_name}")
+
+	if [ $((${ipv4_after} - ${ipv4_before})) -lt ${PING_COUNT} ]; then
+		RET=1
+	else
+		RET=0
+	fi
+	log_test "IPv4 ${test_name}"
+
+	ipv6_before=$(ethtool_stats_get ${swp1} "${counter_name}")
+	ping_do ${if_name} $H2_IPV6 "-Q ${tos}"
+	ipv6_after=$(ethtool_stats_get ${swp1} "${counter_name}")
+
+	if [ $((${ipv6_after} - ${ipv6_before})) -lt ${PING_COUNT} ]; then
+		RET=1
+	else
+		RET=0
+	fi
+	log_test "IPv6 ${test_name}"
+}
+
+port_default_prio_get()
+{
+	local if_name=$1
+	local prio
+
+	prio="$(dcb -j app show dev ${if_name} default-prio | \
+		jq '.default_prio[]')"
+	if [ -z "${prio}" ]; then
+		prio=0
+	fi
+
+	echo ${prio}
+}
+
+test_port_default()
+{
+	local orig=$(port_default_prio_get ${swp1})
+	local dmac=$(mac_get ${h2})
+
+	dcb app replace dev ${swp1} default-prio 5
+
+	run_test "Port-default QoS classification" ${h1} 5 0
+
+	dcb app replace dev ${swp1} default-prio ${orig}
+}
+
+test_vlan_pcp()
+{
+	vlans_prepare
+
+	run_test "Trusted VLAN PCP QoS classification" ${h1}.100 3 0
+
+	vlans_destroy
+}
+
+test_ip_dscp()
+{
+	local port_default=$(port_default_prio_get ${swp1})
+	local tos=$(dscp_cs_to_tos 4)
+
+	dcb app add dev ${swp1} dscp-prio CS4:4
+	run_test "Trusted DSCP QoS classification" ${h1} 4 ${tos}
+	dcb app del dev ${swp1} dscp-prio CS4:4
+
+	vlans_prepare
+	run_test "Untrusted DSCP QoS classification follows VLAN PCP" \
+		${h1}.100 3 ${tos}
+	vlans_destroy
+
+	run_test "Untrusted DSCP QoS classification follows port default" \
+		${h1} ${port_default} ${tos}
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+	test_port_default
+	test_vlan_pcp
+	test_ip_dscp
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
new file mode 100755
index 000000000000..8972f42dfe03
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
@@ -0,0 +1,323 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2021-2022 NXP
+
+# Note: On LS1028A, in lack of enough user ports, this setup requires patching
+# the device tree to use the second CPU port as a user port
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/tsn_lib.sh
+
+UDS_ADDRESS_H1="/var/run/ptp4l_h1"
+UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1"
+
+# Tunables
+NUM_PKTS=1000
+STREAM_VID=100
+STREAM_PRIO=6
+# Use a conservative cycle of 10 ms to allow the test to still pass when the
+# kernel has some extra overhead like lockdep etc
+CYCLE_TIME_NS=10000000
+# Create two Gate Control List entries, one OPEN and one CLOSE, of equal
+# durations
+GATE_DURATION_NS=$((${CYCLE_TIME_NS} / 2))
+# Give 2/3 of the cycle time to user space and 1/3 to the kernel
+FUDGE_FACTOR=$((${CYCLE_TIME_NS} / 3))
+# Shift the isochron base time by half the gate time, so that packets are
+# always received by swp1 close to the middle of the time slot, to minimize
+# inaccuracies due to network sync
+SHIFT_TIME_NS=$((${GATE_DURATION_NS} / 2))
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# Chain number exported by the ocelot driver for
+# Per-Stream Filtering and Policing filters
+PSFP()
+{
+	echo 30000
+}
+
+psfp_chain_create()
+{
+	local if_name=$1
+
+	tc qdisc add dev $if_name clsact
+
+	tc filter add dev $if_name ingress chain 0 pref 49152 flower \
+		skip_sw action goto chain $(PSFP)
+}
+
+psfp_chain_destroy()
+{
+	local if_name=$1
+
+	tc qdisc del dev $if_name clsact
+}
+
+psfp_filter_check()
+{
+	local expected=$1
+	local packets=""
+	local drops=""
+	local stats=""
+
+	stats=$(tc -j -s filter show dev ${swp1} ingress chain $(PSFP) pref 1)
+	packets=$(echo ${stats} | jq ".[1].options.actions[].stats.packets")
+	drops=$(echo ${stats} | jq ".[1].options.actions[].stats.drops")
+
+	if ! [ "${packets}" = "${expected}" ]; then
+		printf "Expected filter to match on %d packets but matched on %d instead\n" \
+			"${expected}" "${packets}"
+	fi
+
+	echo "Hardware filter reports ${drops} drops"
+}
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+switch_create()
+{
+	local h2_mac_addr=$(mac_get $h2)
+
+	ip link set ${swp1} up
+	ip link set ${swp2} up
+
+	ip link add br0 type bridge vlan_filtering 1
+	ip link set ${swp1} master br0
+	ip link set ${swp2} master br0
+	ip link set br0 up
+
+	bridge vlan add dev ${swp2} vid ${STREAM_VID}
+	bridge vlan add dev ${swp1} vid ${STREAM_VID}
+	# PSFP on Ocelot requires the filter to also be added to the bridge
+	# FDB, and not be removed
+	bridge fdb add dev ${swp2} \
+		${h2_mac_addr} vlan ${STREAM_VID} static master
+
+	psfp_chain_create ${swp1}
+
+	tc filter add dev ${swp1} ingress chain $(PSFP) pref 1 \
+		protocol 802.1Q flower skip_sw \
+		dst_mac ${h2_mac_addr} vlan_id ${STREAM_VID} \
+		action gate base-time 0.000000000 \
+		sched-entry OPEN  ${GATE_DURATION_NS} -1 -1 \
+		sched-entry CLOSE ${GATE_DURATION_NS} -1 -1
+}
+
+switch_destroy()
+{
+	psfp_chain_destroy ${swp1}
+	ip link del br0
+}
+
+txtime_setup()
+{
+	local if_name=$1
+
+	tc qdisc add dev ${if_name} clsact
+	# Classify PTP on TC 7 and isochron on TC 6
+	tc filter add dev ${if_name} egress protocol 0x88f7 \
+		flower action skbedit priority 7
+	tc filter add dev ${if_name} egress protocol 802.1Q \
+		flower vlan_ethtype 0xdead action skbedit priority 6
+	tc qdisc add dev ${if_name} handle 100: parent root mqprio num_tc 8 \
+		queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+		map 0 1 2 3 4 5 6 7 \
+		hw 1
+	# Set up TC 6 for SO_TXTIME. tc-mqprio queues count from 1.
+	tc qdisc replace dev ${if_name} parent 100:$((${STREAM_PRIO} + 1)) etf \
+		clockid CLOCK_TAI offload delta ${FUDGE_FACTOR}
+}
+
+txtime_cleanup()
+{
+	local if_name=$1
+
+	tc qdisc del dev ${if_name} root
+	tc qdisc del dev ${if_name} clsact
+}
+
+setup_prepare()
+{
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+
+	txtime_setup ${h1}
+
+	# Set up swp1 as a master PHC for h1, synchronized to the local
+	# CLOCK_REALTIME.
+	phc2sys_start ${UDS_ADDRESS_SWP1}
+
+	# Assumption true for LS1028A: h1 and h2 use the same PHC. So by
+	# synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized
+	# to swp1 (and both to CLOCK_REALTIME).
+	ptp4l_start ${h1} true ${UDS_ADDRESS_H1}
+	ptp4l_start ${swp1} false ${UDS_ADDRESS_SWP1}
+
+	# Make sure there are no filter matches at the beginning of the test
+	psfp_filter_check 0
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ptp4l_stop ${swp1}
+	ptp4l_stop ${h1}
+	phc2sys_stop
+	isochron_recv_stop
+
+	txtime_cleanup ${h1}
+
+	h2_destroy
+	h1_destroy
+	switch_destroy
+
+	vrf_cleanup
+}
+
+debug_incorrectly_dropped_packets()
+{
+	local isochron_dat=$1
+	local dropped_seqids
+	local seqid
+
+	echo "Packets incorrectly dropped:"
+
+	dropped_seqids=$(isochron report \
+		--input-file "${isochron_dat}" \
+		--printf-format "%u RX hw %T\n" \
+		--printf-args "qR" | \
+		grep 'RX hw 0.000000000' | \
+		awk '{print $1}')
+
+	for seqid in ${dropped_seqids}; do
+		isochron report \
+			--input-file "${isochron_dat}" \
+			--start ${seqid} --stop ${seqid} \
+			--printf-format "seqid %u scheduled for %T, HW TX timestamp %T\n" \
+			--printf-args "qST"
+	done
+}
+
+debug_incorrectly_received_packets()
+{
+	local isochron_dat=$1
+
+	echo "Packets incorrectly received:"
+
+	isochron report \
+		--input-file "${isochron_dat}" \
+		--printf-format "seqid %u scheduled for %T, HW TX timestamp %T, HW RX timestamp %T\n" \
+		--printf-args "qSTR" |
+		grep -v 'HW RX timestamp 0.000000000'
+}
+
+run_test()
+{
+	local base_time=$1
+	local expected=$2
+	local test_name=$3
+	local debug=$4
+	local isochron_dat="$(mktemp)"
+	local extra_args=""
+	local received
+
+	isochron_do \
+		"${h1}" \
+		"${h2}" \
+		"${UDS_ADDRESS_H1}" \
+		"" \
+		"${base_time}" \
+		"${CYCLE_TIME_NS}" \
+		"${SHIFT_TIME_NS}" \
+		"${GATE_DURATION_NS}" \
+		"${NUM_PKTS}" \
+		"${STREAM_VID}" \
+		"${STREAM_PRIO}" \
+		"" \
+		"${isochron_dat}"
+
+	received=$(isochron_report_num_received "${isochron_dat}")
+	if [ "${received}" = "${expected}" ]; then
+		RET=0
+	else
+		RET=1
+		echo "Expected isochron to receive ${expected} packets but received ${received}"
+	fi
+
+	log_test "${test_name}"
+
+	if [ "$RET" = "1" ]; then
+		${debug} "${isochron_dat}"
+	fi
+
+	rm ${isochron_dat} 2> /dev/null
+}
+
+test_gate_in_band()
+{
+	# Send packets in-band with the OPEN gate entry
+	run_test 0.000000000 ${NUM_PKTS} "In band" \
+		debug_incorrectly_dropped_packets
+
+	psfp_filter_check ${NUM_PKTS}
+}
+
+test_gate_out_of_band()
+{
+	# Send packets in-band with the CLOSE gate entry
+	run_test 0.005000000 0 "Out of band" \
+		debug_incorrectly_received_packets
+
+	psfp_filter_check $((2 * ${NUM_PKTS}))
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+	test_gate_in_band
+	test_gate_out_of_band
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
new file mode 100755
index 000000000000..aff0a59f92d9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -0,0 +1,352 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2020 NXP
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command tcpdump
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+# Helpers to map a VCAP IS1 and VCAP IS2 lookup and policy to a chain number
+# used by the kernel driver. The numbers are:
+# VCAP IS1 lookup 0:            10000
+# VCAP IS1 lookup 1:            11000
+# VCAP IS1 lookup 2:            12000
+# VCAP IS2 lookup 0 policy 0:   20000
+# VCAP IS2 lookup 0 policy 1:   20001
+# VCAP IS2 lookup 0 policy 255: 20255
+# VCAP IS2 lookup 1 policy 0:   21000
+# VCAP IS2 lookup 1 policy 1:   21001
+# VCAP IS2 lookup 1 policy 255: 21255
+IS1()
+{
+	local lookup=$1
+
+	echo $((10000 + 1000 * lookup))
+}
+
+IS2()
+{
+	local lookup=$1
+	local pag=$2
+
+	echo $((20000 + 1000 * lookup + pag))
+}
+
+ES0()
+{
+	echo 0
+}
+
+# The Ocelot switches have a fixed ingress pipeline composed of:
+#
+# +----------------------------------------------+      +-----------------------------------------+
+# |                   VCAP IS1                   |      |                  VCAP IS2               |
+# |                                              |      |                                         |
+# | +----------+    +----------+    +----------+ |      |            +----------+    +----------+ |
+# | | Lookup 0 |    | Lookup 1 |    | Lookup 2 | | --+------> PAG 0: | Lookup 0 | -> | Lookup 1 | |
+# | +----------+ -> +----------+ -> +----------+ |   |  |            +----------+    +----------+ |
+# | |key&action|    |key&action|    |key&action| |   |  |            |key&action|    |key&action| |
+# | |key&action|    |key&action|    |key&action| |   |  |            |    ..    |    |    ..    | |
+# | |    ..    |    |    ..    |    |    ..    | |   |  |            +----------+    +----------+ |
+# | +----------+    +----------+    +----------+ |   |  |                                         |
+# |                                 selects PAG  |   |  |            +----------+    +----------+ |
+# +----------------------------------------------+   +------> PAG 1: | Lookup 0 | -> | Lookup 1 | |
+#                                                    |  |            +----------+    +----------+ |
+#                                                    |  |            |key&action|    |key&action| |
+#                                                    |  |            |    ..    |    |    ..    | |
+#                                                    |  |            +----------+    +----------+ |
+#                                                    |  |      ...                                |
+#                                                    |  |                                         |
+#                                                    |  |            +----------+    +----------+ |
+#                                                    +----> PAG 254: | Lookup 0 | -> | Lookup 1 | |
+#                                                    |  |            +----------+    +----------+ |
+#                                                    |  |            |key&action|    |key&action| |
+#                                                    |  |            |    ..    |    |    ..    | |
+#                                                    |  |            +----------+    +----------+ |
+#                                                    |  |                                         |
+#                                                    |  |            +----------+    +----------+ |
+#                                                    +----> PAG 255: | Lookup 0 | -> | Lookup 1 | |
+#                                                       |            +----------+    +----------+ |
+#                                                       |            |key&action|    |key&action| |
+#                                                       |            |    ..    |    |    ..    | |
+#                                                       |            +----------+    +----------+ |
+#                                                       +-----------------------------------------+
+#
+# Both the VCAP IS1 (Ingress Stage 1) and IS2 (Ingress Stage 2) are indexed
+# (looked up) multiple times: IS1 3 times, and IS2 2 times. Each filter
+# (key and action pair) can be configured to only match during the first, or
+# second, etc, lookup.
+#
+# During one TCAM lookup, the filter processing stops at the first entry that
+# matches, then the pipeline jumps to the next lookup.
+# The driver maps each individual lookup of each individual ingress TCAM to a
+# separate chain number. For correct rule offloading, it is mandatory that each
+# filter installed in one TCAM is terminated by a non-optional GOTO action to
+# the next lookup from the fixed pipeline.
+#
+# A chain can only be used if there is a GOTO action correctly set up from the
+# prior lookup in the processing pipeline. Setting up all chains is not
+# mandatory.
+
+# NOTE: VCAP IS1 currently uses only S1_NORMAL half keys and VCAP IS2
+# dynamically chooses between MAC_ETYPE, ARP, IP4_TCP_UDP, IP4_OTHER, which are
+# all half keys as well.
+
+create_tcam_skeleton()
+{
+	local eth=$1
+
+	tc qdisc add dev $eth clsact
+
+	# VCAP IS1 is the Ingress Classification TCAM and can offload the
+	# following actions:
+	# - skbedit priority
+	# - vlan pop
+	# - vlan modify
+	# - goto (only in lookup 2, the last IS1 lookup)
+	tc filter add dev $eth ingress chain 0 pref 49152 flower \
+		skip_sw action goto chain $(IS1 0)
+	tc filter add dev $eth ingress chain $(IS1 0) pref 49152 \
+		flower skip_sw action goto chain $(IS1 1)
+	tc filter add dev $eth ingress chain $(IS1 1) pref 49152 \
+		flower skip_sw action goto chain $(IS1 2)
+	tc filter add dev $eth ingress chain $(IS1 2) pref 49152 \
+		flower skip_sw action goto chain $(IS2 0 0)
+
+	# VCAP IS2 is the Security Enforcement ingress TCAM and can offload the
+	# following actions:
+	# - trap
+	# - drop
+	# - police
+	# The two VCAP IS2 lookups can be segmented into up to 256 groups of
+	# rules, called Policies. A Policy is selected through the Policy
+	# Association Group (PAG) action of VCAP IS1 (which is the
+	# GOTO offload).
+	tc filter add dev $eth ingress chain $(IS2 0 0) pref 49152 \
+		flower skip_sw action goto chain $(IS2 1 0)
+}
+
+setup_prepare()
+{
+	ip link set $swp1 up
+	ip link set $swp2 up
+	ip link set $h2 up
+	ip link set $h1 up
+
+	create_tcam_skeleton $swp1
+
+	ip link add br0 type bridge
+	ip link set $swp1 master br0
+	ip link set $swp2 master br0
+	ip link set br0 up
+
+	ip link add link $h1 name $h1.100 type vlan id 100
+	ip link set $h1.100 up
+
+	ip link add link $h1 name $h1.200 type vlan id 200
+	ip link set $h1.200 up
+
+	tc filter add dev $swp1 ingress chain $(IS1 1) pref 1 \
+		protocol 802.1Q flower skip_sw vlan_id 100 \
+		action vlan pop \
+		action goto chain $(IS1 2)
+
+	tc filter add dev $swp1 egress chain $(ES0) pref 1 \
+		flower skip_sw indev $swp2 \
+		action vlan push protocol 802.1Q id 100
+
+	tc filter add dev $swp1 ingress chain $(IS1 0) pref 2 \
+		protocol ipv4 flower skip_sw src_ip 10.1.1.2 \
+		action skbedit priority 7 \
+		action goto chain $(IS1 1)
+
+	tc filter add dev $swp1 ingress chain $(IS2 0 0) pref 1 \
+		protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \
+		action police rate 50mbit burst 64k conform-exceed drop/pipe \
+		action goto chain $(IS2 1 0)
+}
+
+cleanup()
+{
+	ip link del $h1.200
+	ip link del $h1.100
+	tc qdisc del dev $swp1 clsact
+	ip link del br0
+}
+
+test_vlan_pop()
+{
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
+
+	RET=0
+
+	tcpdump_start $h2
+
+	# Work around Mausezahn VLAN builder bug
+	# (https://github.com/netsniff-ng/netsniff-ng/issues/225) by using
+	# an 8021q upper
+	$MZ $h1.100 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
+
+	sleep 1
+
+	tcpdump_stop $h2
+
+	tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, ethertype IPv4"
+	check_err "$?" "untagged reception"
+
+	tcpdump_cleanup $h2
+
+	log_test "VLAN pop"
+}
+
+test_vlan_push()
+{
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
+
+	RET=0
+
+	tcpdump_start $h1.100
+
+	$MZ $h2 -q -c 1 -p 64 -a $h2_mac -b $h1_mac -t ip
+
+	sleep 1
+
+	tcpdump_stop $h1.100
+
+	tcpdump_show $h1.100 | grep -q "$h2_mac > $h1_mac"
+	check_err "$?" "tagged reception"
+
+	tcpdump_cleanup $h1.100
+
+	log_test "VLAN push"
+}
+
+test_vlan_ingress_modify()
+{
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
+
+	RET=0
+
+	ip link set br0 type bridge vlan_filtering 1
+	bridge vlan add dev $swp1 vid 200
+	bridge vlan add dev $swp1 vid 300
+	bridge vlan add dev $swp2 vid 300
+
+	tc filter add dev $swp1 ingress chain $(IS1 2) pref 3 \
+		protocol 802.1Q flower skip_sw vlan_id 200 src_mac $h1_mac \
+		action vlan modify id 300 \
+		action goto chain $(IS2 0 0)
+
+	tcpdump_start $h2
+
+	$MZ $h1.200 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
+
+	sleep 1
+
+	tcpdump_stop $h2
+
+	tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, .* vlan 300"
+	check_err "$?" "tagged reception"
+
+	tcpdump_cleanup $h2
+
+	tc filter del dev $swp1 ingress chain $(IS1 2) pref 3
+
+	bridge vlan del dev $swp1 vid 200
+	bridge vlan del dev $swp1 vid 300
+	bridge vlan del dev $swp2 vid 300
+	ip link set br0 type bridge vlan_filtering 0
+
+	log_test "Ingress VLAN modification"
+}
+
+test_vlan_egress_modify()
+{
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
+
+	RET=0
+
+	tc qdisc add dev $swp2 clsact
+
+	ip link set br0 type bridge vlan_filtering 1
+	bridge vlan add dev $swp1 vid 200
+	bridge vlan add dev $swp2 vid 200
+
+	tc filter add dev $swp2 egress chain $(ES0) pref 3 \
+		protocol 802.1Q flower skip_sw vlan_id 200 vlan_prio 0 \
+		action vlan modify id 300 priority 7
+
+	tcpdump_start $h2
+
+	$MZ $h1.200 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
+
+	sleep 1
+
+	tcpdump_stop $h2
+
+	tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, .* vlan 300"
+	check_err "$?" "tagged reception"
+
+	tcpdump_cleanup $h2
+
+	tc filter del dev $swp2 egress chain $(ES0) pref 3
+	tc qdisc del dev $swp2 clsact
+
+	bridge vlan del dev $swp1 vid 200
+	bridge vlan del dev $swp2 vid 200
+	ip link set br0 type bridge vlan_filtering 0
+
+	log_test "Egress VLAN modification"
+}
+
+test_skbedit_priority()
+{
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
+	local num_pkts=100
+
+	before=$(ethtool_stats_get $swp1 'rx_green_prio_7')
+
+	$MZ $h1 -q -c $num_pkts -p 64 -a $h1_mac -b $h2_mac -t ip -A 10.1.1.2
+
+	after=$(ethtool_stats_get $swp1 'rx_green_prio_7')
+
+	if [ $((after - before)) = $num_pkts ]; then
+		RET=0
+	else
+		RET=1
+	fi
+
+	log_test "Frame prioritization"
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+	test_vlan_pop
+	test_vlan_push
+	test_vlan_ingress_modify
+	test_vlan_egress_modify
+	test_skbedit_priority
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
new file mode 100755
index 000000000000..da3623c5e8a9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import random, string, time
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, KsftSkipEx, KsftFailEx
+from lib.py import EthtoolFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, wait_port_listen, rand_port
+from lib.py import defer, ethtool, ip
+
+no_sleep=False
+
+def _test_v4(cfg) -> None:
+    if not cfg.addr_v["4"]:
+        return
+
+    cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"])
+    cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote)
+    cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["4"])
+    cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote)
+
+def _test_v6(cfg) -> None:
+    if not cfg.addr_v["6"]:
+        return
+
+    cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"])
+    cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote)
+    cmd("ping -s 65000 -c 1 -W0.5 " + cfg.remote_addr_v["6"])
+    cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote)
+
+def _test_tcp(cfg) -> None:
+    cfg.require_cmd("socat", local=False, remote=True)
+
+    port = rand_port()
+    listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT"
+
+    test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
+    with bkg(listen_cmd, exit_wait=True) as nc:
+        wait_port_listen(port)
+
+        cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
+            shell=True, host=cfg.remote)
+    ksft_eq(nc.stdout.strip(), test_string)
+
+    test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
+    with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
+        wait_port_listen(port, host=cfg.remote)
+
+        cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
+    ksft_eq(nc.stdout.strip(), test_string)
+
+def _schedule_checksum_reset(cfg, netnl) -> None:
+    features = ethtool(f"-k {cfg.ifname}", json=True)
+    setting = ""
+    for side in ["tx", "rx"]:
+        f = features[0][side + "-checksumming"]
+        if not f["fixed"]:
+            setting += " " + side
+            setting += " " + ("on" if f["requested"] or f["active"] else "off")
+    defer(ethtool, f" -K {cfg.ifname} " + setting)
+
+def _set_offload_checksum(cfg, netnl, on) -> None:
+    try:
+        ethtool(f" -K {cfg.ifname} rx {on} tx {on} ")
+    except:
+        return
+
+def _set_xdp_generic_sb_on(cfg) -> None:
+    prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+    cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+    cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True)
+    defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off")
+
+    if no_sleep != True:
+        time.sleep(10)
+
+def _set_xdp_generic_mb_on(cfg) -> None:
+    prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+    cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+    defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
+    ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog))
+    defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off")
+
+    if no_sleep != True:
+        time.sleep(10)
+
+def _set_xdp_native_sb_on(cfg) -> None:
+    prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+    cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+    cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True)
+    defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+    xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+    if xdp_info['xdp']['mode'] != 1:
+        """
+        If the interface doesn't support native-mode, it falls back to generic mode.
+        The mode value 1 is native and 2 is generic.
+        So it raises an exception if mode is not 1(native mode).
+        """
+        raise KsftSkipEx('device does not support native-XDP')
+
+    if no_sleep != True:
+        time.sleep(10)
+
+def _set_xdp_native_mb_on(cfg) -> None:
+    prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+    cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+    defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
+    try:
+        cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True)
+        defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+    except Exception as e:
+        raise KsftSkipEx('device does not support native-multi-buffer XDP')
+
+    if no_sleep != True:
+        time.sleep(10)
+
+def _set_xdp_offload_on(cfg) -> None:
+    prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+    cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+    try:
+        cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True)
+    except Exception as e:
+        raise KsftSkipEx('device does not support offloaded XDP')
+    defer(ip, f"link set dev {cfg.ifname} xdpoffload off")
+    cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+    if no_sleep != True:
+        time.sleep(10)
+
+def get_interface_info(cfg) -> None:
+    global no_sleep
+
+    if cfg.remote_ifname == "":
+        raise KsftFailEx('Can not get remote interface')
+    local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+    if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim":
+        no_sleep=True
+    if 'linkinfo' in local_info and local_info['linkinfo']['info_kind'] == "veth":
+        no_sleep=True
+
+def set_interface_init(cfg) -> None:
+    cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+    cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True)
+    cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True)
+    cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True)
+    cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+def test_default_v4(cfg, netnl) -> None:
+    cfg.require_ipver("4")
+
+    _schedule_checksum_reset(cfg, netnl)
+    _set_offload_checksum(cfg, netnl, "off")
+    _test_v4(cfg)
+    _test_tcp(cfg)
+    _set_offload_checksum(cfg, netnl, "on")
+    _test_v4(cfg)
+    _test_tcp(cfg)
+
+def test_default_v6(cfg, netnl) -> None:
+    cfg.require_ipver("6")
+
+    _schedule_checksum_reset(cfg, netnl)
+    _set_offload_checksum(cfg, netnl, "off")
+    _test_v6(cfg)
+    _test_tcp(cfg)
+    _set_offload_checksum(cfg, netnl, "on")
+    _test_v6(cfg)
+    _test_tcp(cfg)
+
+def test_xdp_generic_sb(cfg, netnl) -> None:
+    _schedule_checksum_reset(cfg, netnl)
+    _set_xdp_generic_sb_on(cfg)
+    _set_offload_checksum(cfg, netnl, "off")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+    _set_offload_checksum(cfg, netnl, "on")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+
+def test_xdp_generic_mb(cfg, netnl) -> None:
+    _schedule_checksum_reset(cfg, netnl)
+    _set_xdp_generic_mb_on(cfg)
+    _set_offload_checksum(cfg, netnl, "off")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+    _set_offload_checksum(cfg, netnl, "on")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+
+def test_xdp_native_sb(cfg, netnl) -> None:
+    _schedule_checksum_reset(cfg, netnl)
+    _set_xdp_native_sb_on(cfg)
+    _set_offload_checksum(cfg, netnl, "off")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+    _set_offload_checksum(cfg, netnl, "on")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+
+def test_xdp_native_mb(cfg, netnl) -> None:
+    _schedule_checksum_reset(cfg, netnl)
+    _set_xdp_native_mb_on(cfg)
+    _set_offload_checksum(cfg, netnl, "off")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+    _set_offload_checksum(cfg, netnl, "on")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+
+def test_xdp_offload(cfg, netnl) -> None:
+    _set_xdp_offload_on(cfg)
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+
+def main() -> None:
+    with NetDrvEpEnv(__file__) as cfg:
+        get_interface_info(cfg)
+        set_interface_init(cfg)
+        ksft_run([test_default_v4,
+                  test_default_v6,
+                  test_xdp_generic_sb,
+                  test_xdp_generic_mb,
+                  test_xdp_native_sb,
+                  test_xdp_native_mb,
+                  test_xdp_offload],
+                 args=(cfg, EthtoolFamily()))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py
new file mode 100755
index 000000000000..06559ef49b9a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/psp.py
@@ -0,0 +1,640 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Test suite for PSP capable drivers."""
+
+import errno
+import fcntl
+import socket
+import struct
+import termios
+import time
+
+from lib.py import defer
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_true, ksft_eq, ksft_ne, ksft_gt, ksft_raises
+from lib.py import ksft_not_none
+from lib.py import KsftSkipEx
+from lib.py import NetDrvEpEnv, PSPFamily, NlError
+from lib.py import bkg, rand_port, wait_port_listen
+
+
+def _get_outq(s):
+    one = b'\0' * 4
+    outq = fcntl.ioctl(s.fileno(), termios.TIOCOUTQ, one)
+    return struct.unpack("I", outq)[0]
+
+
+def _send_with_ack(cfg, msg):
+    cfg.comm_sock.send(msg)
+    response = cfg.comm_sock.recv(4)
+    if response != b'ack\0':
+        raise RuntimeError("Unexpected server response", response)
+
+
+def _remote_read_len(cfg):
+    cfg.comm_sock.send(b'read len\0')
+    return int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8'))
+
+
+def _make_clr_conn(cfg, ipver=None):
+    _send_with_ack(cfg, b'conn clr\0')
+    remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr
+    s = socket.create_connection((remote_addr, cfg.comm_port), )
+    return s
+
+
+def _make_psp_conn(cfg, version=0, ipver=None):
+    _send_with_ack(cfg, b'conn psp\0' + struct.pack('BB', version, version))
+    remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr
+    s = socket.create_connection((remote_addr, cfg.comm_port), )
+    return s
+
+
+def _close_conn(cfg, s):
+    _send_with_ack(cfg, b'data close\0')
+    s.close()
+
+
+def _close_psp_conn(cfg, s):
+    _close_conn(cfg, s)
+
+
+def _spi_xchg(s, rx):
+    s.send(struct.pack('I', rx['spi']) + rx['key'])
+    tx = s.recv(4 + len(rx['key']))
+    return {
+        'spi': struct.unpack('I', tx[:4])[0],
+        'key': tx[4:]
+    }
+
+
+def _send_careful(cfg, s, rounds):
+    data = b'0123456789' * 200
+    for i in range(rounds):
+        n = 0
+        for _ in range(10): # allow 10 retries
+            try:
+                n += s.send(data[n:], socket.MSG_DONTWAIT)
+                if n == len(data):
+                    break
+            except BlockingIOError:
+                time.sleep(0.05)
+        else:
+            rlen = _remote_read_len(cfg)
+            outq = _get_outq(s)
+            report = f'sent: {i * len(data) + n} remote len: {rlen} outq: {outq}'
+            raise RuntimeError(report)
+
+    return len(data) * rounds
+
+
+def _check_data_rx(cfg, exp_len):
+    read_len = -1
+    for _ in range(30):
+        cfg.comm_sock.send(b'read len\0')
+        read_len = int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8'))
+        if read_len == exp_len:
+            break
+        time.sleep(0.01)
+    ksft_eq(read_len, exp_len)
+
+
+def _check_data_outq(s, exp_len, force_wait=False):
+    outq = 0
+    for _ in range(10):
+        outq = _get_outq(s)
+        if not force_wait and outq == exp_len:
+            break
+        time.sleep(0.01)
+    ksft_eq(outq, exp_len)
+
+
+def _get_stat(cfg, key):
+    return cfg.pspnl.get_stats({'dev-id': cfg.psp_dev_id})[key]
+
+#
+# Test case boiler plate
+#
+
+def _init_psp_dev(cfg):
+    if not hasattr(cfg, 'psp_dev_id'):
+        # Figure out which local device we are testing against
+        for dev in cfg.pspnl.dev_get({}, dump=True):
+            if dev['ifindex'] == cfg.ifindex:
+                cfg.psp_info = dev
+                cfg.psp_dev_id = cfg.psp_info['id']
+                break
+        else:
+            raise KsftSkipEx("No PSP devices found")
+
+    # Enable PSP if necessary
+    cap = cfg.psp_info['psp-versions-cap']
+    ena = cfg.psp_info['psp-versions-ena']
+    if cap != ena:
+        cfg.pspnl.dev_set({'id': cfg.psp_dev_id, 'psp-versions-ena': cap})
+        defer(cfg.pspnl.dev_set, {'id': cfg.psp_dev_id,
+                                  'psp-versions-ena': ena })
+
+#
+# Test cases
+#
+
+def dev_list_devices(cfg):
+    """ Dump all devices """
+    _init_psp_dev(cfg)
+
+    devices = cfg.pspnl.dev_get({}, dump=True)
+
+    found = False
+    for dev in devices:
+        found |= dev['id'] == cfg.psp_dev_id
+    ksft_true(found)
+
+
+def dev_get_device(cfg):
+    """ Get the device we intend to use """
+    _init_psp_dev(cfg)
+
+    dev = cfg.pspnl.dev_get({'id': cfg.psp_dev_id})
+    ksft_eq(dev['id'], cfg.psp_dev_id)
+
+
+def dev_get_device_bad(cfg):
+    """ Test getting device which doesn't exist """
+    raised = False
+    try:
+        cfg.pspnl.dev_get({'id': 1234567})
+    except NlError as e:
+        ksft_eq(e.nl_msg.error, -errno.ENODEV)
+        raised = True
+    ksft_true(raised)
+
+
+def dev_rotate(cfg):
+    """ Test key rotation """
+    _init_psp_dev(cfg)
+
+    prev_rotations = _get_stat(cfg, 'key-rotations')
+
+    rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+    ksft_eq(rot['id'], cfg.psp_dev_id)
+    rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+    ksft_eq(rot['id'], cfg.psp_dev_id)
+
+    cur_rotations = _get_stat(cfg, 'key-rotations')
+    ksft_eq(cur_rotations, prev_rotations + 2)
+
+
+def dev_rotate_spi(cfg):
+    """ Test key rotation and SPI check """
+    _init_psp_dev(cfg)
+
+    top_a = top_b = 0
+    with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+        assoc_a = cfg.pspnl.rx_assoc({"version": 0,
+                                     "dev-id": cfg.psp_dev_id,
+                                     "sock-fd": s.fileno()})
+        top_a = assoc_a['rx-key']['spi'] >> 31
+        s.close()
+    rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+    with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+        ksft_eq(rot['id'], cfg.psp_dev_id)
+        assoc_b = cfg.pspnl.rx_assoc({"version": 0,
+                                    "dev-id": cfg.psp_dev_id,
+                                    "sock-fd": s.fileno()})
+        top_b = assoc_b['rx-key']['spi'] >> 31
+        s.close()
+    ksft_ne(top_a, top_b)
+
+
+def assoc_basic(cfg):
+    """ Test creating associations """
+    _init_psp_dev(cfg)
+
+    with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+        assoc = cfg.pspnl.rx_assoc({"version": 0,
+                                  "dev-id": cfg.psp_dev_id,
+                                  "sock-fd": s.fileno()})
+        ksft_eq(assoc['dev-id'], cfg.psp_dev_id)
+        ksft_gt(assoc['rx-key']['spi'], 0)
+        ksft_eq(len(assoc['rx-key']['key']), 16)
+
+        assoc = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+                                  "version": 0,
+                                  "tx-key": assoc['rx-key'],
+                                  "sock-fd": s.fileno()})
+        ksft_eq(len(assoc), 0)
+        s.close()
+
+
+def assoc_bad_dev(cfg):
+    """ Test creating associations with bad device ID """
+    _init_psp_dev(cfg)
+
+    with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+        with ksft_raises(NlError) as cm:
+            cfg.pspnl.rx_assoc({"version": 0,
+                              "dev-id": cfg.psp_dev_id + 1234567,
+                              "sock-fd": s.fileno()})
+        ksft_eq(cm.exception.nl_msg.error, -errno.ENODEV)
+
+
+def assoc_sk_only_conn(cfg):
+    """ Test creating associations based on socket """
+    _init_psp_dev(cfg)
+
+    with _make_clr_conn(cfg) as s:
+        assoc = cfg.pspnl.rx_assoc({"version": 0,
+                                  "sock-fd": s.fileno()})
+        ksft_eq(assoc['dev-id'], cfg.psp_dev_id)
+        cfg.pspnl.tx_assoc({"version": 0,
+                          "tx-key": assoc['rx-key'],
+                          "sock-fd": s.fileno()})
+        _close_conn(cfg, s)
+
+
+def assoc_sk_only_mismatch(cfg):
+    """ Test creating associations based on socket (dev mismatch) """
+    _init_psp_dev(cfg)
+
+    with _make_clr_conn(cfg) as s:
+        with ksft_raises(NlError) as cm:
+            cfg.pspnl.rx_assoc({"version": 0,
+                              "dev-id": cfg.psp_dev_id + 1234567,
+                              "sock-fd": s.fileno()})
+        the_exception = cm.exception
+        ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id")
+        ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+
+
+def assoc_sk_only_mismatch_tx(cfg):
+    """ Test creating associations based on socket (dev mismatch) """
+    _init_psp_dev(cfg)
+
+    with _make_clr_conn(cfg) as s:
+        with ksft_raises(NlError) as cm:
+            assoc = cfg.pspnl.rx_assoc({"version": 0,
+                                      "sock-fd": s.fileno()})
+            cfg.pspnl.tx_assoc({"version": 0,
+                              "tx-key": assoc['rx-key'],
+                              "dev-id": cfg.psp_dev_id + 1234567,
+                              "sock-fd": s.fileno()})
+        the_exception = cm.exception
+        ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id")
+        ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+
+
+def assoc_sk_only_unconn(cfg):
+    """ Test creating associations based on socket (unconnected, should fail) """
+    _init_psp_dev(cfg)
+
+    with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+        with ksft_raises(NlError) as cm:
+            cfg.pspnl.rx_assoc({"version": 0,
+                              "sock-fd": s.fileno()})
+        the_exception = cm.exception
+        ksft_eq(the_exception.nl_msg.extack['miss-type'], "dev-id")
+        ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+
+
+def assoc_version_mismatch(cfg):
+    """ Test creating associations where Rx and Tx PSP versions do not match """
+    _init_psp_dev(cfg)
+
+    versions = list(cfg.psp_info['psp-versions-cap'])
+    if len(versions) < 2:
+        raise KsftSkipEx("Not enough PSP versions supported by the device for the test")
+
+    # Translate versions to integers
+    versions = [cfg.pspnl.consts["version"].entries[v].value for v in versions]
+
+    with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+        rx = cfg.pspnl.rx_assoc({"version": versions[0],
+                                 "dev-id": cfg.psp_dev_id,
+                                 "sock-fd": s.fileno()})
+
+        for version in versions[1:]:
+            with ksft_raises(NlError) as cm:
+                cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+                                    "version": version,
+                                    "tx-key": rx['rx-key'],
+                                    "sock-fd": s.fileno()})
+            the_exception = cm.exception
+            ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+
+
+def assoc_twice(cfg):
+    """ Test reusing Tx assoc for two sockets """
+    _init_psp_dev(cfg)
+
+    def rx_assoc_check(s):
+        assoc = cfg.pspnl.rx_assoc({"version": 0,
+                                  "dev-id": cfg.psp_dev_id,
+                                  "sock-fd": s.fileno()})
+        ksft_eq(assoc['dev-id'], cfg.psp_dev_id)
+        ksft_gt(assoc['rx-key']['spi'], 0)
+        ksft_eq(len(assoc['rx-key']['key']), 16)
+
+        return assoc
+
+    with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+        assoc = rx_assoc_check(s)
+        tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+                               "version": 0,
+                               "tx-key": assoc['rx-key'],
+                               "sock-fd": s.fileno()})
+        ksft_eq(len(tx), 0)
+
+        # Use the same Tx assoc second time
+        with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s2:
+            rx_assoc_check(s2)
+            tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+                                   "version": 0,
+                                   "tx-key": assoc['rx-key'],
+                                   "sock-fd": s2.fileno()})
+            ksft_eq(len(tx), 0)
+
+        s.close()
+
+
+def _data_basic_send(cfg, version, ipver):
+    """ Test basic data send """
+    _init_psp_dev(cfg)
+
+    # Version 0 is required by spec, don't let it skip
+    if version:
+        name = cfg.pspnl.consts["version"].entries_by_val[version].name
+        if name not in cfg.psp_info['psp-versions-cap']:
+            with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+                with ksft_raises(NlError) as cm:
+                    cfg.pspnl.rx_assoc({"version": version,
+                                        "dev-id": cfg.psp_dev_id,
+                                        "sock-fd": s.fileno()})
+                ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP)
+            raise KsftSkipEx("PSP version not supported", name)
+
+    s = _make_psp_conn(cfg, version, ipver)
+
+    rx_assoc = cfg.pspnl.rx_assoc({"version": version,
+                                   "dev-id": cfg.psp_dev_id,
+                                   "sock-fd": s.fileno()})
+    rx = rx_assoc['rx-key']
+    tx = _spi_xchg(s, rx)
+
+    cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+                        "version": version,
+                        "tx-key": tx,
+                        "sock-fd": s.fileno()})
+
+    data_len = _send_careful(cfg, s, 100)
+    _check_data_rx(cfg, data_len)
+    _close_psp_conn(cfg, s)
+
+
+def __bad_xfer_do(cfg, s, tx, version='hdr0-aes-gcm-128'):
+    # Make sure we accept the ACK for the SPI before we seal with the bad assoc
+    _check_data_outq(s, 0)
+
+    cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+                        "version": version,
+                        "tx-key": tx,
+                        "sock-fd": s.fileno()})
+
+    data_len = _send_careful(cfg, s, 20)
+    _check_data_outq(s, data_len, force_wait=True)
+    _check_data_rx(cfg, 0)
+    _close_psp_conn(cfg, s)
+
+
+def data_send_bad_key(cfg):
+    """ Test send data with bad key """
+    _init_psp_dev(cfg)
+
+    s = _make_psp_conn(cfg)
+
+    rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+                                   "dev-id": cfg.psp_dev_id,
+                                   "sock-fd": s.fileno()})
+    rx = rx_assoc['rx-key']
+    tx = _spi_xchg(s, rx)
+    tx['key'] = (tx['key'][0] ^ 0xff).to_bytes(1, 'little') + tx['key'][1:]
+    __bad_xfer_do(cfg, s, tx)
+
+
+def data_send_disconnect(cfg):
+    """ Test socket close after sending data """
+    _init_psp_dev(cfg)
+
+    with _make_psp_conn(cfg) as s:
+        assoc = cfg.pspnl.rx_assoc({"version": 0,
+                                  "sock-fd": s.fileno()})
+        tx = _spi_xchg(s, assoc['rx-key'])
+        cfg.pspnl.tx_assoc({"version": 0,
+                          "tx-key": tx,
+                          "sock-fd": s.fileno()})
+
+        data_len = _send_careful(cfg, s, 100)
+        _check_data_rx(cfg, data_len)
+
+        s.shutdown(socket.SHUT_RDWR)
+        s.close()
+
+
+def _data_mss_adjust(cfg, ipver):
+    _init_psp_dev(cfg)
+
+    # First figure out what the MSS would be without any adjustments
+    s = _make_clr_conn(cfg, ipver)
+    s.send(b"0123456789abcdef" * 1024)
+    _check_data_rx(cfg, 16 * 1024)
+    mss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG)
+    _close_conn(cfg, s)
+
+    s = _make_psp_conn(cfg, 0, ipver)
+    try:
+        rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+                                     "dev-id": cfg.psp_dev_id,
+                                     "sock-fd": s.fileno()})
+        rx = rx_assoc['rx-key']
+        tx = _spi_xchg(s, rx)
+
+        rxmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG)
+        ksft_eq(mss, rxmss)
+
+        cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+                          "version": 0,
+                          "tx-key": tx,
+                          "sock-fd": s.fileno()})
+
+        txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG)
+        ksft_eq(mss, txmss + 40)
+
+        data_len = _send_careful(cfg, s, 100)
+        _check_data_rx(cfg, data_len)
+        _check_data_outq(s, 0)
+
+        txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG)
+        ksft_eq(mss, txmss + 40)
+    finally:
+        _close_psp_conn(cfg, s)
+
+
+def data_stale_key(cfg):
+    """ Test send on a double-rotated key """
+    _init_psp_dev(cfg)
+
+    prev_stale = _get_stat(cfg, 'stale-events')
+    s = _make_psp_conn(cfg)
+    try:
+        rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+                                     "dev-id": cfg.psp_dev_id,
+                                     "sock-fd": s.fileno()})
+        rx = rx_assoc['rx-key']
+        tx = _spi_xchg(s, rx)
+
+        cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+                          "version": 0,
+                          "tx-key": tx,
+                          "sock-fd": s.fileno()})
+
+        data_len = _send_careful(cfg, s, 100)
+        _check_data_rx(cfg, data_len)
+        _check_data_outq(s, 0)
+
+        cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+        cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+
+        cur_stale = _get_stat(cfg, 'stale-events')
+        ksft_gt(cur_stale, prev_stale)
+
+        s.send(b'0123456789' * 200)
+        _check_data_outq(s, 2000, force_wait=True)
+    finally:
+        _close_psp_conn(cfg, s)
+
+
+def __nsim_psp_rereg(cfg):
+    # The PSP dev ID will change, remember what was there before
+    before = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)])
+
+    cfg._ns.nsims[0].dfs_write('psp_rereg', '1')
+
+    after = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)])
+
+    new_devs = list(after - before)
+    ksft_eq(len(new_devs), 1)
+    cfg.psp_dev_id = list(after - before)[0]
+
+
+def removal_device_rx(cfg):
+    """ Test removing a netdev / PSD with active Rx assoc """
+
+    # We could technically devlink reload real devices, too
+    # but that kills the control socket. So test this on
+    # netdevsim only for now
+    cfg.require_nsim()
+
+    s = _make_clr_conn(cfg)
+    try:
+        rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+                                       "dev-id": cfg.psp_dev_id,
+                                       "sock-fd": s.fileno()})
+        ksft_not_none(rx_assoc)
+
+        __nsim_psp_rereg(cfg)
+    finally:
+        _close_conn(cfg, s)
+
+
+def removal_device_bi(cfg):
+    """ Test removing a netdev / PSD with active Rx/Tx assoc """
+
+    # We could technically devlink reload real devices, too
+    # but that kills the control socket. So test this on
+    # netdevsim only for now
+    cfg.require_nsim()
+
+    s = _make_clr_conn(cfg)
+    try:
+        rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
+                                       "dev-id": cfg.psp_dev_id,
+                                       "sock-fd": s.fileno()})
+        cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id,
+                            "version": 0,
+                            "tx-key": rx_assoc['rx-key'],
+                            "sock-fd": s.fileno()})
+        __nsim_psp_rereg(cfg)
+    finally:
+        _close_conn(cfg, s)
+
+
+def psp_ip_ver_test_builder(name, test_func, psp_ver, ipver):
+    """Build test cases for each combo of PSP version and IP version"""
+    def test_case(cfg):
+        cfg.require_ipver(ipver)
+        test_case.__name__ = f"{name}_v{psp_ver}_ip{ipver}"
+        test_func(cfg, psp_ver, ipver)
+    return test_case
+
+
+def ipver_test_builder(name, test_func, ipver):
+    """Build test cases for each IP version"""
+    def test_case(cfg):
+        cfg.require_ipver(ipver)
+        test_case.__name__ = f"{name}_ip{ipver}"
+        test_func(cfg, ipver)
+    return test_case
+
+
+def main() -> None:
+    """ Ksft boiler plate main """
+
+    with NetDrvEpEnv(__file__) as cfg:
+        cfg.pspnl = PSPFamily()
+
+        # Set up responder and communication sock
+        responder = cfg.remote.deploy("psp_responder")
+
+        cfg.comm_port = rand_port()
+        srv = None
+        try:
+            with bkg(responder + f" -p {cfg.comm_port}", host=cfg.remote,
+                     exit_wait=True) as srv:
+                wait_port_listen(cfg.comm_port, host=cfg.remote)
+
+                cfg.comm_sock = socket.create_connection((cfg.remote_addr,
+                                                          cfg.comm_port),
+                                                         timeout=1)
+
+                cases = [
+                    psp_ip_ver_test_builder(
+                        "data_basic_send", _data_basic_send, version, ipver
+                    )
+                    for version in range(0, 4)
+                    for ipver in ("4", "6")
+                ]
+                cases += [
+                    ipver_test_builder("data_mss_adjust", _data_mss_adjust, ipver)
+                    for ipver in ("4", "6")
+                ]
+
+                ksft_run(cases=cases, globs=globals(),
+                         case_pfx={"dev_", "data_", "assoc_", "removal_"},
+                         args=(cfg, ))
+
+                cfg.comm_sock.send(b"exit\0")
+                cfg.comm_sock.close()
+        finally:
+            if srv and (srv.stdout or srv.stderr):
+                ksft_pr("")
+                ksft_pr(f"Responder logs ({srv.ret}):")
+            if srv and srv.stdout:
+                ksft_pr("STDOUT:\n#  " + srv.stdout.strip().replace("\n", "\n#  "))
+            if srv and srv.stderr:
+                ksft_pr("STDERR:\n#  " + srv.stderr.strip().replace("\n", "\n#  "))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/psp_responder.c b/tools/testing/selftests/drivers/net/psp_responder.c
new file mode 100644
index 000000000000..f309e0d73cbf
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/psp_responder.c
@@ -0,0 +1,483 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/poll.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <unistd.h>
+
+#include <ynl.h>
+
+#include "psp-user.h"
+
+#define dbg(msg...)				\
+do {						\
+	if (opts->verbose)			\
+		fprintf(stderr, "DEBUG: " msg);	\
+} while (0)
+
+static bool should_quit;
+
+struct opts {
+	int port;
+	int devid;
+	bool verbose;
+};
+
+enum accept_cfg {
+	ACCEPT_CFG_NONE = 0,
+	ACCEPT_CFG_CLEAR,
+	ACCEPT_CFG_PSP,
+};
+
+static struct {
+	unsigned char tx;
+	unsigned char rx;
+} psp_vers;
+
+static int conn_setup_psp(struct ynl_sock *ys, struct opts *opts, int data_sock)
+{
+	struct psp_rx_assoc_rsp *rsp;
+	struct psp_rx_assoc_req *req;
+	struct psp_tx_assoc_rsp *tsp;
+	struct psp_tx_assoc_req *teq;
+	char info[300];
+	int key_len;
+	ssize_t sz;
+	__u32 spi;
+
+	dbg("create PSP connection\n");
+
+	// Rx assoc alloc
+	req = psp_rx_assoc_req_alloc();
+
+	psp_rx_assoc_req_set_sock_fd(req, data_sock);
+	psp_rx_assoc_req_set_version(req, psp_vers.rx);
+
+	rsp = psp_rx_assoc(ys, req);
+	psp_rx_assoc_req_free(req);
+
+	if (!rsp) {
+		perror("ERROR: failed to Rx assoc");
+		return -1;
+	}
+
+	// SPI exchange
+	key_len = rsp->rx_key._len.key;
+	memcpy(info, &rsp->rx_key.spi, sizeof(spi));
+	memcpy(&info[sizeof(spi)], rsp->rx_key.key, key_len);
+	sz = sizeof(spi) + key_len;
+
+	send(data_sock, info, sz, MSG_WAITALL);
+	psp_rx_assoc_rsp_free(rsp);
+
+	sz = recv(data_sock, info, sz, MSG_WAITALL);
+	if (sz < 0) {
+		perror("ERROR: failed to read PSP key from sock");
+		return -1;
+	}
+	memcpy(&spi, info, sizeof(spi));
+
+	// Setup Tx assoc
+	teq = psp_tx_assoc_req_alloc();
+
+	psp_tx_assoc_req_set_sock_fd(teq, data_sock);
+	psp_tx_assoc_req_set_version(teq, psp_vers.tx);
+	psp_tx_assoc_req_set_tx_key_spi(teq, spi);
+	psp_tx_assoc_req_set_tx_key_key(teq, &info[sizeof(spi)], key_len);
+
+	tsp = psp_tx_assoc(ys, teq);
+	psp_tx_assoc_req_free(teq);
+	if (!tsp) {
+		perror("ERROR: failed to Tx assoc");
+		return -1;
+	}
+	psp_tx_assoc_rsp_free(tsp);
+
+	return 0;
+}
+
+static void send_ack(int sock)
+{
+	send(sock, "ack", 4, MSG_WAITALL);
+}
+
+static void send_err(int sock)
+{
+	send(sock, "err", 4, MSG_WAITALL);
+}
+
+static void send_str(int sock, int value)
+{
+	char buf[128];
+	int ret;
+
+	ret = snprintf(buf, sizeof(buf), "%d", value);
+	send(sock, buf, ret + 1, MSG_WAITALL);
+}
+
+static void
+run_session(struct ynl_sock *ys, struct opts *opts,
+	    int server_sock, int comm_sock)
+{
+	enum accept_cfg accept_cfg = ACCEPT_CFG_NONE;
+	struct pollfd pfds[3];
+	size_t data_read = 0;
+	int data_sock = -1;
+
+	while (true) {
+		bool race_close = false;
+		int nfds;
+
+		memset(pfds, 0, sizeof(pfds));
+
+		pfds[0].fd = server_sock;
+		pfds[0].events = POLLIN;
+
+		pfds[1].fd = comm_sock;
+		pfds[1].events = POLLIN;
+
+		nfds = 2;
+		if (data_sock >= 0) {
+			pfds[2].fd = data_sock;
+			pfds[2].events = POLLIN;
+			nfds++;
+		}
+
+		dbg(" ...\n");
+		if (poll(pfds, nfds, -1) < 0) {
+			perror("poll");
+			break;
+		}
+
+		/* data sock */
+		if (pfds[2].revents & POLLIN) {
+			char buf[8192];
+			ssize_t n;
+
+			n = recv(data_sock, buf, sizeof(buf), 0);
+			if (n <= 0) {
+				if (n < 0)
+					perror("data read");
+				close(data_sock);
+				data_sock = -1;
+				dbg("data sock closed\n");
+			} else {
+				data_read += n;
+				dbg("data read %zd\n", data_read);
+			}
+		}
+
+		/* comm sock */
+		if (pfds[1].revents & POLLIN) {
+			static char buf[4096];
+			static ssize_t off;
+			bool consumed;
+			ssize_t n;
+
+			n = recv(comm_sock, &buf[off], sizeof(buf) - off, 0);
+			if (n <= 0) {
+				if (n < 0)
+					perror("comm read");
+				return;
+			}
+
+			off += n;
+			n = off;
+
+#define __consume(sz)						\
+		({						\
+			if (n == (sz)) {			\
+				off = 0;			\
+			} else {				\
+				off -= (sz);			\
+				memmove(buf, &buf[(sz)], off);	\
+			}					\
+		})
+
+#define cmd(_name)							\
+		({							\
+			ssize_t sz = sizeof(_name);			\
+			bool match = n >= sz &&	!memcmp(buf, _name, sz); \
+									\
+			if (match) {					\
+				dbg("command: " _name "\n");		\
+				__consume(sz);				\
+			}						\
+			consumed |= match;				\
+			match;						\
+		})
+
+			do {
+				consumed = false;
+
+				if (cmd("read len"))
+					send_str(comm_sock, data_read);
+
+				if (cmd("data echo")) {
+					if (data_sock >= 0)
+						send(data_sock, "echo", 5,
+						     MSG_WAITALL);
+					else
+						fprintf(stderr, "WARN: echo but no data sock\n");
+					send_ack(comm_sock);
+				}
+				if (cmd("data close")) {
+					if (data_sock >= 0) {
+						close(data_sock);
+						data_sock = -1;
+						send_ack(comm_sock);
+					} else {
+						race_close = true;
+					}
+				}
+				if (cmd("conn psp")) {
+					if (accept_cfg != ACCEPT_CFG_NONE)
+						fprintf(stderr, "WARN: old conn config still set!\n");
+					accept_cfg = ACCEPT_CFG_PSP;
+					send_ack(comm_sock);
+					/* next two bytes are versions */
+					if (off >= 2) {
+						memcpy(&psp_vers, buf, 2);
+						__consume(2);
+					} else {
+						fprintf(stderr, "WARN: short conn psp command!\n");
+					}
+				}
+				if (cmd("conn clr")) {
+					if (accept_cfg != ACCEPT_CFG_NONE)
+						fprintf(stderr, "WARN: old conn config still set!\n");
+					accept_cfg = ACCEPT_CFG_CLEAR;
+					send_ack(comm_sock);
+				}
+				if (cmd("exit"))
+					should_quit = true;
+#undef cmd
+
+				if (!consumed) {
+					fprintf(stderr, "WARN: unknown cmd: [%zd] %s\n",
+						off, buf);
+				}
+			} while (consumed && off);
+		}
+
+		/* server sock */
+		if (pfds[0].revents & POLLIN) {
+			if (data_sock >= 0) {
+				fprintf(stderr, "WARN: new data sock but old one still here\n");
+				close(data_sock);
+				data_sock = -1;
+			}
+			data_sock = accept(server_sock, NULL, NULL);
+			if (data_sock < 0) {
+				perror("accept");
+				continue;
+			}
+			data_read = 0;
+
+			if (accept_cfg == ACCEPT_CFG_CLEAR) {
+				dbg("new data sock: clear\n");
+				/* nothing to do */
+			} else if (accept_cfg == ACCEPT_CFG_PSP) {
+				dbg("new data sock: psp\n");
+				conn_setup_psp(ys, opts, data_sock);
+			} else {
+				fprintf(stderr, "WARN: new data sock but no config\n");
+			}
+			accept_cfg = ACCEPT_CFG_NONE;
+		}
+
+		if (race_close) {
+			if (data_sock >= 0) {
+				/* indeed, ordering problem, handle the close */
+				close(data_sock);
+				data_sock = -1;
+				send_ack(comm_sock);
+			} else {
+				fprintf(stderr, "WARN: close but no data sock\n");
+				send_err(comm_sock);
+			}
+		}
+	}
+	dbg("session ending\n");
+}
+
+static int spawn_server(struct opts *opts)
+{
+	struct sockaddr_in6 addr;
+	int fd;
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (fd < 0) {
+		perror("can't open socket");
+		return -1;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+
+	addr.sin6_family = AF_INET6;
+	addr.sin6_addr = in6addr_any;
+	addr.sin6_port = htons(opts->port);
+
+	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr))) {
+		perror("can't bind socket");
+		return -1;
+	}
+
+	if (listen(fd, 5)) {
+		perror("can't listen");
+		return -1;
+	}
+
+	return fd;
+}
+
+static int run_responder(struct ynl_sock *ys, struct opts *opts)
+{
+	int server_sock, comm;
+
+	server_sock = spawn_server(opts);
+	if (server_sock < 0)
+		return 4;
+
+	while (!should_quit) {
+		comm = accept(server_sock, NULL, NULL);
+		if (comm < 0) {
+			perror("accept failed");
+		} else {
+			run_session(ys, opts, server_sock, comm);
+			close(comm);
+		}
+	}
+
+	return 0;
+}
+
+static void usage(const char *name, const char *miss)
+{
+	if (miss)
+		fprintf(stderr, "Missing argument: %s\n", miss);
+
+	fprintf(stderr, "Usage: %s -p port [-v] [-d psp-dev-id]\n", name);
+	exit(EXIT_FAILURE);
+}
+
+static void parse_cmd_opts(int argc, char **argv, struct opts *opts)
+{
+	int opt;
+
+	while ((opt = getopt(argc, argv, "vp:d:")) != -1) {
+		switch (opt) {
+		case 'v':
+			opts->verbose = 1;
+			break;
+		case 'p':
+			opts->port = atoi(optarg);
+			break;
+		case 'd':
+			opts->devid = atoi(optarg);
+			break;
+		default:
+			usage(argv[0], NULL);
+		}
+	}
+}
+
+static int psp_dev_set_ena(struct ynl_sock *ys, __u32 dev_id, __u32 versions)
+{
+	struct psp_dev_set_req *sreq;
+	struct psp_dev_set_rsp *srsp;
+
+	fprintf(stderr, "Set PSP enable on device %d to 0x%x\n",
+		dev_id, versions);
+
+	sreq = psp_dev_set_req_alloc();
+
+	psp_dev_set_req_set_id(sreq, dev_id);
+	psp_dev_set_req_set_psp_versions_ena(sreq, versions);
+
+	srsp = psp_dev_set(ys, sreq);
+	psp_dev_set_req_free(sreq);
+	if (!srsp)
+		return 10;
+
+	psp_dev_set_rsp_free(srsp);
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	struct psp_dev_get_list *dev_list;
+	bool devid_found = false;
+	__u32 ver_ena, ver_cap;
+	struct opts opts = {};
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	int first_id = 0;
+	int ret;
+
+	parse_cmd_opts(argc, argv, &opts);
+	if (!opts.port)
+		usage(argv[0], "port"); // exits
+
+	ys = ynl_sock_create(&ynl_psp_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return 1;
+	}
+
+	dev_list = psp_dev_get_dump(ys);
+	if (ynl_dump_empty(dev_list)) {
+		if (ys->err.code)
+			goto err_close;
+		fprintf(stderr, "No PSP devices\n");
+		goto err_close_silent;
+	}
+
+	ynl_dump_foreach(dev_list, d) {
+		if (opts.devid) {
+			devid_found = true;
+			ver_ena = d->psp_versions_ena;
+			ver_cap = d->psp_versions_cap;
+		} else if (!first_id) {
+			first_id = d->id;
+			ver_ena = d->psp_versions_ena;
+			ver_cap = d->psp_versions_cap;
+		} else {
+			fprintf(stderr, "Multiple PSP devices found\n");
+			goto err_close_silent;
+		}
+	}
+	psp_dev_get_list_free(dev_list);
+
+	if (opts.devid && !devid_found) {
+		fprintf(stderr, "PSP device %d requested on cmdline, not found\n",
+			opts.devid);
+		goto err_close_silent;
+	} else if (!opts.devid) {
+		opts.devid = first_id;
+	}
+
+	if (ver_ena != ver_cap) {
+		ret = psp_dev_set_ena(ys, opts.devid, ver_cap);
+		if (ret)
+			goto err_close;
+	}
+
+	ret = run_responder(ys, &opts);
+
+	if (ver_ena != ver_cap && psp_dev_set_ena(ys, opts.devid, ver_ena))
+		fprintf(stderr, "WARN: failed to set the PSP versions back\n");
+
+	ynl_sock_destroy(ys);
+
+	return ret;
+
+err_close:
+	fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_close_silent:
+	ynl_sock_destroy(ys);
+	return 2;
+}
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
new file mode 100755
index 000000000000..236005290a33
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_disruptive, ksft_exit, ksft_run
+from lib.py import ksft_eq, ksft_not_in, ksft_raises, KsftSkipEx, KsftFailEx
+from lib.py import EthtoolFamily, NetdevFamily, NlError
+from lib.py import NetDrvEnv
+from lib.py import bkg, cmd, defer, ip
+import errno
+import glob
+import os
+import socket
+import struct
+
+def sys_get_queues(ifname, qtype='rx') -> int:
+    folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*')
+    return len(folders)
+
+
+def nl_get_queues(cfg, nl, qtype='rx'):
+    queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
+    if queues:
+        return len([q for q in queues if q['type'] == qtype])
+    return None
+
+
+def check_xsk(cfg, nl, xdp_queue_id=0) -> None:
+    # Probe for support
+    xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
+    if xdp.ret == 255:
+        raise KsftSkipEx('AF_XDP unsupported')
+    elif xdp.ret > 0:
+        raise KsftFailEx('unable to create AF_XDP socket')
+
+    with bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}',
+             ksft_wait=3):
+
+        rx = tx = False
+
+        queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
+        if not queues:
+            raise KsftSkipEx("Netlink reports no queues")
+
+        for q in queues:
+            if q['id'] == 0:
+                if q['type'] == 'rx':
+                    rx = True
+                if q['type'] == 'tx':
+                    tx = True
+
+                ksft_eq(q.get('xsk', None), {},
+                        comment="xsk attr on queue we configured")
+            else:
+                ksft_not_in('xsk', q,
+                            comment="xsk attr on queue we didn't configure")
+
+        ksft_eq(rx, True)
+        ksft_eq(tx, True)
+
+
+def get_queues(cfg, nl) -> None:
+    snl = NetdevFamily(recv_size=4096)
+
+    for qtype in ['rx', 'tx']:
+        queues = nl_get_queues(cfg, snl, qtype)
+        if not queues:
+            raise KsftSkipEx('queue-get not supported by device')
+
+        expected = sys_get_queues(cfg.dev['ifname'], qtype)
+        ksft_eq(queues, expected)
+
+
+def addremove_queues(cfg, nl) -> None:
+    queues = nl_get_queues(cfg, nl)
+    if not queues:
+        raise KsftSkipEx('queue-get not supported by device')
+
+    curr_queues = sys_get_queues(cfg.dev['ifname'])
+    if curr_queues == 1:
+        raise KsftSkipEx('cannot decrement queue: already at 1')
+
+    netnl = EthtoolFamily()
+    channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+    rx_type = 'rx'
+    if channels.get('combined-count', 0) > 0:
+            rx_type = 'combined'
+
+    expected = curr_queues - 1
+    cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10)
+    queues = nl_get_queues(cfg, nl)
+    ksft_eq(queues, expected)
+
+    expected = curr_queues
+    cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10)
+    queues = nl_get_queues(cfg, nl)
+    ksft_eq(queues, expected)
+
+
+@ksft_disruptive
+def check_down(cfg, nl) -> None:
+    # Check the NAPI IDs before interface goes down and hides them
+    napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True)
+
+    ip(f"link set dev {cfg.dev['ifname']} down")
+    defer(ip, f"link set dev {cfg.dev['ifname']} up")
+
+    with ksft_raises(NlError) as cm:
+        nl.queue_get({'ifindex': cfg.ifindex, 'id': 0, 'type': 'rx'})
+    ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT)
+
+    if napis:
+        with ksft_raises(NlError) as cm:
+            nl.napi_get({'id': napis[0]['id']})
+        ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT)
+
+
+def main() -> None:
+    with NetDrvEnv(__file__, queue_count=100) as cfg:
+        ksft_run([get_queues, addremove_queues, check_down, check_xsk],
+                 args=(cfg, NetdevFamily()))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/ring_reconfig.py b/tools/testing/selftests/drivers/net/ring_reconfig.py
new file mode 100755
index 000000000000..f9530a8b0856
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ring_reconfig.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Test channel and ring size configuration via ethtool (-L / -G).
+"""
+
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_eq
+from lib.py import NetDrvEpEnv, EthtoolFamily, GenerateTraffic
+from lib.py import defer, NlError
+
+
+def channels(cfg) -> None:
+    """
+    Twiddle channel counts in various combinations of parameters.
+    We're only looking for driver adhering to the requested config
+    if the config is accepted and crashes.
+    """
+    ehdr = {'header':{'dev-index': cfg.ifindex}}
+    chans = cfg.eth.channels_get(ehdr)
+
+    all_keys = ["rx", "tx", "combined"]
+    mixes = [{"combined"}, {"rx", "tx"}, {"rx", "combined"}, {"tx", "combined"},
+             {"rx", "tx", "combined"},]
+
+    # Get the set of keys that device actually supports
+    restore = {}
+    supported = set()
+    for key in all_keys:
+        if key + "-max" in chans:
+            supported.add(key)
+            restore |= {key + "-count": chans[key + "-count"]}
+
+    defer(cfg.eth.channels_set, ehdr | restore)
+
+    def test_config(config):
+        try:
+            cfg.eth.channels_set(ehdr | config)
+            get = cfg.eth.channels_get(ehdr)
+            for k, v in config.items():
+                ksft_eq(get.get(k, 0), v)
+        except NlError as e:
+            failed.append(mix)
+            ksft_pr("Can't set", config, e)
+        else:
+            ksft_pr("Okay", config)
+
+    failed = []
+    for mix in mixes:
+        if not mix.issubset(supported):
+            continue
+
+        # Set all the values in the mix to 1, other supported to 0
+        config = {}
+        for key in all_keys:
+            config[key + "-count"] = 1 if key in mix else 0
+        test_config(config)
+
+    for mix in mixes:
+        if not mix.issubset(supported):
+            continue
+        if mix in failed:
+            continue
+
+        # Set all the values in the mix to max, other supported to 0
+        config = {}
+        for key in all_keys:
+            config[key + "-count"] = chans[key + '-max'] if key in mix else 0
+        test_config(config)
+
+
+def _configure_min_ring_cnt(cfg) -> None:
+    """ Try to configure a single Rx/Tx ring. """
+    ehdr = {'header':{'dev-index': cfg.ifindex}}
+    chans = cfg.eth.channels_get(ehdr)
+
+    all_keys = ["rx-count", "tx-count", "combined-count"]
+    restore = {}
+    config = {}
+    for key in all_keys:
+        if key in chans:
+            restore[key] = chans[key]
+            config[key] = 0
+
+    if chans.get('combined-count', 0) > 1:
+        config['combined-count'] = 1
+    elif chans.get('rx-count', 0) > 1 and chans.get('tx-count', 0) > 1:
+        config['tx-count'] = 1
+        config['rx-count'] = 1
+    else:
+        # looks like we're already on 1 channel
+        return
+
+    cfg.eth.channels_set(ehdr | config)
+    defer(cfg.eth.channels_set, ehdr | restore)
+
+
+def ringparam(cfg) -> None:
+    """
+    Tweak the ringparam configuration. Try to run some traffic over min
+    ring size to make sure it actually functions.
+    """
+    ehdr = {'header':{'dev-index': cfg.ifindex}}
+    rings = cfg.eth.rings_get(ehdr)
+
+    restore = {}
+    maxes = {}
+    params = set()
+    for key in rings.keys():
+        if 'max' in key:
+            param = key[:-4]
+            maxes[param] = rings[key]
+            params.add(param)
+            restore[param] = rings[param]
+
+    defer(cfg.eth.rings_set, ehdr | restore)
+
+    # Speed up the reconfig by configuring just one ring
+    _configure_min_ring_cnt(cfg)
+
+    # Try to reach min on all settings
+    for param in params:
+        val = rings[param]
+        while True:
+            try:
+                cfg.eth.rings_set({'header':{'dev-index': cfg.ifindex},
+                                   param: val // 2})
+                if val == 0:
+                    break
+                val //= 2
+            except NlError:
+                break
+
+        get = cfg.eth.rings_get(ehdr)
+        ksft_eq(get[param], val)
+
+        ksft_pr(f"Reached min for '{param}' at {val} (max {rings[param]})")
+
+    GenerateTraffic(cfg).wait_pkts_and_stop(10000)
+
+    # Try max across all params, if the driver supports large rings
+    # this may OOM so we ignore errors
+    try:
+        ksft_pr("Applying max settings")
+        config = {p: maxes[p] for p in params}
+        cfg.eth.rings_set(ehdr | config)
+    except NlError as e:
+        ksft_pr("Can't set max params", config, e)
+    else:
+        GenerateTraffic(cfg).wait_pkts_and_stop(10000)
+
+
+def main() -> None:
+    """ Ksft boiler plate main """
+
+    with NetDrvEpEnv(__file__) as cfg:
+        cfg.eth = EthtoolFamily()
+
+        ksft_run([channels,
+                  ringparam],
+                 args=(cfg, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/shaper.py b/tools/testing/selftests/drivers/net/shaper.py
new file mode 100755
index 000000000000..11310f19bfa0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/shaper.py
@@ -0,0 +1,461 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_true, KsftSkipEx
+from lib.py import EthtoolFamily, NetshaperFamily
+from lib.py import NetDrvEnv
+from lib.py import NlError
+from lib.py import cmd
+
+def get_shapers(cfg, nl_shaper) -> None:
+    try:
+        shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    except NlError as e:
+        if e.error == 95:
+            raise KsftSkipEx("shapers not supported by the device")
+        raise
+
+    # Default configuration: no shapers configured.
+    ksft_eq(len(shapers), 0)
+
+def get_caps(cfg, nl_shaper) -> None:
+    try:
+        caps = nl_shaper.cap_get({'ifindex': cfg.ifindex}, dump=True)
+    except NlError as e:
+        if e.error == 95:
+            raise KsftSkipEx("shapers not supported by the device")
+        raise
+
+    # Each device implementing shaper support must support some
+    # features in at least a scope.
+    ksft_true(len(caps)> 0)
+
+def set_qshapers(cfg, nl_shaper) -> None:
+    try:
+        caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+                                 'scope':'queue'})
+    except NlError as e:
+        if e.error == 95:
+            raise KsftSkipEx("shapers not supported by the device")
+        raise
+    if not 'support-bw-max' in caps or not 'support-metric-bps' in caps:
+        raise KsftSkipEx("device does not support queue scope shapers with bw_max and metric bps")
+
+    cfg.queues = True;
+    netnl = EthtoolFamily()
+    channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+    if channels['combined-count'] == 0:
+        cfg.rx_type = 'rx'
+        cfg.nr_queues = channels['rx-count']
+    else:
+        cfg.rx_type = 'combined'
+        cfg.nr_queues = channels['combined-count']
+    if cfg.nr_queues < 3:
+        raise KsftSkipEx(f"device does not support enough queues min 3 found {cfg.nr_queues}")
+
+    nl_shaper.set({'ifindex': cfg.ifindex,
+                   'handle': {'scope': 'queue', 'id': 1},
+                   'metric': 'bps',
+                   'bw-max': 10000})
+    nl_shaper.set({'ifindex': cfg.ifindex,
+                   'handle': {'scope': 'queue', 'id': 2},
+                   'metric': 'bps',
+                   'bw-max': 20000})
+
+    # Querying a specific shaper not yet configured must fail.
+    raised = False
+    try:
+        shaper_q0 = nl_shaper.get({'ifindex': cfg.ifindex,
+                                   'handle': {'scope': 'queue', 'id': 0}})
+    except (NlError):
+        raised = True
+    ksft_eq(raised, True)
+
+    shaper_q1 = nl_shaper.get({'ifindex': cfg.ifindex,
+                              'handle': {'scope': 'queue', 'id': 1}})
+    ksft_eq(shaper_q1, {'ifindex': cfg.ifindex,
+                        'parent': {'scope': 'netdev'},
+                        'handle': {'scope': 'queue', 'id': 1},
+                        'metric': 'bps',
+                        'bw-max': 10000})
+
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 1},
+                       'metric': 'bps',
+                       'bw-max': 10000},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 2},
+                       'metric': 'bps',
+                       'bw-max': 20000}])
+
+def del_qshapers(cfg, nl_shaper) -> None:
+    if not cfg.queues:
+        raise KsftSkipEx("queue shapers not supported by device, skipping delete")
+
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'queue', 'id': 2}})
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'queue', 'id': 1}})
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(len(shapers), 0)
+
+def set_nshapers(cfg, nl_shaper) -> None:
+    # Check required features.
+    try:
+        caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+                                  'scope':'netdev'})
+    except NlError as e:
+        if e.error == 95:
+            raise KsftSkipEx("shapers not supported by the device")
+        raise
+    if not 'support-bw-max' in caps or not 'support-metric-bps' in caps:
+        raise KsftSkipEx("device does not support nested netdev scope shapers with weight")
+
+    cfg.netdev = True;
+    nl_shaper.set({'ifindex': cfg.ifindex,
+                   'handle': {'scope': 'netdev', 'id': 0},
+                   'bw-max': 100000})
+
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'handle': {'scope': 'netdev'},
+                       'metric': 'bps',
+                       'bw-max': 100000}])
+
+def del_nshapers(cfg, nl_shaper) -> None:
+    if not cfg.netdev:
+        raise KsftSkipEx("netdev shaper not supported by device, skipping delete")
+
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'netdev'}})
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(len(shapers), 0)
+
+def basic_groups(cfg, nl_shaper) -> None:
+    if not cfg.netdev:
+        raise KsftSkipEx("netdev shaper not supported by the device")
+    if cfg.nr_queues < 3:
+        raise KsftSkipEx(f"netdev does not have enough queues min 3 reported {cfg.nr_queues}")
+
+    try:
+        caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+                                  'scope':'queue'})
+    except NlError as e:
+        if e.error == 95:
+            raise KsftSkipEx("shapers not supported by the device")
+        raise
+    if not 'support-weight' in caps:
+        raise KsftSkipEx("device does not support queue scope shapers with weight")
+
+    node_handle = nl_shaper.group({
+                        'ifindex': cfg.ifindex,
+                        'leaves':[{'handle': {'scope': 'queue', 'id': 1},
+                                   'weight': 1},
+                                  {'handle': {'scope': 'queue', 'id': 2},
+                                   'weight': 2}],
+                         'handle': {'scope':'netdev'},
+                         'metric': 'bps',
+                         'bw-max': 10000})
+    ksft_eq(node_handle, {'ifindex': cfg.ifindex,
+                          'handle': {'scope': 'netdev'}})
+
+    shaper = nl_shaper.get({'ifindex': cfg.ifindex,
+                            'handle': {'scope': 'queue', 'id': 1}})
+    ksft_eq(shaper, {'ifindex': cfg.ifindex,
+                     'parent': {'scope': 'netdev'},
+                     'handle': {'scope': 'queue', 'id': 1},
+                     'weight': 1 })
+
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'queue', 'id': 2}})
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'queue', 'id': 1}})
+
+    # Deleting all the leaves shaper does not affect the node one
+    # when the latter has 'netdev' scope.
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(len(shapers), 1)
+
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'netdev'}})
+
+def qgroups(cfg, nl_shaper) -> None:
+    if cfg.nr_queues < 4:
+        raise KsftSkipEx(f"netdev does not have enough queues min 4 reported {cfg.nr_queues}")
+    try:
+        caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+                                  'scope':'node'})
+    except NlError as e:
+        if e.error == 95:
+            raise KsftSkipEx("shapers not supported by the device")
+        raise
+    if not 'support-bw-max' in caps or not 'support-metric-bps' in caps:
+        raise KsftSkipEx("device does not support node scope shapers with bw_max and metric bps")
+    try:
+        caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+                                  'scope':'queue'})
+    except NlError as e:
+        if e.error == 95:
+            raise KsftSkipEx("shapers not supported by the device")
+        raise
+    if not 'support-nesting' in caps or not 'support-weight' in caps or not 'support-metric-bps' in caps:
+            raise KsftSkipEx("device does not support nested queue scope shapers with weight")
+
+    cfg.groups = True;
+    node_handle = nl_shaper.group({
+                   'ifindex': cfg.ifindex,
+                   'leaves':[{'handle': {'scope': 'queue', 'id': 1},
+                              'weight': 3},
+                             {'handle': {'scope': 'queue', 'id': 2},
+                              'weight': 2}],
+                   'handle': {'scope':'node'},
+                   'metric': 'bps',
+                   'bw-max': 10000})
+    node_id = node_handle['handle']['id']
+
+    shaper = nl_shaper.get({'ifindex': cfg.ifindex,
+                            'handle': {'scope': 'queue', 'id': 1}})
+    ksft_eq(shaper, {'ifindex': cfg.ifindex,
+                     'parent': {'scope': 'node', 'id': node_id},
+                     'handle': {'scope': 'queue', 'id': 1},
+                     'weight': 3})
+    shaper = nl_shaper.get({'ifindex': cfg.ifindex,
+                            'handle': {'scope': 'node', 'id': node_id}})
+    ksft_eq(shaper, {'ifindex': cfg.ifindex,
+                     'handle': {'scope': 'node', 'id': node_id},
+                     'parent': {'scope': 'netdev'},
+                     'metric': 'bps',
+                     'bw-max': 10000})
+
+    # Grouping to a specified, not existing node scope shaper must fail
+    raised = False
+    try:
+        nl_shaper.group({
+                   'ifindex': cfg.ifindex,
+                   'leaves':[{'handle': {'scope': 'queue', 'id': 3},
+                              'weight': 3}],
+                   'handle': {'scope':'node', 'id': node_id + 1},
+                   'metric': 'bps',
+                   'bw-max': 10000})
+
+    except (NlError):
+        raised = True
+    ksft_eq(raised, True)
+
+    # Add to an existing node
+    node_handle = nl_shaper.group({
+                   'ifindex': cfg.ifindex,
+                   'leaves':[{'handle': {'scope': 'queue', 'id': 3},
+                              'weight': 4}],
+                   'handle': {'scope':'node', 'id': node_id}})
+    ksft_eq(node_handle, {'ifindex': cfg.ifindex,
+                          'handle': {'scope': 'node', 'id': node_id}})
+
+    shaper = nl_shaper.get({'ifindex': cfg.ifindex,
+                            'handle': {'scope': 'queue', 'id': 3}})
+    ksft_eq(shaper, {'ifindex': cfg.ifindex,
+                     'parent': {'scope': 'node', 'id': node_id},
+                     'handle': {'scope': 'queue', 'id': 3},
+                     'weight': 4})
+
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'queue', 'id': 2}})
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'queue', 'id': 1}})
+
+    # Deleting a non empty node will move the leaves downstream.
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'node', 'id': node_id}})
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 3},
+                       'weight': 4}])
+
+    # Finish and verify the complete cleanup.
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'queue', 'id': 3}})
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(len(shapers), 0)
+
+def delegation(cfg, nl_shaper) -> None:
+    if not cfg.groups:
+        raise KsftSkipEx("device does not support node scope")
+    try:
+        caps = nl_shaper.cap_get({'ifindex': cfg.ifindex,
+                                  'scope':'node'})
+    except NlError as e:
+        if e.error == 95:
+            raise KsftSkipEx("node scope shapers not supported by the device")
+        raise
+    if not 'support-nesting' in caps:
+        raise KsftSkipEx("device does not support node scope shapers nesting")
+
+    node_handle = nl_shaper.group({
+                   'ifindex': cfg.ifindex,
+                   'leaves':[{'handle': {'scope': 'queue', 'id': 1},
+                              'weight': 3},
+                             {'handle': {'scope': 'queue', 'id': 2},
+                              'weight': 2},
+                             {'handle': {'scope': 'queue', 'id': 3},
+                              'weight': 1}],
+                   'handle': {'scope':'node'},
+                   'metric': 'bps',
+                   'bw-max': 10000})
+    node_id = node_handle['handle']['id']
+
+    # Create the nested node and validate the hierarchy
+    nested_node_handle = nl_shaper.group({
+                   'ifindex': cfg.ifindex,
+                   'leaves':[{'handle': {'scope': 'queue', 'id': 1},
+                              'weight': 3},
+                             {'handle': {'scope': 'queue', 'id': 2},
+                              'weight': 2}],
+                   'handle': {'scope':'node'},
+                   'metric': 'bps',
+                   'bw-max': 5000})
+    nested_node_id = nested_node_handle['handle']['id']
+    ksft_true(nested_node_id != node_id)
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': nested_node_id},
+                       'handle': {'scope': 'queue', 'id': 1},
+                       'weight': 3},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': nested_node_id},
+                       'handle': {'scope': 'queue', 'id': 2},
+                       'weight': 2},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': node_id},
+                       'handle': {'scope': 'queue', 'id': 3},
+                       'weight': 1},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'node', 'id': node_id},
+                       'metric': 'bps',
+                       'bw-max': 10000},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': node_id},
+                       'handle': {'scope': 'node', 'id': nested_node_id},
+                       'metric': 'bps',
+                       'bw-max': 5000}])
+
+    # Deleting a non empty node will move the leaves downstream.
+    nl_shaper.delete({'ifindex': cfg.ifindex,
+                      'handle': {'scope': 'node', 'id': nested_node_id}})
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': node_id},
+                       'handle': {'scope': 'queue', 'id': 1},
+                       'weight': 3},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': node_id},
+                       'handle': {'scope': 'queue', 'id': 2},
+                       'weight': 2},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'node', 'id': node_id},
+                       'handle': {'scope': 'queue', 'id': 3},
+                       'weight': 1},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'node', 'id': node_id},
+                       'metric': 'bps',
+                       'bw-max': 10000}])
+
+    # Final cleanup.
+    for i in range(1, 4):
+        nl_shaper.delete({'ifindex': cfg.ifindex,
+                          'handle': {'scope': 'queue', 'id': i}})
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(len(shapers), 0)
+
+def queue_update(cfg, nl_shaper) -> None:
+    if cfg.nr_queues < 4:
+        raise KsftSkipEx(f"netdev does not have enough queues min 4 reported {cfg.nr_queues}")
+    if not cfg.queues:
+        raise KsftSkipEx("device does not support queue scope")
+
+    for i in range(3):
+        nl_shaper.set({'ifindex': cfg.ifindex,
+                       'handle': {'scope': 'queue', 'id': i},
+                       'metric': 'bps',
+                       'bw-max': (i + 1) * 1000})
+    # Delete a channel, with no shapers configured on top of the related
+    # queue: no changes expected
+    cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} 3", timeout=10)
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 0},
+                       'metric': 'bps',
+                       'bw-max': 1000},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 1},
+                       'metric': 'bps',
+                       'bw-max': 2000},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 2},
+                       'metric': 'bps',
+                       'bw-max': 3000}])
+
+    # Delete a channel, with a shaper configured on top of the related
+    # queue: the shaper must be deleted, too
+    cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} 2", timeout=10)
+
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 0},
+                       'metric': 'bps',
+                       'bw-max': 1000},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 1},
+                       'metric': 'bps',
+                       'bw-max': 2000}])
+
+    # Restore the original channels number, no expected changes
+    cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} {cfg.nr_queues}", timeout=10)
+    shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True)
+    ksft_eq(shapers, [{'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 0},
+                       'metric': 'bps',
+                       'bw-max': 1000},
+                      {'ifindex': cfg.ifindex,
+                       'parent': {'scope': 'netdev'},
+                       'handle': {'scope': 'queue', 'id': 1},
+                       'metric': 'bps',
+                       'bw-max': 2000}])
+
+    # Final cleanup.
+    for i in range(0, 2):
+        nl_shaper.delete({'ifindex': cfg.ifindex,
+                          'handle': {'scope': 'queue', 'id': i}})
+
+def main() -> None:
+    with NetDrvEnv(__file__, queue_count=4) as cfg:
+        cfg.queues = False
+        cfg.netdev = False
+        cfg.groups = False
+        cfg.nr_queues = 0
+        ksft_run([get_shapers,
+                  get_caps,
+                  set_qshapers,
+                  del_qshapers,
+                  set_nshapers,
+                  del_nshapers,
+                  basic_groups,
+                  qgroups,
+                  delegation,
+                  queue_update], args=(cfg, NetshaperFamily()))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
new file mode 100755
index 000000000000..b08e4d48b15c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Tests related to standard netdevice statistics.
+"""
+
+import errno
+import subprocess
+import time
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import ksft_disruptive
+from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
+from lib.py import NetDrvEnv
+from lib.py import cmd, ip, defer
+
+ethnl = EthtoolFamily()
+netfam = NetdevFamily()
+rtnl = RtnlFamily()
+
+
+def check_pause(cfg) -> None:
+    """
+    Check that drivers which support Pause config also report standard
+    pause stats.
+    """
+
+    try:
+        ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
+    except NlError as e:
+        if e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("pause not supported by the device") from e
+        raise
+
+    data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex,
+                                       "flags": {'stats'}}})
+    ksft_true(data['stats'], "driver does not report stats")
+
+
+def check_fec(cfg) -> None:
+    """
+    Check that drivers which support FEC config also report standard
+    FEC stats.
+    """
+
+    try:
+        ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
+    except NlError as e:
+        if e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("FEC not supported by the device") from e
+        raise
+
+    data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
+                                     "flags": {'stats'}}})
+    ksft_true(data['stats'], "driver does not report stats")
+
+
+def check_fec_hist(cfg) -> None:
+    """
+    Check that drivers which support FEC histogram statistics report
+    reasonable values.
+    """
+
+    try:
+        data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
+                                         "flags": {'stats'}}})
+    except NlError as e:
+        if e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("FEC not supported by the device") from e
+        raise
+    if 'stats' not in data:
+        raise KsftSkipEx("FEC stats not supported by the device")
+    if 'hist' not in data['stats']:
+        raise KsftSkipEx("FEC histogram not supported by the device")
+
+    hist = data['stats']['hist']
+    for fec_bin in hist:
+        for key in ['bin-low', 'bin-high', 'bin-val']:
+            ksft_in(key, fec_bin,
+	            "Drivers should always report FEC bin range and value")
+        ksft_ge(fec_bin['bin-high'], fec_bin['bin-low'],
+                "FEC bin range should be valid")
+        if 'bin-val-per-lane' in fec_bin:
+            ksft_eq(sum(fec_bin['bin-val-per-lane']), fec_bin['bin-val'],
+                    "FEC bin value should be equal to sum of per-plane values")
+
+
+def pkt_byte_sum(cfg) -> None:
+    """
+    Check that qstat and interface stats match in value.
+    """
+
+    def get_qstat(test):
+        stats = netfam.qstats_get({}, dump=True)
+        if stats:
+            for qs in stats:
+                if qs["ifindex"]== test.ifindex:
+                    return qs
+        return None
+
+    qstat = get_qstat(cfg)
+    if qstat is None:
+        raise KsftSkipEx("qstats not supported by the device")
+
+    for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
+        ksft_in(key, qstat, "Drivers should always report basic keys")
+
+    # Compare stats, rtnl stats and qstats must match,
+    # but the interface may be up, so do a series of dumps
+    # each time the more "recent" stats must be higher or same.
+    def stat_cmp(rstat, qstat):
+        for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
+            if rstat[key] != qstat[key]:
+                return rstat[key] - qstat[key]
+        return 0
+
+    for _ in range(10):
+        rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+        if stat_cmp(rtstat, qstat) < 0:
+            raise KsftFailEx("RTNL stats are lower, fetched later")
+        qstat = get_qstat(cfg)
+        if stat_cmp(rtstat, qstat) > 0:
+            raise KsftFailEx("Qstats are lower, fetched later")
+
+
+def qstat_by_ifindex(cfg) -> None:
+    """ Qstats Netlink API tests - querying by ifindex. """
+
+    # Construct a map ifindex -> [dump, by-index, dump]
+    ifindexes = {}
+    stats = netfam.qstats_get({}, dump=True)
+    for entry in stats:
+        ifindexes[entry['ifindex']] = [entry, None, None]
+
+    for ifindex in ifindexes:
+        entry = netfam.qstats_get({"ifindex": ifindex}, dump=True)
+        ksft_eq(len(entry), 1)
+        ifindexes[entry[0]['ifindex']][1] = entry[0]
+
+    stats = netfam.qstats_get({}, dump=True)
+    for entry in stats:
+        ifindexes[entry['ifindex']][2] = entry
+
+    if len(ifindexes) == 0:
+        raise KsftSkipEx("No ifindex supports qstats")
+
+    # Now make sure the stats match/make sense
+    for ifindex, triple in ifindexes.items():
+        all_keys = triple[0].keys() | triple[1].keys() | triple[2].keys()
+
+        for key in all_keys:
+            ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key)
+            ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key)
+
+    # Sanity check the dumps
+    queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True)
+    # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}}
+    parsed = {}
+    for entry in queues:
+        ifindex = entry["ifindex"]
+        if ifindex not in parsed:
+            parsed[ifindex] = {"rx":[], "tx": []}
+        parsed[ifindex][entry["queue-type"]].append(entry['queue-id'])
+    # Now, validate
+    for ifindex, queues in parsed.items():
+        for qtype in ['rx', 'tx']:
+            ksft_eq(len(queues[qtype]), len(set(queues[qtype])),
+                    comment="repeated queue keys")
+            ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1,
+                    comment="missing queue keys")
+
+    # Test invalid dumps
+    # 0 is invalid
+    with ksft_raises(NlError) as cm:
+        netfam.qstats_get({"ifindex": 0}, dump=True)
+    ksft_eq(cm.exception.nl_msg.error, -34)
+    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+
+    # loopback has no stats
+    with ksft_raises(NlError) as cm:
+        netfam.qstats_get({"ifindex": 1}, dump=True)
+    ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP)
+    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+
+    # Try to get stats for lowest unused ifindex but not 0
+    devs = rtnl.getlink({}, dump=True)
+    all_ifindexes = set(dev["ifi-index"] for dev in devs)
+    lowest = 2
+    while lowest in all_ifindexes:
+        lowest += 1
+
+    with ksft_raises(NlError) as cm:
+        netfam.qstats_get({"ifindex": lowest}, dump=True)
+    ksft_eq(cm.exception.nl_msg.error, -19)
+    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+
+
+@ksft_disruptive
+def check_down(cfg) -> None:
+    """ Test statistics (interface and qstat) are not impacted by ifdown """
+
+    try:
+        qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+    except NlError as e:
+        if e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("qstats not supported by the device") from e
+        raise
+
+    ip(f"link set dev {cfg.dev['ifname']} down")
+    defer(ip, f"link set dev {cfg.dev['ifname']} up")
+
+    qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+    for k in qstat:
+        ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down")
+
+    # exercise per-queue API to make sure that "device down" state
+    # is handled correctly and doesn't crash
+    netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)
+
+
+def __run_inf_loop(body):
+    body = body.strip()
+    if body[-1] != ';':
+        body += ';'
+
+    return subprocess.Popen(f"while true; do {body} done", shell=True,
+                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+
+def __stats_increase_sanely(old, new) -> None:
+    for k in old.keys():
+        ksft_ge(new[k], old[k])
+        ksft_lt(new[k] - old[k], 1 << 31, comment="likely wrapping error")
+
+
+def procfs_hammer(cfg) -> None:
+    """
+    Reading stats via procfs only holds the RCU lock, which is not an exclusive
+    lock, make sure drivers can handle parallel reads of stats.
+    """
+    one = __run_inf_loop("cat /proc/net/dev")
+    defer(one.kill)
+    two = __run_inf_loop("cat /proc/net/dev")
+    defer(two.kill)
+
+    time.sleep(1)
+    # Make sure the processes are running
+    ksft_is(one.poll(), None)
+    ksft_is(two.poll(), None)
+
+    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+    time.sleep(2)
+    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+    __stats_increase_sanely(rtstat1, rtstat2)
+    # defers will kill the loops
+
+
+@ksft_disruptive
+def procfs_downup_hammer(cfg) -> None:
+    """
+    Reading stats via procfs only holds the RCU lock, drivers often try
+    to sleep when reading the stats, or don't protect against races.
+    """
+    # Set a large number of queues,
+    # we'll flip between min(max_queues, 64) and 1
+    channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+    if channels['combined-count'] == 0:
+        rx_type = 'rx'
+    else:
+        rx_type = 'combined'
+    cur_queue_cnt = channels[f'{rx_type}-count']
+    max_queue_cnt = min(channels[f'{rx_type}-max'], 64)
+
+    cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
+    defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")
+
+    # Real test stats
+    stats = __run_inf_loop("cat /proc/net/dev")
+    defer(stats.kill)
+
+    ipset = f"ip link set dev {cfg.ifname}"
+    defer(ip, f"link set dev {cfg.ifname} up")
+    # The "echo -n 1" lets us count iterations below
+    updown = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \
+             f"ethtool -L {cfg.ifname} {rx_type} 1; " + \
+             f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \
+              "echo -n 1"
+    updown = __run_inf_loop(updown)
+    kill_updown = defer(updown.kill)
+
+    time.sleep(1)
+    # Make sure the processes are running
+    ksft_is(stats.poll(), None)
+    ksft_is(updown.poll(), None)
+
+    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+    # We're looking for crashes, give it extra time
+    time.sleep(9)
+    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+    __stats_increase_sanely(rtstat1, rtstat2)
+
+    kill_updown.exec()
+    stdout, _ = updown.communicate(timeout=5)
+    ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8')))
+
+
+def main() -> None:
+    """ Ksft boiler plate main """
+
+    with NetDrvEnv(__file__, queue_count=100) as cfg:
+        ksft_run([check_pause, check_fec, check_fec_hist, pkt_byte_sum,
+		  qstat_by_ifindex, check_down, procfs_hammer,
+		  procfs_downup_hammer],
+                 args=(cfg, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile
new file mode 100644
index 000000000000..1340b3df9c31
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for net selftests
+
+TEST_PROGS := \
+	dev_addr_lists.sh \
+	options.sh \
+	propagation.sh \
+# end of TEST_PROGS
+
+TEST_INCLUDES := \
+	../bonding/lag_lib.sh \
+	../../../net/forwarding/lib.sh \
+	../../../net/in_netns.sh \
+	../../../net/lib.sh \
+	../../../net/lib/sh/defer.sh \
+# end of TEST_INCLUDES
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config
new file mode 100644
index 000000000000..558e1d0cf565
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/config
@@ -0,0 +1,7 @@
+CONFIG_DUMMY=y
+CONFIG_IPV6=y
+CONFIG_MACVLAN=y
+CONFIG_NETDEVSIM=m
+CONFIG_NET_TEAM=y
+CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y
+CONFIG_NET_TEAM_MODE_LOADBALANCE=y
diff --git a/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh
new file mode 100755
index 000000000000..b1ec7755b783
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test team device handling of addr lists (dev->uc, mc)
+#
+
+ALL_TESTS="
+	team_cleanup
+"
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+source "$lib_dir"/../bonding/lag_lib.sh
+
+
+destroy()
+{
+	local ifnames=(dummy1 dummy2 team0 mv0)
+	local ifname
+
+	for ifname in "${ifnames[@]}"; do
+		ip link del "$ifname" &>/dev/null
+	done
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	destroy
+}
+
+
+team_cleanup()
+{
+	RET=0
+
+	test_LAG_cleanup "team" "lacp"
+}
+
+
+require_command teamd
+
+trap cleanup EXIT
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/team/options.sh b/tools/testing/selftests/drivers/net/team/options.sh
new file mode 100755
index 000000000000..44888f32b513
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/options.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# These tests verify basic set and get functionality of the team
+# driver options over netlink.
+
+# Run in private netns.
+test_dir="$(dirname "$0")"
+if [[ $# -eq 0 ]]; then
+        "${test_dir}"/../../../net/in_netns.sh "$0" __subprocess
+        exit $?
+fi
+
+ALL_TESTS="
+        team_test_options
+"
+
+source "${test_dir}/../../../net/lib.sh"
+
+TEAM_PORT="team0"
+MEMBER_PORT="dummy0"
+
+setup()
+{
+        ip link add name "${MEMBER_PORT}" type dummy
+        ip link add name "${TEAM_PORT}" type team
+}
+
+get_and_check_value()
+{
+        local option_name="$1"
+        local expected_value="$2"
+        local port_flag="$3"
+
+        local value_from_get
+
+        if ! value_from_get=$(teamnl "${TEAM_PORT}" getoption "${option_name}" \
+                        "${port_flag}"); then
+                echo "Could not get option '${option_name}'" >&2
+                return 1
+        fi
+
+        if [[ "${value_from_get}" != "${expected_value}" ]]; then
+                echo "Incorrect value for option '${option_name}'" >&2
+                echo "get (${value_from_get}) != set (${expected_value})" >&2
+                return 1
+        fi
+}
+
+set_and_check_get()
+{
+        local option_name="$1"
+        local option_value="$2"
+        local port_flag="$3"
+
+        local value_from_get
+
+        if ! teamnl "${TEAM_PORT}" setoption "${option_name}" \
+                        "${option_value}" "${port_flag}"; then
+                echo "'setoption ${option_name} ${option_value}' failed" >&2
+                return 1
+        fi
+
+        get_and_check_value "${option_name}" "${option_value}" "${port_flag}"
+        return $?
+}
+
+# Get a "port flag" to pass to the `teamnl` command.
+# E.g. $1="dummy0" -> "port=dummy0",
+#      $1=""       -> ""
+get_port_flag()
+{
+        local port_name="$1"
+
+        if [[ -n "${port_name}" ]]; then
+                echo "--port=${port_name}"
+        fi
+}
+
+attach_port_if_specified()
+{
+        local port_name="$1"
+
+        if [[ -n "${port_name}" ]]; then
+                ip link set dev "${port_name}" master "${TEAM_PORT}"
+                return $?
+        fi
+}
+
+detach_port_if_specified()
+{
+        local port_name="$1"
+
+        if [[ -n "${port_name}" ]]; then
+                ip link set dev "${port_name}" nomaster
+                return $?
+        fi
+}
+
+# Test that an option's get value matches its set value.
+# Globals:
+#   RET - Used by testing infra like `check_err`.
+#   EXIT_STATUS - Used by `log_test` for whole script exit value.
+# Arguments:
+#   option_name - The name of the option.
+#   value_1 - The first value to try setting.
+#   value_2 - The second value to try setting.
+#   port_name - The (optional) name of the attached port.
+team_test_option()
+{
+        local option_name="$1"
+        local value_1="$2"
+        local value_2="$3"
+        local possible_values="$2 $3 $2"
+        local port_name="$4"
+        local port_flag
+
+        RET=0
+
+        echo "Setting '${option_name}' to '${value_1}' and '${value_2}'"
+
+        attach_port_if_specified "${port_name}"
+        check_err $? "Couldn't attach ${port_name} to master"
+        port_flag=$(get_port_flag "${port_name}")
+
+        # Set and get both possible values.
+        for value in ${possible_values}; do
+                set_and_check_get "${option_name}" "${value}" "${port_flag}"
+                check_err $? "Failed to set '${option_name}' to '${value}'"
+        done
+
+        detach_port_if_specified "${port_name}"
+        check_err $? "Couldn't detach ${port_name} from its master"
+
+        log_test "Set + Get '${option_name}' test"
+}
+
+# Test that getting a non-existant option fails.
+# Globals:
+#   RET - Used by testing infra like `check_err`.
+#   EXIT_STATUS - Used by `log_test` for whole script exit value.
+# Arguments:
+#   option_name - The name of the option.
+#   port_name - The (optional) name of the attached port.
+team_test_get_option_fails()
+{
+        local option_name="$1"
+        local port_name="$2"
+        local port_flag
+
+        RET=0
+
+        attach_port_if_specified "${port_name}"
+        check_err $? "Couldn't attach ${port_name} to master"
+        port_flag=$(get_port_flag "${port_name}")
+
+        # Just confirm that getting the value fails.
+        teamnl "${TEAM_PORT}" getoption "${option_name}" "${port_flag}"
+        check_fail $? "Shouldn't be able to get option '${option_name}'"
+
+        detach_port_if_specified "${port_name}"
+
+        log_test "Get '${option_name}' fails"
+}
+
+team_test_options()
+{
+        # Wrong option name behavior.
+        team_test_get_option_fails fake_option1
+        team_test_get_option_fails fake_option2 "${MEMBER_PORT}"
+
+        # Correct set and get behavior.
+        team_test_option mode activebackup loadbalance
+        team_test_option notify_peers_count 0 5
+        team_test_option notify_peers_interval 0 5
+        team_test_option mcast_rejoin_count 0 5
+        team_test_option mcast_rejoin_interval 0 5
+        team_test_option enabled true false "${MEMBER_PORT}"
+        team_test_option user_linkup true false "${MEMBER_PORT}"
+        team_test_option user_linkup_enabled true false "${MEMBER_PORT}"
+        team_test_option priority 10 20 "${MEMBER_PORT}"
+        team_test_option queue_id 0 1 "${MEMBER_PORT}"
+}
+
+require_command teamnl
+setup
+tests_run
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/team/propagation.sh b/tools/testing/selftests/drivers/net/team/propagation.sh
new file mode 100755
index 000000000000..4bea75b79878
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/propagation.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+NSIM_LRO_ID=$((256 + RANDOM % 256))
+NSIM_LRO_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_LRO_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+
+cleanup()
+{
+	set +e
+	ip link del dummyteam &>/dev/null
+	ip link del team0 &>/dev/null
+	echo $NSIM_LRO_ID > $NSIM_DEV_SYS_DEL
+	modprobe -r netdevsim
+}
+
+# Trigger LRO propagation to the lower.
+# https://lore.kernel.org/netdev/aBvOpkIoxcr9PfDg@mini-arch/
+team_lro()
+{
+	# using netdevsim because it supports NETIF_F_LRO
+	NSIM_LRO_NAME=$(find $NSIM_LRO_SYS/net -maxdepth 1 -type d ! \
+		-path $NSIM_LRO_SYS/net -exec basename {} \;)
+
+	ip link add name team0 type team
+	ip link set $NSIM_LRO_NAME down
+	ip link set dev $NSIM_LRO_NAME master team0
+	ip link set team0 up
+	ethtool -K team0 large-receive-offload off
+
+	ip link del team0
+}
+
+# Trigger promisc propagation to the lower during IFLA_MASTER.
+# https://lore.kernel.org/netdev/20250506032328.3003050-1-sdf@fomichev.me/
+team_promisc()
+{
+	ip link add name dummyteam type dummy
+	ip link add name team0 type team
+	ip link set dummyteam down
+	ip link set team0 promisc on
+	ip link set dev dummyteam master team0
+	ip link set team0 up
+
+	ip link del team0
+	ip link del dummyteam
+}
+
+# Trigger promisc propagation to the lower via netif_change_flags (aka
+# ndo_change_rx_flags).
+# https://lore.kernel.org/netdev/20250514220319.3505158-1-stfomichev@gmail.com/
+team_change_flags()
+{
+	ip link add name dummyteam type dummy
+	ip link add name team0 type team
+	ip link set dummyteam down
+	ip link set dev dummyteam master team0
+	ip link set team0 up
+	ip link set team0 promisc on
+
+	# Make sure we can add more L2 addresses without any issues.
+	ip link add link team0 address 00:00:00:00:00:01 team0.1 type macvlan
+	ip link set team0.1 up
+
+	ip link del team0.1
+	ip link del team0
+	ip link del dummyteam
+}
+
+trap cleanup EXIT
+modprobe netdevsim || :
+echo $NSIM_LRO_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+team_lro
+team_promisc
+team_change_flags
diff --git a/tools/testing/selftests/drivers/net/virtio_net/Makefile b/tools/testing/selftests/drivers/net/virtio_net/Makefile
new file mode 100644
index 000000000000..868ece3fea1f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = basic_features.sh
+
+TEST_FILES = virtio_net_common.sh
+
+TEST_INCLUDES = \
+	../../../net/forwarding/lib.sh \
+	../../../net/lib.sh \
+# end of TEST_INCLUDES
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh b/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh
new file mode 100755
index 000000000000..cf8cf816ed48
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# See virtio_net_common.sh comments for more details about assumed setup
+
+ALL_TESTS="
+	initial_ping_test
+	f_mac_test
+"
+
+source virtio_net_common.sh
+
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p2]}
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+initial_ping_test()
+{
+	setup_cleanup
+	setup_prepare
+	ping_test $h1 $H2_IPV4 " simple"
+}
+
+f_mac_test()
+{
+	RET=0
+	local test_name="mac feature filtered"
+
+	virtio_feature_present $h1 $VIRTIO_NET_F_MAC
+	if [ $? -ne 0 ]; then
+		log_test_skip "$test_name" "Device $h1 is missing feature $VIRTIO_NET_F_MAC."
+		return 0
+	fi
+	virtio_feature_present $h1 $VIRTIO_NET_F_MAC
+	if [ $? -ne 0 ]; then
+		log_test_skip "$test_name" "Device $h2 is missing feature $VIRTIO_NET_F_MAC."
+		return 0
+	fi
+
+	setup_cleanup
+	setup_prepare
+
+	grep -q 0 /sys/class/net/$h1/addr_assign_type
+	check_err $? "Permanent address assign type for $h1 is not set"
+	grep -q 0 /sys/class/net/$h2/addr_assign_type
+	check_err $? "Permanent address assign type for $h2 is not set"
+
+	setup_cleanup
+	virtio_filter_feature_add $h1 $VIRTIO_NET_F_MAC
+	virtio_filter_feature_add $h2 $VIRTIO_NET_F_MAC
+	setup_prepare
+
+	grep -q 0 /sys/class/net/$h1/addr_assign_type
+	check_fail $? "Permanent address assign type for $h1 is set when F_MAC feature is filtered"
+	grep -q 0 /sys/class/net/$h2/addr_assign_type
+	check_fail $? "Permanent address assign type for $h2 is set when F_MAC feature is filtered"
+
+	ping_do $h1 $H2_IPV4
+	check_err $? "Ping failed"
+
+	log_test "$test_name"
+}
+
+setup_prepare()
+{
+	virtio_device_rebind $h1
+	virtio_device_rebind $h2
+	wait_for_dev $h1
+	wait_for_dev $h2
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+setup_cleanup()
+{
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+
+	virtio_filter_features_clear $h1
+	virtio_filter_features_clear $h2
+	virtio_device_rebind $h1
+	virtio_device_rebind $h2
+	wait_for_dev $h1
+	wait_for_dev $h2
+}
+
+cleanup()
+{
+	pre_cleanup
+	setup_cleanup
+}
+
+check_driver $h1 "virtio_net"
+check_driver $h2 "virtio_net"
+check_virtio_debugfs $h1
+check_virtio_debugfs $h2
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/virtio_net/config b/tools/testing/selftests/drivers/net/virtio_net/config
new file mode 100644
index 000000000000..bcf7555eaffe
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/config
@@ -0,0 +1,8 @@
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=m
+CONFIG_VIRTIO_DEBUG=y
+CONFIG_VIRTIO_NET=y
diff --git a/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh b/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh
new file mode 100644
index 000000000000..57bd8055e2e5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This assumes running on a host with two virtio interfaces connected
+# back to back. Example script to do such wire-up of tap devices would
+# look like this:
+#
+# =======================================================================================================
+# #!/bin/bash
+#
+# DEV1="$1"
+# DEV2="$2"
+#
+# sudo tc qdisc add dev $DEV1 clsact
+# sudo tc qdisc add dev $DEV2 clsact
+# sudo tc filter add dev $DEV1 ingress protocol all pref 1 matchall action mirred egress redirect dev $DEV2
+# sudo tc filter add dev $DEV2 ingress protocol all pref 1 matchall action mirred egress redirect dev $DEV1
+# sudo ip link set $DEV1 up
+# sudo ip link set $DEV2 up
+# =======================================================================================================
+
+REQUIRE_MZ="no"
+NETIF_CREATE="no"
+NETIF_FIND_DRIVER="virtio_net"
+NUM_NETIFS=2
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+VIRTIO_NET_F_MAC=5
+
+virtio_device_get()
+{
+	local dev=$1; shift
+	local device_path="/sys/class/net/$dev/device/"
+
+	basename `realpath $device_path`
+}
+
+virtio_device_rebind()
+{
+	local dev=$1; shift
+	local device=`virtio_device_get $dev`
+
+	echo "$device" > /sys/bus/virtio/drivers/virtio_net/unbind
+	echo "$device" > /sys/bus/virtio/drivers/virtio_net/bind
+}
+
+virtio_debugfs_get()
+{
+	local dev=$1; shift
+	local device=`virtio_device_get $dev`
+
+	echo /sys/kernel/debug/virtio/$device/
+}
+
+check_virtio_debugfs()
+{
+	local dev=$1; shift
+	local debugfs=`virtio_debugfs_get $dev`
+
+	if [ ! -f "$debugfs/device_features" ] ||
+	   [ ! -f "$debugfs/filter_feature_add"  ] ||
+	   [ ! -f "$debugfs/filter_feature_del"  ] ||
+	   [ ! -f "$debugfs/filter_features"  ] ||
+	   [ ! -f "$debugfs/filter_features_clear"  ]; then
+		echo "SKIP: not possible to access debugfs for $dev"
+		exit $ksft_skip
+	fi
+}
+
+virtio_feature_present()
+{
+	local dev=$1; shift
+	local feature=$1; shift
+	local debugfs=`virtio_debugfs_get $dev`
+
+	cat $debugfs/device_features |grep "^$feature$" &> /dev/null
+	return $?
+}
+
+virtio_filter_features_clear()
+{
+	local dev=$1; shift
+	local debugfs=`virtio_debugfs_get $dev`
+
+	echo "1" > $debugfs/filter_features_clear
+}
+
+virtio_filter_feature_add()
+{
+	local dev=$1; shift
+	local feature=$1; shift
+	local debugfs=`virtio_debugfs_get $dev`
+
+	echo "$feature" > $debugfs/filter_feature_add
+}
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
new file mode 100755
index 000000000000..e54df158dfe9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -0,0 +1,779 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+This file contains tests to verify native XDP support in network drivers.
+The tests utilize the BPF program `xdp_native.bpf.o` from the `selftests.net.lib`
+directory, with each test focusing on a specific aspect of XDP functionality.
+"""
+import random
+import string
+from dataclasses import dataclass
+from enum import Enum
+
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_ne, ksft_pr
+from lib.py import KsftNamedVariant, ksft_variants
+from lib.py import KsftFailEx, NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily, NlError
+from lib.py import bkg, cmd, rand_port, wait_port_listen
+from lib.py import ip, bpftool, defer
+
+
+class TestConfig(Enum):
+    """Enum for XDP configuration options."""
+    MODE = 0  # Configures the BPF program for a specific test
+    PORT = 1  # Port configuration to communicate with the remote host
+    ADJST_OFFSET = 2  # Tail/Head adjustment offset for extension/shrinking
+    ADJST_TAG = 3  # Adjustment tag to annotate the start and end of extension
+
+
+class XDPAction(Enum):
+    """Enum for XDP actions."""
+    PASS = 0  # Pass the packet up to the stack
+    DROP = 1  # Drop the packet
+    TX = 2    # Route the packet to the remote host
+    TAIL_ADJST = 3  # Adjust the tail of the packet
+    HEAD_ADJST = 4  # Adjust the head of the packet
+
+
+class XDPStats(Enum):
+    """Enum for XDP statistics."""
+    RX = 0    # Count of valid packets received for testing
+    PASS = 1  # Count of packets passed up to the stack
+    DROP = 2  # Count of packets dropped
+    TX = 3    # Count of incoming packets routed to the remote host
+    ABORT = 4 # Count of packets that were aborted
+
+
+@dataclass
+class BPFProgInfo:
+    """Data class to store information about a BPF program."""
+    name: str               # Name of the BPF program
+    file: str               # BPF program object file
+    xdp_sec: str = "xdp"    # XDP section name (e.g., "xdp" or "xdp.frags")
+    mtu: int = 1500         # Maximum Transmission Unit, default is 1500
+
+
+def _exchg_udp(cfg, port, test_string):
+    """
+    Exchanges UDP packets between a local and remote host using the socat tool.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        port: Port number to use for the UDP communication.
+        test_string: String that the remote host will send.
+
+    Returns:
+        The string received by the test host.
+    """
+    cfg.require_cmd("socat", remote=True)
+
+    rx_udp_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT"
+    tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}"
+
+    with bkg(rx_udp_cmd, exit_wait=True) as nc:
+        wait_port_listen(port, proto="udp")
+        cmd(tx_udp_cmd, host=cfg.remote, shell=True)
+
+    return nc.stdout.strip()
+
+
+def _test_udp(cfg, port, size=256):
+    """
+    Tests UDP packet exchange between a local and remote host.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        port: Port number to use for the UDP communication.
+        size: The length of the test string to be exchanged, default is 256 characters.
+
+    Returns:
+        bool: True if the received string matches the sent string, False otherwise.
+    """
+    test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(size))
+    recvd_str = _exchg_udp(cfg, port, test_str)
+
+    return recvd_str == test_str
+
+
+def _load_xdp_prog(cfg, bpf_info):
+    """
+    Loads an XDP program onto a network interface.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        bpf_info: BPFProgInfo object containing information about the BPF program.
+
+    Returns:
+        dict: A dictionary containing the XDP program ID, name, and associated map IDs.
+    """
+    abs_path = cfg.net_lib_dir / bpf_info.file
+    prog_info = {}
+
+    cmd(f"ip link set dev {cfg.remote_ifname} mtu {bpf_info.mtu}", shell=True, host=cfg.remote)
+    defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
+
+    cmd(
+    f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdpdrv obj {abs_path} sec {bpf_info.xdp_sec}",
+    shell=True
+    )
+    defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpdrv off")
+
+    xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0]
+    prog_info["id"] = xdp_info["xdp"]["prog"]["id"]
+    prog_info["name"] = xdp_info["xdp"]["prog"]["name"]
+    prog_id = prog_info["id"]
+
+    map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"]
+    prog_info["maps"] = {}
+    for map_id in map_ids:
+        name = bpftool(f"map show id {map_id}", json=True)["name"]
+        prog_info["maps"][name] = map_id
+
+    return prog_info
+
+
+def format_hex_bytes(value):
+    """
+    Helper function that converts an integer into a formatted hexadecimal byte string.
+
+    Args:
+        value: An integer representing the number to be converted.
+
+    Returns:
+        A string representing hexadecimal equivalent of value, with bytes separated by spaces.
+    """
+    hex_str = value.to_bytes(4, byteorder='little', signed=True)
+    return ' '.join(f'{byte:02x}' for byte in hex_str)
+
+
+def _set_xdp_map(map_name, key, value):
+    """
+    Updates an XDP map with a given key-value pair using bpftool.
+
+    Args:
+        map_name: The name of the XDP map to update.
+        key: The key to update in the map, formatted as a hexadecimal string.
+        value: The value to associate with the key, formatted as a hexadecimal string.
+    """
+    key_formatted = format_hex_bytes(key)
+    value_formatted = format_hex_bytes(value)
+    bpftool(
+        f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}"
+    )
+
+
+def _get_stats(xdp_map_id):
+    """
+    Retrieves and formats statistics from an XDP map.
+
+    Args:
+        xdp_map_id: The ID of the XDP map from which to retrieve statistics.
+
+    Returns:
+        A dictionary containing formatted packet statistics for various XDP actions.
+        The keys are based on the XDPStats Enum values.
+
+    Raises:
+        KsftFailEx: If the stats retrieval fails.
+    """
+    stats_dump = bpftool(f"map dump id {xdp_map_id}", json=True)
+    if not stats_dump:
+        raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}")
+
+    stats_formatted = {}
+    for key in range(0, 5):
+        val = stats_dump[key]["formatted"]["value"]
+        if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value:
+            stats_formatted[XDPStats.RX.value] = val
+        elif stats_dump[key]["formatted"]["key"] == XDPStats.PASS.value:
+            stats_formatted[XDPStats.PASS.value] = val
+        elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value:
+            stats_formatted[XDPStats.DROP.value] = val
+        elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value:
+            stats_formatted[XDPStats.TX.value] = val
+        elif stats_dump[key]["formatted"]["key"] == XDPStats.ABORT.value:
+            stats_formatted[XDPStats.ABORT.value] = val
+
+    return stats_formatted
+
+
+def _test_pass(cfg, bpf_info, msg_sz):
+    """
+    Tests the XDP_PASS action by exchanging UDP packets.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        bpf_info: BPFProgInfo object containing information about the BPF program.
+        msg_sz: Size of the test message to send.
+    """
+
+    prog_info = _load_xdp_prog(cfg, bpf_info)
+    port = rand_port()
+
+    _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value)
+    _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+    ksft_eq(_test_udp(cfg, port, msg_sz), True, "UDP packet exchange failed")
+    stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+    ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero")
+    ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.PASS.value], "RX and PASS stats mismatch")
+
+
+def test_xdp_native_pass_sb(cfg):
+    """
+    Tests the XDP_PASS action for single buffer case.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+    _test_pass(cfg, bpf_info, 256)
+
+
+def test_xdp_native_pass_mb(cfg):
+    """
+    Tests the XDP_PASS action for a multi-buff size.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+    _test_pass(cfg, bpf_info, 8000)
+
+
+def _test_drop(cfg, bpf_info, msg_sz):
+    """
+    Tests the XDP_DROP action by exchanging UDP packets.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        bpf_info: BPFProgInfo object containing information about the BPF program.
+        msg_sz: Size of the test message to send.
+    """
+
+    prog_info = _load_xdp_prog(cfg, bpf_info)
+    port = rand_port()
+
+    _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value)
+    _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+    ksft_eq(_test_udp(cfg, port, msg_sz), False, "UDP packet exchange should fail")
+    stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+    ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should be zero")
+    ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.DROP.value], "RX and DROP stats mismatch")
+
+
+def test_xdp_native_drop_sb(cfg):
+    """
+    Tests the XDP_DROP action for a signle-buff case.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+    _test_drop(cfg, bpf_info, 256)
+
+
+def test_xdp_native_drop_mb(cfg):
+    """
+    Tests the XDP_DROP action for a multi-buff case.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+    _test_drop(cfg, bpf_info, 8000)
+
+
+def _test_xdp_native_tx(cfg, bpf_info, payload_lens):
+    """
+    Tests the XDP_TX action.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        bpf_info: BPFProgInfo object containing the BPF program metadata.
+        payload_lens: Array of packet lengths to send.
+    """
+    cfg.require_cmd("socat", remote=True)
+    prog_info = _load_xdp_prog(cfg, bpf_info)
+    port = rand_port()
+
+    _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value)
+    _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+    expected_pkts = 0
+    for payload_len in payload_lens:
+        test_string = "".join(
+            random.choice(string.ascii_lowercase) for _ in range(payload_len)
+        )
+
+        rx_udp = f"socat -{cfg.addr_ipver} -T 2 " + \
+                 f"-u UDP-RECV:{port},reuseport STDOUT"
+
+        # Writing zero bytes to stdin gets ignored by socat,
+        # but with the shut-null flag socat generates a zero sized packet
+        # when the socket is closed.
+        tx_cmd_suffix = ",shut-null" if payload_len == 0 else ""
+        tx_udp = f"echo -n {test_string} | socat -t 2 " + \
+                 f"-u STDIN UDP:{cfg.baddr}:{port}{tx_cmd_suffix}"
+
+        with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc:
+            wait_port_listen(port, proto="udp", host=cfg.remote)
+            cmd(tx_udp, host=cfg.remote, shell=True)
+
+        ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed")
+
+        expected_pkts += 1
+        stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+        ksft_eq(stats[XDPStats.RX.value], expected_pkts, "RX stats mismatch")
+        ksft_eq(stats[XDPStats.TX.value], expected_pkts, "TX stats mismatch")
+
+
+def test_xdp_native_tx_sb(cfg):
+    """
+    Tests the XDP_TX action for a single-buff case.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+    # Ensure there's enough room for an ETH / IP / UDP header
+    pkt_hdr_len = 42 if cfg.addr_ipver == "4" else 62
+
+    _test_xdp_native_tx(cfg, bpf_info, [0, 1500 // 2, 1500 - pkt_hdr_len])
+
+
+def test_xdp_native_tx_mb(cfg):
+    """
+    Tests the XDP_TX action for a multi-buff case.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o",
+                           "xdp.frags", 9000)
+    # The first packet ensures we exercise the fragmented code path.
+    # And the subsequent 0-sized packet ensures the driver
+    # reinitializes xdp_buff correctly.
+    _test_xdp_native_tx(cfg, bpf_info, [8000, 0])
+
+
+def _validate_res(res, offset_lst, pkt_sz_lst):
+    """
+    Validates the result of a test.
+
+    Args:
+        res: The result of the test, which should be a dictionary with a "status" key.
+
+    Raises:
+        KsftFailEx: If the test fails to pass any combination of offset and packet size.
+    """
+    if "status" not in res:
+        raise KsftFailEx("Missing 'status' key in result dictionary")
+
+    # Validate that not a single case was successful
+    if res["status"] == "fail":
+        if res["offset"] == offset_lst[0] and res["pkt_sz"] == pkt_sz_lst[0]:
+            raise KsftFailEx(f"{res['reason']}")
+
+        # Get the previous offset and packet size to report the successful run
+        tmp_idx = offset_lst.index(res["offset"])
+        prev_offset = offset_lst[tmp_idx - 1]
+        if tmp_idx == 0:
+            tmp_idx = pkt_sz_lst.index(res["pkt_sz"])
+            prev_pkt_sz = pkt_sz_lst[tmp_idx - 1]
+        else:
+            prev_pkt_sz = res["pkt_sz"]
+
+        # Use these values for error reporting
+        ksft_pr(
+        f"Failed run: pkt_sz {res['pkt_sz']}, offset {res['offset']}. "
+        f"Last successful run: pkt_sz {prev_pkt_sz}, offset {prev_offset}. "
+        f"Reason: {res['reason']}"
+        )
+
+
+def _check_for_failures(recvd_str, stats):
+    """
+    Checks for common failures while adjusting headroom or tailroom.
+
+    Args:
+        recvd_str: The string received from the remote host after sending a test string.
+        stats: A dictionary containing formatted packet statistics for various XDP actions.
+
+    Returns:
+        str: A string describing the failure reason if a failure is detected, otherwise None.
+    """
+
+    # Any adjustment failure result in an abort hence, we track this counter
+    if stats[XDPStats.ABORT.value] != 0:
+        return "Adjustment failed"
+
+    # Since we are using aggregate stats for a single test across all offsets and packet sizes
+    # we can't use RX stats only to track data exchange failure without taking a previous
+    # snapshot. An easier way is to simply check for non-zero length of received string.
+    if len(recvd_str) == 0:
+        return "Data exchange failed"
+
+    # Check for RX and PASS stats mismatch. Ideally, they should be equal for a successful run
+    if stats[XDPStats.RX.value] != stats[XDPStats.PASS.value]:
+        return "RX stats mismatch"
+
+    return None
+
+
+def _test_xdp_native_tail_adjst(cfg, pkt_sz_lst, offset_lst):
+    """
+    Tests the XDP tail adjustment functionality.
+
+    This function loads the appropriate XDP program based on the provided
+    program name and configures the XDP map for tail adjustment. It then
+    validates the tail adjustment by sending and receiving UDP packets
+    with specified packet sizes and offsets.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        prog: Name of the XDP program to load.
+        pkt_sz_lst: List of packet sizes to test.
+        offset_lst: List of offsets to validate support for tail adjustment.
+
+    Returns:
+        dict: A dictionary with test status and failure details if applicable.
+    """
+    port = rand_port()
+    bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+    prog_info = _load_xdp_prog(cfg, bpf_info)
+
+    # Configure the XDP map for tail adjustment
+    _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value)
+    _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+    for offset in offset_lst:
+        tag = format(random.randint(65, 90), "02x")
+
+        _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+        if offset > 0:
+            _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+
+        for pkt_sz in pkt_sz_lst:
+            test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+            recvd_str = _exchg_udp(cfg, port, test_str)
+            stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+            failure = _check_for_failures(recvd_str, stats)
+            if failure is not None:
+                return {
+                    "status": "fail",
+                    "reason": failure,
+                    "offset": offset,
+                    "pkt_sz": pkt_sz,
+                }
+
+            # Validate data content based on offset direction
+            expected_data = None
+            if offset > 0:
+                expected_data = test_str + (offset * chr(int(tag, 16)))
+            else:
+                expected_data = test_str[0:pkt_sz + offset]
+
+            if recvd_str != expected_data:
+                return {
+                    "status": "fail",
+                    "reason": "Data mismatch",
+                    "offset": offset,
+                    "pkt_sz": pkt_sz,
+                }
+
+    return {"status": "pass"}
+
+
+def test_xdp_native_adjst_tail_grow_data(cfg):
+    """
+    Tests the XDP tail adjustment by growing packet data.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    pkt_sz_lst = [512, 1024, 2048]
+    offset_lst = [1, 16, 32, 64, 128, 256]
+    res = _test_xdp_native_tail_adjst(
+        cfg,
+        pkt_sz_lst,
+        offset_lst,
+    )
+
+    _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_tail_shrnk_data(cfg):
+    """
+    Tests the XDP tail adjustment by shrinking packet data.
+
+    Args:
+        cfg: Configuration object containing network settings.
+    """
+    pkt_sz_lst = [512, 1024, 2048]
+    offset_lst = [-16, -32, -64, -128, -256]
+    res = _test_xdp_native_tail_adjst(
+        cfg,
+        pkt_sz_lst,
+        offset_lst,
+    )
+
+    _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def get_hds_thresh(cfg):
+    """
+    Retrieves the header data split (HDS) threshold for a network interface.
+
+    Args:
+        cfg: Configuration object containing network settings.
+
+    Returns:
+        The HDS threshold value. If the threshold is not supported or an error occurs,
+        a default value of 1500 is returned.
+    """
+    ethnl = cfg.ethnl
+    hds_thresh = 1500
+
+    try:
+        rings = ethnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+        if 'hds-thresh' not in rings:
+            ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}')
+            return hds_thresh
+        hds_thresh = rings['hds-thresh']
+    except NlError as e:
+        ksft_pr(f"Failed to get rings: {e}. Using default: {hds_thresh}")
+
+    return hds_thresh
+
+
+def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst):
+    """
+    Tests the XDP head adjustment action for a multi-buffer case.
+
+    Args:
+        cfg: Configuration object containing network settings.
+        ethnl: Network namespace or link object (not used in this function).
+
+    This function sets up the packet size and offset lists, then performs
+    the head adjustment test by sending and receiving UDP packets.
+    """
+    cfg.require_cmd("socat", remote=True)
+
+    prog_info = _load_xdp_prog(cfg, BPFProgInfo(prog, "xdp_native.bpf.o", "xdp.frags", 9000))
+    port = rand_port()
+
+    _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value)
+    _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+    hds_thresh = get_hds_thresh(cfg)
+    for offset in offset_lst:
+        for pkt_sz in pkt_sz_lst:
+            # The "head" buffer must contain at least the Ethernet header
+            # after we eat into it. We send large-enough packets, but if HDS
+            # is enabled head will only contain headers. Don't try to eat
+            # more than 28 bytes (UDPv4 + eth hdr left: (14 + 20 + 8) - 14)
+            l2_cut_off = 28 if cfg.addr_ipver == 4 else 48
+            if pkt_sz > hds_thresh and offset > l2_cut_off:
+                ksft_pr(
+                f"Failed run: pkt_sz ({pkt_sz}) > HDS threshold ({hds_thresh}) and "
+                f"offset {offset} > {l2_cut_off}"
+                )
+                return {"status": "pass"}
+
+            test_str = ''.join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+            tag = format(random.randint(65, 90), '02x')
+
+            _set_xdp_map("map_xdp_setup",
+                     TestConfig.ADJST_OFFSET.value,
+                     offset)
+            _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+            _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+
+            recvd_str = _exchg_udp(cfg, port, test_str)
+
+            # Check for failures around adjustment and data exchange
+            failure = _check_for_failures(recvd_str, _get_stats(prog_info['maps']['map_xdp_stats']))
+            if failure is not None:
+                return {
+                    "status": "fail",
+                    "reason": failure,
+                    "offset": offset,
+                    "pkt_sz": pkt_sz
+                }
+
+            # Validate data content based on offset direction
+            expected_data = None
+            if offset < 0:
+                expected_data = chr(int(tag, 16)) * (0 - offset) + test_str
+            else:
+                expected_data = test_str[offset:]
+
+            if recvd_str != expected_data:
+                return {
+                    "status": "fail",
+                    "reason": "Data mismatch",
+                    "offset": offset,
+                    "pkt_sz": pkt_sz
+                }
+
+    return {"status": "pass"}
+
+
+def test_xdp_native_adjst_head_grow_data(cfg):
+    """
+    Tests the XDP headroom growth support.
+
+    Args:
+        cfg: Configuration object containing network settings.
+
+    This function sets up the packet size and offset lists, then calls the
+    _test_xdp_native_head_adjst_mb function to perform the actual test. The
+    test is passed if the headroom is successfully extended for given packet
+    sizes and offsets.
+    """
+    pkt_sz_lst = [512, 1024, 2048]
+
+    # Negative values result in headroom shrinking, resulting in growing of payload
+    offset_lst = [-16, -32, -64, -128, -256]
+    res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+    _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_head_shrnk_data(cfg):
+    """
+    Tests the XDP headroom shrinking support.
+
+    Args:
+        cfg: Configuration object containing network settings.
+
+    This function sets up the packet size and offset lists, then calls the
+    _test_xdp_native_head_adjst_mb function to perform the actual test. The
+    test is passed if the headroom is successfully shrunk for given packet
+    sizes and offsets.
+    """
+    pkt_sz_lst = [512, 1024, 2048]
+
+    # Positive values result in headroom growing, resulting in shrinking of payload
+    offset_lst = [16, 32, 64, 128, 256]
+    res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+    _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+@ksft_variants([
+    KsftNamedVariant("pass", XDPAction.PASS),
+    KsftNamedVariant("drop", XDPAction.DROP),
+    KsftNamedVariant("tx", XDPAction.TX),
+])
+def test_xdp_native_qstats(cfg, act):
+    """
+    Send 1000 messages. Expect XDP action specified in @act.
+    Make sure the packets were counted to interface level qstats
+    (Rx, and Tx if act is TX).
+    """
+
+    cfg.require_cmd("socat")
+
+    bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+    prog_info = _load_xdp_prog(cfg, bpf_info)
+    port = rand_port()
+
+    _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, act.value)
+    _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+    # Discard the input, but we need a listener to avoid ICMP errors
+    rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport " + \
+        "/dev/null"
+    # Listener runs on "remote" in case of XDP_TX
+    rx_host = cfg.remote if act == XDPAction.TX else None
+    # We want to spew 1000 packets quickly, bash seems to do a good enough job
+    # Each reopening of the socket gives us a differenot local port (for RSS)
+    tx_udp = "for _ in `seq 20`; do " \
+        f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \
+        "for i in `seq 50`; do echo a >&5; done; " \
+        "exec 5>&-; done"
+
+    cfg.wait_hw_stats_settle()
+    # Qstats have more clearly defined semantics than rtnetlink.
+    # XDP is the "first layer of the stack" so XDP packets should be counted
+    # as received and sent as if the decision was made in the routing layer.
+    before = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+    with bkg(rx_udp, host=rx_host, exit_wait=True):
+        wait_port_listen(port, proto="udp", host=rx_host)
+        cmd(tx_udp, host=cfg.remote, shell=True)
+
+    cfg.wait_hw_stats_settle()
+    after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+    expected_pkts = 1000
+    ksft_ge(after['rx-packets'] - before['rx-packets'], expected_pkts)
+    if act == XDPAction.TX:
+        ksft_ge(after['tx-packets'] - before['tx-packets'], expected_pkts)
+
+    stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+    ksft_eq(stats[XDPStats.RX.value], expected_pkts, "XDP RX stats mismatch")
+    if act == XDPAction.TX:
+        ksft_eq(stats[XDPStats.TX.value], expected_pkts, "XDP TX stats mismatch")
+
+    # Flip the ring count back and forth to make sure the stats from XDP rings
+    # don't get lost.
+    chans = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+    if chans.get('combined-count', 0) > 1:
+        cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex},
+                                'combined-count': 1})
+        cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex},
+                                'combined-count': chans['combined-count']})
+        before = after
+        after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+        ksft_ge(after['rx-packets'], before['rx-packets'])
+        if act == XDPAction.TX:
+            ksft_ge(after['tx-packets'], before['tx-packets'])
+
+
+def main():
+    """
+    Main function to execute the XDP tests.
+
+    This function runs a series of tests to validate the XDP support for
+    both the single and multi-buffer. It uses the NetDrvEpEnv context
+    manager to manage the network driver environment and the ksft_run
+    function to execute the tests.
+    """
+    with NetDrvEpEnv(__file__) as cfg:
+        cfg.ethnl = EthtoolFamily()
+        cfg.netnl = NetdevFamily()
+        ksft_run(
+            [
+                test_xdp_native_pass_sb,
+                test_xdp_native_pass_mb,
+                test_xdp_native_drop_sb,
+                test_xdp_native_drop_mb,
+                test_xdp_native_tx_sb,
+                test_xdp_native_tx_mb,
+                test_xdp_native_adjst_tail_grow_data,
+                test_xdp_native_adjst_tail_shrnk_data,
+                test_xdp_native_adjst_head_grow_data,
+                test_xdp_native_adjst_head_shrnk_data,
+                test_xdp_native_qstats,
+            ],
+            args=(cfg,))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()