summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/drivers/net/hw
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/drivers/net/hw')
-rw-r--r--tools/testing/selftests/drivers/net/hw/.gitignore4
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile79
-rw-r--r--tools/testing/selftests/drivers/net/hw/config11
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/csum.py110
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devlink_port_split.py309
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py439
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py77
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool.sh297
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh116
-rw-r--r--tools/testing/selftests/drivers/net/hw/ethtool_lib.sh120
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool_mm.sh341
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool_rmon.sh145
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/hw_stats_l3.sh334
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh111
-rw-r--r--tools/testing/selftests/drivers/net/hw/iou-zcrx.c464
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py145
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/irq.py99
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py52
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/loopback.sh103
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c1524
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/nic_timestamp.py113
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/pp_alloc_fail.py144
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_api.py476
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py832
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_flow_label.py167
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py92
-rw-r--r--tools/testing/selftests/drivers/net/hw/settings1
-rw-r--r--tools/testing/selftests/drivers/net/hw/toeplitz.c655
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/toeplitz.py211
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py261
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/xsk_reconfig.py60
31 files changed, 7892 insertions, 0 deletions
diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore
new file mode 100644
index 000000000000..46540468a775
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+iou-zcrx
+ncdevmem
+toeplitz
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
new file mode 100644
index 000000000000..9c163ba6feee
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+# Check if io_uring supports zero-copy receive
+HAS_IOURING_ZCRX := $(shell \
+ echo -e '#include <liburing.h>\n' \
+ 'void *func = (void *)io_uring_register_ifq;\n' \
+ 'int main() {return 0;}' | \
+ $(CC) -luring -x c - -o /dev/null 2>&1 && echo y)
+
+ifeq ($(HAS_IOURING_ZCRX),y)
+COND_GEN_FILES += iou-zcrx
+else
+$(warning excluding iouring tests, liburing not installed or too old)
+endif
+
+TEST_GEN_FILES := \
+ $(COND_GEN_FILES) \
+# end of TEST_GEN_FILES
+
+TEST_PROGS = \
+ csum.py \
+ devlink_port_split.py \
+ devlink_rate_tc_bw.py \
+ devmem.py \
+ ethtool.sh \
+ ethtool_extended_state.sh \
+ ethtool_mm.sh \
+ ethtool_rmon.sh \
+ hw_stats_l3.sh \
+ hw_stats_l3_gre.sh \
+ iou-zcrx.py \
+ irq.py \
+ loopback.sh \
+ nic_timestamp.py \
+ pp_alloc_fail.py \
+ rss_api.py \
+ rss_ctx.py \
+ rss_flow_label.py \
+ rss_input_xfrm.py \
+ toeplitz.py \
+ tso.py \
+ xsk_reconfig.py \
+ #
+
+TEST_FILES := \
+ ethtool_lib.sh \
+ #
+
+TEST_INCLUDES := \
+ $(wildcard lib/py/*.py ../lib/py/*.py) \
+ ../../../net/lib.sh \
+ ../../../net/forwarding/ipip_lib.sh \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/forwarding/tc_common.sh \
+ #
+
+# YNL files, must be before "include ..lib.mk"
+YNL_GEN_FILES := \
+ ncdevmem \
+ toeplitz \
+# end of YNL_GEN_FILES
+TEST_GEN_FILES += $(YNL_GEN_FILES)
+TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
+
+include ../../../lib.mk
+
+# YNL build
+YNL_GENS := \
+ ethtool \
+ netdev \
+# end of YNL_GENS
+
+include ../../../net/ynl.mk
+
+include ../../../net/bpf.mk
+
+ifeq ($(HAS_IOURING_ZCRX),y)
+$(OUTPUT)/iou-zcrx: LDLIBS += -luring
+endif
diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config
new file mode 100644
index 000000000000..2307aa001be1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/config
@@ -0,0 +1,11 @@
+CONFIG_FAIL_FUNCTION=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FUNCTION_ERROR_INJECTION=y
+CONFIG_IO_URING=y
+CONFIG_IPV6=y
+CONFIG_IPV6_GRE=y
+CONFIG_NET_IPGRE=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_UDMABUF=y
+CONFIG_VXLAN=y
diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py
new file mode 100755
index 000000000000..3e3a89a34afe
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/csum.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Run the tools/testing/selftests/net/csum testsuite."""
+
+from os import path
+
+from lib.py import ksft_run, ksft_exit, KsftSkipEx
+from lib.py import EthtoolFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, wait_port_listen
+
+def test_receive(cfg, ipver="6", extra_args=None):
+ """Test local nic checksum receive. Remote host sends crafted packets."""
+ if not cfg.have_rx_csum:
+ raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}")
+
+ ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}"
+
+ rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}"
+ tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}"
+
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(34000, proto="udp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def test_transmit(cfg, ipver="6", extra_args=None):
+ """Test local nic checksum transmit. Remote host verifies packets."""
+ if (not cfg.have_tx_csum_generic and
+ not (cfg.have_tx_csum_ipv4 and ipver == "4") and
+ not (cfg.have_tx_csum_ipv6 and ipver == "6")):
+ raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}")
+
+ ip_args = f"-{ipver} -S {cfg.addr_v[ipver]} -D {cfg.remote_addr_v[ipver]}"
+
+ # Cannot randomize input when calculating zero checksum
+ if extra_args != "-U -Z":
+ extra_args += " -r 1"
+
+ rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}"
+ tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}"
+
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(34000, proto="udp", host=cfg.remote)
+ cmd(tx_cmd)
+
+
+def test_builder(name, cfg, ipver="6", tx=False, extra_args=""):
+ """Construct specific tests from the common template.
+
+ Most tests follow the same basic pattern, differing only in
+ Direction of the test and optional flags passed to csum."""
+ def f(cfg):
+ cfg.require_ipver(ipver)
+
+ if tx:
+ test_transmit(cfg, ipver, extra_args)
+ else:
+ test_receive(cfg, ipver, extra_args)
+
+ f.__name__ = f"ipv{ipver}_" + name
+ return f
+
+
+def check_nic_features(cfg) -> None:
+ """Test whether Tx and Rx checksum offload are enabled.
+
+ If the device under test has either off, then skip the relevant tests."""
+ cfg.have_tx_csum_generic = False
+ cfg.have_tx_csum_ipv4 = False
+ cfg.have_tx_csum_ipv6 = False
+ cfg.have_rx_csum = False
+
+ ethnl = EthtoolFamily()
+ features = ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+ for f in features["active"]["bits"]["bit"]:
+ if f["name"] == "tx-checksum-ip-generic":
+ cfg.have_tx_csum_generic = True
+ elif f["name"] == "tx-checksum-ipv4":
+ cfg.have_tx_csum_ipv4 = True
+ elif f["name"] == "tx-checksum-ipv6":
+ cfg.have_tx_csum_ipv6 = True
+ elif f["name"] == "rx-checksum":
+ cfg.have_rx_csum = True
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ check_nic_features(cfg)
+
+ cfg.bin_local = cfg.net_lib_dir / "csum"
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ cases = []
+ for ipver in ["4", "6"]:
+ cases.append(test_builder("rx_tcp", cfg, ipver, False, "-t"))
+ cases.append(test_builder("rx_tcp_invalid", cfg, ipver, False, "-t -E"))
+
+ cases.append(test_builder("rx_udp", cfg, ipver, False, ""))
+ cases.append(test_builder("rx_udp_invalid", cfg, ipver, False, "-E"))
+
+ cases.append(test_builder("tx_udp_csum_offload", cfg, ipver, True, "-U"))
+ cases.append(test_builder("tx_udp_zero_checksum", cfg, ipver, True, "-U -Z"))
+
+ ksft_run(cases=cases, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/devlink_port_split.py b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py
new file mode 100755
index 000000000000..2d84c7a0be6b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from subprocess import PIPE, Popen
+import json
+import time
+import argparse
+import collections
+import sys
+
+#
+# Test port split configuration using devlink-port lanes attribute.
+# The test is skipped in case the attribute is not available.
+#
+# First, check that all the ports with 1 lane fail to split.
+# Second, check that all the ports with more than 1 lane can be split
+# to all valid configurations (e.g., split to 2, split to 4 etc.)
+#
+
+
+# Kselftest framework requirement - SKIP code is 4
+KSFT_SKIP=4
+Port = collections.namedtuple('Port', 'bus_info name')
+
+
+def run_command(cmd, should_fail=False):
+ """
+ Run a command in subprocess.
+ Return: Tuple of (stdout, stderr).
+ """
+
+ p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
+ stdout, stderr = p.communicate()
+ stdout, stderr = stdout.decode(), stderr.decode()
+
+ if stderr != "" and not should_fail:
+ print("Error sending command: %s" % cmd)
+ print(stdout)
+ print(stderr)
+ return stdout, stderr
+
+
+class devlink_ports(object):
+ """
+ Class that holds information on the devlink ports, required to the tests;
+ if_names: A list of interfaces in the devlink ports.
+ """
+
+ def get_if_names(dev):
+ """
+ Get a list of physical devlink ports.
+ Return: Array of tuples (bus_info/port, if_name).
+ """
+
+ arr = []
+
+ cmd = "devlink -j port show"
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+ ports = json.loads(stdout)['port']
+
+ validate_devlink_output(ports, 'flavour')
+
+ for port in ports:
+ if dev in port:
+ if ports[port]['flavour'] == 'physical':
+ arr.append(Port(bus_info=port, name=ports[port]['netdev']))
+
+ return arr
+
+ def __init__(self, dev):
+ self.if_names = devlink_ports.get_if_names(dev)
+
+
+def get_max_lanes(port):
+ """
+ Get the $port's maximum number of lanes.
+ Return: number of lanes, e.g. 1, 2, 4 and 8.
+ """
+
+ cmd = "devlink -j port show %s" % port
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+ values = list(json.loads(stdout)['port'].values())[0]
+
+ if 'lanes' in values:
+ lanes = values['lanes']
+ else:
+ lanes = 0
+ return lanes
+
+
+def get_split_ability(port):
+ """
+ Get the $port split ability.
+ Return: split ability, true or false.
+ """
+
+ cmd = "devlink -j port show %s" % port.name
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+ values = list(json.loads(stdout)['port'].values())[0]
+
+ return values['splittable']
+
+
+def split(k, port, should_fail=False):
+ """
+ Split $port into $k ports.
+ If should_fail == True, the split should fail. Otherwise, should pass.
+ Return: Array of sub ports after splitting.
+ If the $port wasn't split, the array will be empty.
+ """
+
+ cmd = "devlink port split %s count %s" % (port.bus_info, k)
+ stdout, stderr = run_command(cmd, should_fail=should_fail)
+
+ if should_fail:
+ if not test(stderr != "", "%s is unsplittable" % port.name):
+ print("split an unsplittable port %s" % port.name)
+ return create_split_group(port, k)
+ else:
+ if stderr == "":
+ return create_split_group(port, k)
+ print("didn't split a splittable port %s" % port.name)
+
+ return []
+
+
+def unsplit(port):
+ """
+ Unsplit $port.
+ """
+
+ cmd = "devlink port unsplit %s" % port
+ stdout, stderr = run_command(cmd)
+ test(stderr == "", "Unsplit port %s" % port)
+
+
+def exists(port, dev):
+ """
+ Check if $port exists in the devlink ports.
+ Return: True is so, False otherwise.
+ """
+
+ return any(dev_port.name == port
+ for dev_port in devlink_ports.get_if_names(dev))
+
+
+def exists_and_lanes(ports, lanes, dev):
+ """
+ Check if every port in the list $ports exists in the devlink ports and has
+ $lanes number of lanes after splitting.
+ Return: True if both are True, False otherwise.
+ """
+
+ for port in ports:
+ max_lanes = get_max_lanes(port)
+ if not exists(port, dev):
+ print("port %s doesn't exist in devlink ports" % port)
+ return False
+ if max_lanes != lanes:
+ print("port %s has %d lanes, but %s were expected"
+ % (port, lanes, max_lanes))
+ return False
+ return True
+
+
+def test(cond, msg):
+ """
+ Check $cond and print a message accordingly.
+ Return: True is pass, False otherwise.
+ """
+
+ if cond:
+ print("TEST: %-60s [ OK ]" % msg)
+ else:
+ print("TEST: %-60s [FAIL]" % msg)
+
+ return cond
+
+
+def create_split_group(port, k):
+ """
+ Create the split group for $port.
+ Return: Array with $k elements, which are the split port group.
+ """
+
+ return list(port.name + "s" + str(i) for i in range(k))
+
+
+def split_unsplittable_port(port, k):
+ """
+ Test that splitting of unsplittable port fails.
+ """
+
+ # split to max
+ new_split_group = split(k, port, should_fail=True)
+
+ if new_split_group != []:
+ unsplit(port.bus_info)
+
+
+def split_splittable_port(port, k, lanes, dev):
+ """
+ Test that splitting of splittable port passes correctly.
+ """
+
+ new_split_group = split(k, port)
+
+ # Once the split command ends, it takes some time to the sub ifaces'
+ # to get their names. Use udevadm to continue only when all current udev
+ # events are handled.
+ cmd = "udevadm settle"
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+
+ if new_split_group != []:
+ test(exists_and_lanes(new_split_group, lanes/k, dev),
+ "split port %s into %s" % (port.name, k))
+
+ unsplit(port.bus_info)
+
+
+def validate_devlink_output(devlink_data, target_property=None):
+ """
+ Determine if test should be skipped by checking:
+ 1. devlink_data contains values
+ 2. The target_property exist in devlink_data
+ """
+ skip_reason = None
+ if any(devlink_data.values()):
+ if target_property:
+ skip_reason = "{} not found in devlink output, test skipped".format(target_property)
+ for key in devlink_data:
+ if target_property in devlink_data[key]:
+ skip_reason = None
+ else:
+ skip_reason = 'devlink output is empty, test skipped'
+
+ if skip_reason:
+ print(skip_reason)
+ sys.exit(KSFT_SKIP)
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='A test for port splitting.')
+ parser.add_argument('--dev',
+ help='The devlink handle of the device under test. ' +
+ 'The default is the first registered devlink ' +
+ 'handle.')
+
+ return parser
+
+
+def main(cmdline=None):
+ parser = make_parser()
+ args = parser.parse_args(cmdline)
+
+ dev = args.dev
+ if not dev:
+ cmd = "devlink -j dev show"
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+
+ validate_devlink_output(json.loads(stdout))
+ devs = json.loads(stdout)['dev']
+ dev = list(devs.keys())[0]
+
+ cmd = "devlink dev show %s" % dev
+ stdout, stderr = run_command(cmd)
+ if stderr != "":
+ print("devlink device %s can not be found" % dev)
+ sys.exit(1)
+
+ ports = devlink_ports(dev)
+
+ found_max_lanes = False
+ for port in ports.if_names:
+ max_lanes = get_max_lanes(port.name)
+
+ # If max lanes is 0, do not test port splitting at all
+ if max_lanes == 0:
+ continue
+
+ # If 1 lane, shouldn't be able to split
+ elif max_lanes == 1:
+ test(not get_split_ability(port),
+ "%s should not be able to split" % port.name)
+ split_unsplittable_port(port, max_lanes)
+
+ # Else, splitting should pass and all the split ports should exist.
+ else:
+ lane = max_lanes
+ test(get_split_ability(port),
+ "%s should be able to split" % port.name)
+ while lane > 1:
+ split_splittable_port(port, lane, max_lanes, dev)
+
+ lane //= 2
+ found_max_lanes = True
+
+ if not found_max_lanes:
+ print(f"Test not started, no port of device {dev} reports max_lanes")
+ sys.exit(KSFT_SKIP)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
new file mode 100755
index 000000000000..4e4faa9275bb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
@@ -0,0 +1,439 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Devlink Rate TC Bandwidth Test Suite
+===================================
+
+This test suite verifies the functionality of devlink-rate traffic class (TC)
+bandwidth distribution in a virtualized environment. The tests validate that
+bandwidth can be properly allocated between different traffic classes and
+that TC mapping works as expected.
+
+Test Environment:
+----------------
+- Creates 1 VF
+- Establishes a bridge connecting the VF representor and the uplink representor
+- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102)
+- Configures different traffic classes (TC3 and TC4) for each VLAN
+
+Test Cases:
+----------
+1. test_no_tc_mapping_bandwidth:
+ - Verifies that without TC mapping, bandwidth is NOT distributed according to
+ the configured 20/80 split between TC3 and TC4
+ - This test should fail if bandwidth matches the 20/80 split without TC
+ mapping
+ - Expected: Bandwidth should NOT be distributed as 20/80
+
+2. test_tc_mapping_bandwidth:
+ - Configures TC mapping using mqprio qdisc
+ - Verifies that with TC mapping, bandwidth IS distributed according to the
+ configured 20/80 split between TC3 and TC4
+ - Expected: Bandwidth should be distributed as 20/80
+
+Bandwidth Distribution:
+----------------------
+- TC3 (VLAN 101): Configured for 20% of total bandwidth
+- TC4 (VLAN 102): Configured for 80% of total bandwidth
+- Total bandwidth: 1Gbps
+- Tolerance: +-12%
+
+Hardware-Specific Behavior (mlx5):
+--------------------------
+mlx5 hardware enforces traffic class separation by ensuring that each transmit
+queue (SQ) is associated with a single TC. If a packet is sent on a queue that
+doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set
+mapping), the hardware moves the queue to the correct TC scheduler to preserve
+traffic isolation.
+
+This behavior means that even without explicit TC-to-queue mapping, bandwidth
+enforcement may still appear to work—because the hardware dynamically adjusts
+the scheduling context. However, this can lead to performance issues in high
+rates and HOL blocking if traffic from different TCs is mixed on the same queue.
+"""
+
+import json
+import os
+import subprocess
+import threading
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit
+from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+from lib.py import NetDrvEpEnv, DevlinkFamily
+from lib.py import NlError
+from lib.py import cmd, defer, ethtool, ip
+from lib.py import Iperf3Runner
+
+
+class BandwidthValidator:
+ """
+ Validates total bandwidth and individual shares with tolerance
+ relative to the overall total.
+ """
+
+ def __init__(self, shares):
+ self.tolerance_percent = 12
+ self.expected_total = sum(shares.values())
+ self.bounds = {}
+
+ for name, exp in shares.items():
+ self.bounds[name] = (self.min_expected(exp), self.max_expected(exp))
+
+ def min_expected(self, value):
+ """Calculates the minimum acceptable value based on tolerance."""
+ return value - (self.expected_total * self.tolerance_percent / 100)
+
+ def max_expected(self, value):
+ """Calculates the maximum acceptable value based on tolerance."""
+ return value + (self.expected_total * self.tolerance_percent / 100)
+
+ def bound(self, values):
+ """
+ Return True if all given values fall within tolerance.
+ """
+ for name, value in values.items():
+ low, high = self.bounds[name]
+ if not low <= value <= high:
+ return False
+ return True
+
+
+def setup_vf(cfg, set_tc_mapping=True):
+ """
+ Sets up a VF on the given network interface.
+
+ Enables SR-IOV and switchdev mode, brings the VF interface up,
+ and optionally configures TC mapping using mqprio.
+ """
+ try:
+ cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev")
+ defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy")
+ except Exception as exc:
+ raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc
+ try:
+ cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
+ defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
+ except Exception as exc:
+ raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc
+
+ time.sleep(2)
+ vf_ifc = (os.listdir(
+ f"/sys/class/net/{cfg.ifname}/device/virtfn0/net") or [None])[0]
+ if vf_ifc:
+ ip(f"link set dev {vf_ifc} up")
+ else:
+ raise KsftSkipEx("VF interface not found")
+ if set_tc_mapping:
+ cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8")
+
+ return vf_ifc
+
+
+def setup_vlans_on_vf(vf_ifc):
+ """
+ Sets up two VLAN interfaces on the given VF, each mapped to a different TC.
+ """
+ vlan_configs = [
+ {"vlan_id": 101, "tc": 3, "ip": "198.51.100.1"},
+ {"vlan_id": 102, "tc": 4, "ip": "198.51.100.9"},
+ ]
+
+ for config in vlan_configs:
+ vlan_dev = f"{vf_ifc}.{config['vlan_id']}"
+ ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {config['vlan_id']}")
+ ip(f"addr add {config['ip']}/29 dev {vlan_dev}")
+ ip(f"link set dev {vlan_dev} up")
+ ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{config['tc']}")
+ ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {config['tc']} and IP {config['ip']}")
+
+
+def get_vf_info(cfg):
+ """
+ Finds the VF representor interface and devlink port index
+ for the given PCI device used in the test environment.
+ """
+ cfg.vf_representor = None
+ cfg.vf_port_index = None
+ out = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8")
+ ports = json.loads(out)["port"]
+
+ for port_name, props in ports.items():
+ netdev = props.get("netdev")
+
+ if (port_name.startswith(f"pci/{cfg.pci}/") and
+ props.get("vfnum") == 0):
+ cfg.vf_representor = netdev
+ cfg.vf_port_index = int(port_name.split("/")[-1])
+ break
+
+
+def setup_bridge(cfg):
+ """
+ Creates and configures a Linux bridge, with both the uplink
+ and VF representor interfaces attached to it.
+ """
+ bridge_name = f"br_{os.getpid()}"
+ ip(f"link add name {bridge_name} type bridge")
+ defer(cmd, f"ip link del name {bridge_name} type bridge")
+
+ ip(f"link set dev {cfg.ifname} master {bridge_name}")
+
+ rep_name = cfg.vf_representor
+ if rep_name:
+ ip(f"link set dev {rep_name} master {bridge_name}")
+ ip(f"link set dev {rep_name} up")
+ ksft_pr(f"Set representor {rep_name} up and added to bridge")
+ else:
+ raise KsftSkipEx("Could not find representor for the VF")
+
+ ip(f"link set dev {bridge_name} up")
+
+
+def setup_devlink_rate(cfg):
+ """
+ Configures devlink rate tx_max and traffic class bandwidth for the VF.
+ """
+ port_index = cfg.vf_port_index
+ if port_index is None:
+ raise KsftSkipEx("Could not find VF port index")
+ try:
+ cfg.devnl.rate_set({
+ "bus-name": "pci",
+ "dev-name": cfg.pci,
+ "port-index": port_index,
+ "rate-tx-max": 125000000,
+ "rate-tc-bws": [
+ {"index": 0, "bw": 0},
+ {"index": 1, "bw": 0},
+ {"index": 2, "bw": 0},
+ {"index": 3, "bw": 20},
+ {"index": 4, "bw": 80},
+ {"index": 5, "bw": 0},
+ {"index": 6, "bw": 0},
+ {"index": 7, "bw": 0},
+ ]
+ })
+ except NlError as exc:
+ if exc.error == 95: # EOPNOTSUPP
+ raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc
+ raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc
+
+
+def setup_remote_vlans(cfg):
+ """
+ Sets up VLAN interfaces on the remote side.
+ """
+ remote_dev = cfg.remote_ifname
+ vlan_ids = [101, 102]
+ remote_ips = ["198.51.100.2", "198.51.100.10"]
+
+ for vlan_id, ip_addr in zip(vlan_ids, remote_ips):
+ vlan_dev = f"{remote_dev}.{vlan_id}"
+ cmd(f"ip link add link {remote_dev} name {vlan_dev} "
+ f"type vlan id {vlan_id}", host=cfg.remote)
+ cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote)
+ cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote)
+ defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote)
+
+
+def setup_test_environment(cfg, set_tc_mapping=True):
+ """
+ Sets up the complete test environment including VF creation, VLANs,
+ bridge configuration and devlink rate setup.
+ """
+ vf_ifc = setup_vf(cfg, set_tc_mapping)
+ ksft_pr(f"Created VF interface: {vf_ifc}")
+
+ setup_vlans_on_vf(vf_ifc)
+
+ get_vf_info(cfg)
+ setup_bridge(cfg)
+
+ setup_devlink_rate(cfg)
+ setup_remote_vlans(cfg)
+
+
+def measure_bandwidth(cfg, server_ip, client_ip, barrier):
+ """
+ Synchronizes with peers and runs an iperf3-based bandwidth measurement
+ between the given endpoints. Returns average Gbps.
+ """
+ runner = Iperf3Runner(cfg, server_ip=server_ip, client_ip=client_ip)
+ try:
+ barrier.wait(timeout=10)
+ except Exception as exc:
+ raise KsftFailEx("iperf3 barrier wait timed") from exc
+
+ try:
+ bw_gbps = runner.measure_bandwidth(reverse=True)
+ except Exception as exc:
+ raise KsftFailEx("iperf3 bandwidth measurement failed") from exc
+
+ return bw_gbps
+
+
+def run_bandwidth_test(cfg):
+ """
+ Runs parallel bandwidth measurements for each VLAN/TC pair and collects results.
+ """
+ def _run_measure_bandwidth_thread(local_ip, remote_ip, results, barrier, tc_ix):
+ results[tc_ix] = measure_bandwidth(cfg, local_ip, remote_ip, barrier)
+
+ vf_vlan_data = [
+ # (local_ip, remote_ip, TC)
+ ("198.51.100.1", "198.51.100.2", 3),
+ ("198.51.100.9", "198.51.100.10", 4),
+ ]
+
+ results = {}
+ threads = []
+ start_barrier = threading.Barrier(len(vf_vlan_data))
+
+ for local_ip, remote_ip, tc_ix in vf_vlan_data:
+ thread = threading.Thread(
+ target=_run_measure_bandwidth_thread,
+ args=(local_ip, remote_ip, results, start_barrier, tc_ix)
+ )
+ thread.start()
+ threads.append(thread)
+
+ for thread in threads:
+ thread.join()
+
+ for tc_ix, tc_bw in results.items():
+ if tc_bw is None:
+ raise KsftFailEx("iperf3 failed; cannot evaluate bandwidth")
+
+ return results
+
+
+def calculate_bandwidth_percentages(results):
+ """
+ Calculates the percentage of total bandwidth received by TC3 and TC4.
+ """
+ if 3 not in results or 4 not in results:
+ raise KsftFailEx(f"Missing expected TC results in {results}")
+
+ tc3_bw = results[3]
+ tc4_bw = results[4]
+ total_bw = tc3_bw + tc4_bw
+ tc3_percentage = (tc3_bw / total_bw) * 100
+ tc4_percentage = (tc4_bw / total_bw) * 100
+
+ return {
+ 'tc3_bw': tc3_bw,
+ 'tc4_bw': tc4_bw,
+ 'tc3_percentage': tc3_percentage,
+ 'tc4_percentage': tc4_percentage,
+ 'total_bw': total_bw
+ }
+
+
+def print_bandwidth_results(bw_data, test_name):
+ """
+ Prints bandwidth measurements and TC usage summary for a given test.
+ """
+ ksft_pr(f"Bandwidth check results {test_name}:")
+ ksft_pr(f"TC 3: {bw_data['tc3_bw']:.2f} Gbits/sec")
+ ksft_pr(f"TC 4: {bw_data['tc4_bw']:.2f} Gbits/sec")
+ ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec")
+ ksft_pr(f"TC 3 percentage: {bw_data['tc3_percentage']:.1f}%")
+ ksft_pr(f"TC 4 percentage: {bw_data['tc4_percentage']:.1f}%")
+
+
+def verify_total_bandwidth(bw_data, validator):
+ """
+ Ensures the total measured bandwidth falls within the acceptable tolerance.
+ """
+ total = bw_data['total_bw']
+
+ if validator.bound({"total": total}):
+ return
+
+ low, high = validator.bounds["total"]
+
+ if total < low:
+ raise KsftSkipEx(
+ f"Total bandwidth {total:.2f} Gbps < minimum "
+ f"{low:.2f} Gbps; "
+ f"parent tx_max ({validator.expected_total:.1f} G) "
+ f"not reached, cannot validate share"
+ )
+
+ raise KsftFailEx(
+ f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling "
+ f"{high:.2f} Gbps "
+ f"(VF tx_max set to {validator.expected_total:.1f} G)"
+ )
+
+
+def run_bandwidth_distribution_test(cfg, set_tc_mapping):
+ """
+ Runs parallel bandwidth measurements for both TCs and collects results.
+ """
+ setup_test_environment(cfg, set_tc_mapping)
+ bandwidths = run_bandwidth_test(cfg)
+ bw_data = calculate_bandwidth_percentages(bandwidths)
+ test_name = "with TC mapping" if set_tc_mapping else "without TC mapping"
+ print_bandwidth_results(bw_data, test_name)
+
+ verify_total_bandwidth(bw_data, cfg.traffic_bw_validator)
+
+ return cfg.tc_bw_validator.bound({"tc3": bw_data['tc3_percentage'],
+ "tc4": bw_data['tc4_percentage']})
+
+
+def test_no_tc_mapping_bandwidth(cfg):
+ """
+ Verifies that bandwidth is not split 20/80 without traffic class mapping.
+ """
+ pass_bw_msg = "Bandwidth is NOT distributed as 20/80 without TC mapping"
+ fail_bw_msg = "Bandwidth matched 20/80 split without TC mapping"
+ is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout
+
+ if run_bandwidth_distribution_test(cfg, set_tc_mapping=False):
+ if is_mlx5:
+ raise KsftXfailEx(fail_bw_msg)
+ raise KsftFailEx(fail_bw_msg)
+ if is_mlx5:
+ raise KsftFailEx("mlx5 behavior changed:" + pass_bw_msg)
+ ksft_pr(pass_bw_msg)
+
+
+def test_tc_mapping_bandwidth(cfg):
+ """
+ Verifies that bandwidth is correctly split 20/80 between TC3 and TC4
+ when traffic class mapping is set.
+ """
+ if run_bandwidth_distribution_test(cfg, set_tc_mapping=True):
+ ksft_pr("Bandwidth is distributed as 20/80 with TC mapping")
+ else:
+ raise KsftFailEx("Bandwidth did not match 20/80 split with TC mapping")
+
+
+def main() -> None:
+ """
+ Main entry point for running the test cases.
+ """
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.devnl = DevlinkFamily()
+
+ cfg.pci = os.path.basename(
+ os.path.realpath(f"/sys/class/net/{cfg.ifname}/device")
+ )
+ if not cfg.pci:
+ raise KsftSkipEx("Could not get PCI address of the interface")
+
+ cfg.traffic_bw_validator = BandwidthValidator({"total": 1})
+ cfg.tc_bw_validator = BandwidthValidator({"tc3": 20, "tc4": 80})
+
+ cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth]
+
+ ksft_run(cases=cases, args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
new file mode 100755
index 000000000000..45c2d49d55b6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from os import path
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, KsftSkipEx
+from lib.py import NetDrvEpEnv
+from lib.py import bkg, cmd, rand_port, wait_port_listen
+from lib.py import ksft_disruptive
+
+
+def require_devmem(cfg):
+ if not hasattr(cfg, "_devmem_probed"):
+ probe_command = f"{cfg.bin_local} -f {cfg.ifname}"
+ cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0
+ cfg._devmem_probed = True
+
+ if not cfg._devmem_supported:
+ raise KsftSkipEx("Test requires devmem support")
+
+
+@ksft_disruptive
+def check_rx(cfg) -> None:
+ require_devmem(cfg)
+
+ port = rand_port()
+ socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}"
+ listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7"
+
+ with bkg(listen_cmd, exit_wait=True) as ncdevmem:
+ wait_port_listen(port)
+ cmd(f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | \
+ head -c 1K | {socat}", host=cfg.remote, shell=True)
+
+ ksft_eq(ncdevmem.ret, 0)
+
+
+@ksft_disruptive
+def check_tx(cfg) -> None:
+ require_devmem(cfg)
+
+ port = rand_port()
+ listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
+
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat:
+ wait_port_listen(port, host=cfg.remote)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True)
+
+ ksft_eq(socat.stdout.strip(), "hello\nworld")
+
+
+@ksft_disruptive
+def check_tx_chunks(cfg) -> None:
+ require_devmem(cfg)
+
+ port = rand_port()
+ listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
+
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat:
+ wait_port_listen(port, host=cfg.remote)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True)
+
+ ksft_eq(socat.stdout.strip(), "hello\nworld")
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem")
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ ksft_run([check_rx, check_tx, check_tx_chunks],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/ethtool.sh b/tools/testing/selftests/drivers/net/hw/ethtool.sh
new file mode 100755
index 000000000000..fa6953de6b6d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ethtool.sh
@@ -0,0 +1,297 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ same_speeds_autoneg_off
+ different_speeds_autoneg_off
+ combination_of_neg_on_and_off
+ advertise_subset_of_speeds
+ check_highest_speed_is_chosen
+ different_speeds_autoneg_on
+"
+NUM_NETIFS=2
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source ethtool_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+}
+
+same_speeds_autoneg_off()
+{
+ # Check that when each of the reported speeds is forced, the links come
+ # up and are operational.
+ local -a speeds_arr=($(common_speeds_get $h1 $h2 0 0))
+
+ for speed in "${speeds_arr[@]}"; do
+ RET=0
+ ethtool_set $h1 speed $speed autoneg off
+ ethtool_set $h2 speed $speed autoneg off
+
+ setup_wait_dev_with_timeout $h1
+ setup_wait_dev_with_timeout $h2
+ ping_do $h1 192.0.2.2
+ check_err $? "ping with speed $speed autoneg off"
+ log_test "force speed $speed on both ends"
+ done
+
+ ethtool -s $h2 autoneg on
+ ethtool -s $h1 autoneg on
+}
+
+different_speeds_autoneg_off()
+{
+ # Test that when we force different speeds, links are not up and ping
+ # fails.
+ RET=0
+
+ local -a speeds_arr=($(different_speeds_get $h1 $h2 0 0))
+ local speed1=${speeds_arr[0]}
+ local speed2=${speeds_arr[1]}
+
+ ethtool_set $h1 speed $speed1 autoneg off
+ ethtool_set $h2 speed $speed2 autoneg off
+
+ setup_wait_dev_with_timeout $h1
+ setup_wait_dev_with_timeout $h2
+ ping_do $h1 192.0.2.2
+ check_fail $? "ping with different speeds"
+
+ log_test "force of different speeds autoneg off"
+
+ ethtool -s $h2 autoneg on
+ ethtool -s $h1 autoneg on
+}
+
+combination_of_neg_on_and_off()
+{
+ # Test that when one device is forced to a speed supported by both
+ # endpoints and the other device is configured to autoneg on, the links
+ # are up and ping passes.
+ local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1))
+
+ for speed in "${speeds_arr[@]}"; do
+ RET=0
+ ethtool_set $h1 speed $speed autoneg off
+
+ setup_wait_dev_with_timeout $h1
+ setup_wait_dev_with_timeout $h2
+ ping_do $h1 192.0.2.2
+ check_err $? "ping with h1-speed=$speed autoneg off, h2 autoneg on"
+ log_test "force speed $speed vs. autoneg"
+ done
+
+ ethtool -s $h1 autoneg on
+}
+
+hex_speed_value_get()
+{
+ local speed=$1; shift
+
+ local shift_size=${speed_values[$speed]}
+ speed=$((0x1 << $"shift_size"))
+ printf "%#x" "$speed"
+}
+
+subset_of_common_speeds_get()
+{
+ local dev1=$1; shift
+ local dev2=$1; shift
+ local adver=$1; shift
+
+ local -a speeds_arr=($(common_speeds_get $dev1 $dev2 0 $adver))
+ local speed_to_advertise=0
+ local speed_to_remove=${speeds_arr[0]}
+ speed_to_remove+='base'
+
+ local -a speeds_mode_arr=($(common_speeds_get $dev1 $dev2 1 $adver))
+
+ for speed in ${speeds_mode_arr[@]}; do
+ if [[ $speed != $speed_to_remove* ]]; then
+ speed=$(hex_speed_value_get $speed)
+ speed_to_advertise=$(($speed_to_advertise | \
+ $speed))
+ fi
+
+ done
+
+ # Convert to hex.
+ printf "%#x" "$speed_to_advertise"
+}
+
+speed_to_advertise_get()
+{
+ # The function returns the hex number that is composed by OR-ing all
+ # the modes corresponding to the provided speed.
+ local speed_without_mode=$1; shift
+ local supported_speeds=("$@"); shift
+ local speed_to_advertise=0
+
+ speed_without_mode+='base'
+
+ for speed in ${supported_speeds[@]}; do
+ if [[ $speed == $speed_without_mode* ]]; then
+ speed=$(hex_speed_value_get $speed)
+ speed_to_advertise=$(($speed_to_advertise | \
+ $speed))
+ fi
+
+ done
+
+ # Convert to hex.
+ printf "%#x" "$speed_to_advertise"
+}
+
+advertise_subset_of_speeds()
+{
+ # Test that when one device advertises a subset of speeds and another
+ # advertises a specific speed (but all modes of this speed), the links
+ # are up and ping passes.
+ RET=0
+
+ local speed_1_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1)
+ ethtool_set $h1 advertise $speed_1_to_advertise
+
+ if [ $RET != 0 ]; then
+ log_test "advertise subset of speeds"
+ return
+ fi
+
+ local -a speeds_arr_without_mode=($(common_speeds_get $h1 $h2 0 1))
+ # Check only speeds that h1 advertised. Remove the first speed.
+ unset speeds_arr_without_mode[0]
+ local -a speeds_arr_with_mode=($(common_speeds_get $h1 $h2 1 1))
+
+ for speed_value in ${speeds_arr_without_mode[@]}; do
+ RET=0
+ local speed_2_to_advertise=$(speed_to_advertise_get $speed_value \
+ "${speeds_arr_with_mode[@]}")
+ ethtool_set $h2 advertise $speed_2_to_advertise
+
+ setup_wait_dev_with_timeout $h1
+ setup_wait_dev_with_timeout $h2
+ ping_do $h1 192.0.2.2
+ check_err $? "ping with h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)"
+
+ log_test "advertise $speed_1_to_advertise vs. $speed_2_to_advertise"
+ done
+
+ ethtool -s $h2 autoneg on
+ ethtool -s $h1 autoneg on
+}
+
+check_highest_speed_is_chosen()
+{
+ # Test that when one device advertises a subset of speeds, the other
+ # chooses the highest speed. This test checks configuration without
+ # traffic.
+ RET=0
+
+ local max_speed
+ local chosen_speed
+ local speed_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1)
+
+ ethtool_set $h1 advertise $speed_to_advertise
+
+ if [ $RET != 0 ]; then
+ log_test "check highest speed"
+ return
+ fi
+
+ local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1))
+
+ max_speed=${speeds_arr[0]}
+ for current in ${speeds_arr[@]}; do
+ if [[ $current -gt $max_speed ]]; then
+ max_speed=$current
+ fi
+ done
+
+ setup_wait_dev_with_timeout $h1
+ setup_wait_dev_with_timeout $h2
+ chosen_speed=$(ethtool $h1 | grep 'Speed:')
+ chosen_speed=${chosen_speed%"Mb/s"*}
+ chosen_speed=${chosen_speed#*"Speed: "}
+ ((chosen_speed == max_speed))
+ check_err $? "h1 advertise $speed_to_advertise, h2 sync to speed $chosen_speed"
+
+ log_test "check highest speed"
+
+ ethtool -s $h2 autoneg on
+ ethtool -s $h1 autoneg on
+}
+
+different_speeds_autoneg_on()
+{
+ # Test that when we configure links to advertise different speeds,
+ # links are not up and ping fails.
+ RET=0
+
+ local -a speeds=($(different_speeds_get $h1 $h2 1 1))
+ local speed1=${speeds[0]}
+ local speed2=${speeds[1]}
+
+ speed1=$(hex_speed_value_get $speed1)
+ speed2=$(hex_speed_value_get $speed2)
+
+ ethtool_set $h1 advertise $speed1
+ ethtool_set $h2 advertise $speed2
+
+ if (($RET)); then
+ setup_wait_dev_with_timeout $h1
+ setup_wait_dev_with_timeout $h2
+ ping_do $h1 192.0.2.2
+ check_fail $? "ping with different speeds autoneg on"
+ fi
+
+ log_test "advertise different speeds autoneg on"
+
+ ethtool -s $h2 autoneg on
+ ethtool -s $h1 autoneg on
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+declare -gA speed_values
+eval "speed_values=($(speeds_arr_get))"
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh
new file mode 100755
index 000000000000..a7584448416e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ autoneg
+ autoneg_force_mode
+ no_cable
+"
+
+NUM_NETIFS=2
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source ethtool_lib.sh
+
+TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+ swp3=$NETIF_NO_CABLE
+}
+
+ethtool_ext_state()
+{
+ local dev=$1; shift
+ local expected_ext_state=$1; shift
+ local expected_ext_substate=${1:-""}; shift
+
+ local ext_state=$(ethtool $dev | grep "Link detected" \
+ | cut -d "(" -f2 | cut -d ")" -f1)
+ local ext_substate=$(echo $ext_state | cut -sd "," -f2 \
+ | sed -e 's/^[[:space:]]*//')
+ ext_state=$(echo $ext_state | cut -d "," -f1)
+
+ if [[ $ext_state != $expected_ext_state ]]; then
+ echo "Expected \"$expected_ext_state\", got \"$ext_state\""
+ return 1
+ fi
+ if [[ $ext_substate != $expected_ext_substate ]]; then
+ echo "Expected \"$expected_ext_substate\", got \"$ext_substate\""
+ return 1
+ fi
+}
+
+autoneg()
+{
+ local msg
+
+ RET=0
+
+ ip link set dev $swp1 up
+
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
+ "Autoneg" "No partner detected")
+ check_err $? "$msg"
+
+ log_test "Autoneg, No partner detected"
+
+ ip link set dev $swp1 down
+}
+
+autoneg_force_mode()
+{
+ local msg
+
+ RET=0
+
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0))
+ local speed1=${speeds_arr[0]}
+ local speed2=${speeds_arr[1]}
+
+ ethtool_set $swp1 speed $speed1 autoneg off
+ ethtool_set $swp2 speed $speed2 autoneg off
+
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
+ "Autoneg" "No partner detected during force mode")
+ check_err $? "$msg"
+
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \
+ "Autoneg" "No partner detected during force mode")
+ check_err $? "$msg"
+
+ log_test "Autoneg, No partner detected during force mode"
+
+ ethtool -s $swp2 autoneg on
+ ethtool -s $swp1 autoneg on
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+}
+
+no_cable()
+{
+ local msg
+
+ RET=0
+
+ ip link set dev $swp3 up
+
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable")
+ check_err $? "$msg"
+
+ log_test "No cable"
+
+ ip link set dev $swp3 down
+}
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh
new file mode 100644
index 000000000000..b9bfb45085af
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+speeds_arr_get()
+{
+ cmd='/ETHTOOL_LINK_MODE_[^[:space:]]*_BIT[[:space:]]+=[[:space:]]+/ \
+ {sub(/,$/, "") \
+ sub(/ETHTOOL_LINK_MODE_/,"") \
+ sub(/_BIT/,"") \
+ sub(/_Full/,"/Full") \
+ sub(/_Half/,"/Half");\
+ print "["$1"]="$3}'
+
+ awk "${cmd}" /usr/include/linux/ethtool.h
+}
+
+ethtool_set()
+{
+ local cmd="$@"
+ local out=$(ethtool -s $cmd 2>&1 | wc -l)
+
+ check_err $out "error in configuration. $cmd"
+}
+
+dev_linkmodes_params_get()
+{
+ local dev=$1; shift
+ local adver=$1; shift
+ local -a linkmodes_params
+ local param_count
+ local arr
+
+ if (($adver)); then
+ mode="Advertised link modes"
+ else
+ mode="Supported link modes"
+ fi
+
+ local -a dev_linkmodes=($(dev_speeds_get $dev 1 $adver))
+ for ((i=0; i<${#dev_linkmodes[@]}; i++)); do
+ linkmodes_params[$i]=$(echo -e "${dev_linkmodes[$i]}" | \
+ # Replaces all non numbers with spaces
+ sed -e 's/[^0-9]/ /g' | \
+ # Squeeze spaces in sequence to 1 space
+ tr -s ' ')
+ # Count how many numbers were found in the linkmode
+ param_count=$(echo "${linkmodes_params[$i]}" | wc -w)
+ if [[ $param_count -eq 1 ]]; then
+ linkmodes_params[$i]="${linkmodes_params[$i]} 1"
+ elif [[ $param_count -ge 3 ]]; then
+ arr=(${linkmodes_params[$i]})
+ # Take only first two params
+ linkmodes_params[$i]=$(echo "${arr[@]:0:2}")
+ fi
+ done
+ echo ${linkmodes_params[@]}
+}
+
+dev_speeds_get()
+{
+ local dev=$1; shift
+ local with_mode=$1; shift
+ local adver=$1; shift
+ local speeds_str
+
+ if (($adver)); then
+ mode="Advertised link modes"
+ else
+ mode="Supported link modes"
+ fi
+
+ speeds_str=$(ethtool "$dev" | \
+ # Snip everything before the link modes section.
+ sed -n '/'"$mode"':/,$p' | \
+ # Quit processing the rest at the start of the next section.
+ # When checking, skip the header of this section (hence the 2,).
+ sed -n '2,${/^[\t][^ \t]/q};p' | \
+ # Drop the section header of the current section.
+ cut -d':' -f2)
+
+ local -a speeds_arr=($speeds_str)
+ if [[ $with_mode -eq 0 ]]; then
+ for ((i=0; i<${#speeds_arr[@]}; i++)); do
+ speeds_arr[$i]=${speeds_arr[$i]%base*}
+ done
+ fi
+ echo ${speeds_arr[@]}
+}
+
+common_speeds_get()
+{
+ dev1=$1; shift
+ dev2=$1; shift
+ with_mode=$1; shift
+ adver=$1; shift
+
+ local -a dev1_speeds=($(dev_speeds_get $dev1 $with_mode $adver))
+ local -a dev2_speeds=($(dev_speeds_get $dev2 $with_mode $adver))
+
+ comm -12 \
+ <(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \
+ <(printf '%s\n' "${dev2_speeds[@]}" | sort -u)
+}
+
+different_speeds_get()
+{
+ local dev1=$1; shift
+ local dev2=$1; shift
+ local with_mode=$1; shift
+ local adver=$1; shift
+
+ local -a speeds_arr
+
+ speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
+ if [[ ${#speeds_arr[@]} < 2 ]]; then
+ check_err 1 "cannot check different speeds. There are not enough speeds"
+ fi
+
+ echo ${speeds_arr[0]} ${speeds_arr[1]}
+}
diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh
new file mode 100755
index 000000000000..c301e735c8ab
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh
@@ -0,0 +1,341 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ manual_with_verification_h1_to_h2
+ manual_with_verification_h2_to_h1
+ manual_without_verification_h1_to_h2
+ manual_without_verification_h2_to_h1
+ manual_failed_verification_h1_to_h2
+ manual_failed_verification_h2_to_h1
+ lldp
+"
+
+NUM_NETIFS=2
+REQUIRE_MZ=no
+PREEMPTIBLE_PRIO=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+traffic_test()
+{
+ local if=$1; shift
+ local src=$1; shift
+ local num_pkts=10000
+ local before=
+ local after=
+ local delta=
+
+ if [ ${has_pmac_stats[$if]} = false ]; then
+ src="aggregate"
+ fi
+
+ before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
+
+ $MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO
+
+ after=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
+
+ delta=$((after - before))
+
+ # Allow an extra 1% tolerance for random packets sent by the stack
+ [ $delta -ge $num_pkts ] && [ $delta -le $((num_pkts + 100)) ]
+}
+
+manual_with_verification()
+{
+ local tx=$1; shift
+ local rx=$1; shift
+
+ RET=0
+
+ # It isn't completely clear from IEEE 802.3-2018 Figure 99-5: Transmit
+ # Processing state diagram whether the "send_r" variable (send response
+ # to verification frame) should be taken into consideration while the
+ # MAC Merge TX direction is disabled. That being said, at least the
+ # NXP ENETC does not, and requires tx-enabled on in order to respond to
+ # the link partner's verification frames.
+ ethtool --set-mm $rx tx-enabled on
+ ethtool --set-mm $tx verify-enabled on tx-enabled on
+
+ # Wait for verification to finish
+ sleep 1
+
+ ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+ grep -q 'SUCCEEDED'
+ check_err "$?" "Verification did not succeed"
+
+ ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+ check_err "$?" "pMAC TX is not active"
+
+ traffic_test $tx "pmac"
+ check_err "$?" "Traffic did not get sent through $tx's pMAC"
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled off
+ ethtool --set-mm $rx tx-enabled off
+
+ log_test "Manual configuration with verification: $tx to $rx"
+}
+
+manual_with_verification_h1_to_h2()
+{
+ manual_with_verification $h1 $h2
+}
+
+manual_with_verification_h2_to_h1()
+{
+ manual_with_verification $h2 $h1
+}
+
+manual_without_verification()
+{
+ local tx=$1; shift
+ local rx=$1; shift
+
+ RET=0
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled on
+
+ ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+ grep -q 'DISABLED'
+ check_err "$?" "Verification is not disabled"
+
+ ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+ check_err "$?" "pMAC TX is not active"
+
+ traffic_test $tx "pmac"
+ check_err "$?" "Traffic did not get sent through $tx's pMAC"
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled off
+
+ log_test "Manual configuration without verification: $tx to $rx"
+}
+
+manual_without_verification_h1_to_h2()
+{
+ manual_without_verification $h1 $h2
+}
+
+manual_without_verification_h2_to_h1()
+{
+ manual_without_verification $h2 $h1
+}
+
+manual_failed_verification()
+{
+ local tx=$1; shift
+ local rx=$1; shift
+
+ RET=0
+
+ ethtool --set-mm $rx pmac-enabled off
+ ethtool --set-mm $tx verify-enabled on tx-enabled on
+
+ # Wait for verification to time out
+ sleep 1
+
+ ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+ grep -q 'SUCCEEDED'
+ check_fail "$?" "Verification succeeded when it shouldn't have"
+
+ ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+ check_fail "$?" "pMAC TX is active when it shouldn't have"
+
+ traffic_test $tx "emac"
+ check_err "$?" "Traffic did not get sent through $tx's eMAC"
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled off
+ ethtool --set-mm $rx pmac-enabled on
+
+ log_test "Manual configuration with failed verification: $tx to $rx"
+}
+
+manual_failed_verification_h1_to_h2()
+{
+ manual_failed_verification $h1 $h2
+}
+
+manual_failed_verification_h2_to_h1()
+{
+ manual_failed_verification $h2 $h1
+}
+
+smallest_supported_add_frag_size()
+{
+ local iface=$1
+ local rx_min_frag_size=
+
+ rx_min_frag_size=$(ethtool --json --show-mm $iface | \
+ jq '.[]."rx-min-frag-size"')
+
+ if [ $rx_min_frag_size -le 60 ]; then
+ echo 0
+ elif [ $rx_min_frag_size -le 124 ]; then
+ echo 1
+ elif [ $rx_min_frag_size -le 188 ]; then
+ echo 2
+ elif [ $rx_min_frag_size -le 252 ]; then
+ echo 3
+ else
+ echo "$iface: RX min frag size $rx_min_frag_size cannot be advertised over LLDP"
+ exit 1
+ fi
+}
+
+expected_add_frag_size()
+{
+ local iface=$1
+ local requested=$2
+ local min=$(smallest_supported_add_frag_size $iface)
+
+ [ $requested -le $min ] && echo $min || echo $requested
+}
+
+lldp_change_add_frag_size()
+{
+ local add_frag_size=$1
+ local pattern=
+
+ lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null
+ # Wait for TLVs to be received
+ sleep 2
+ pattern=$(printf "Additional fragment size: %d" \
+ $(expected_add_frag_size $h1 $add_frag_size))
+ lldptool -i $h2 -t -n -V addEthCaps | grep -q "$pattern"
+}
+
+lldp()
+{
+ RET=0
+
+ systemctl start lldpad
+
+ # Configure the interfaces to receive and transmit LLDPDUs
+ lldptool -L -i $h1 adminStatus=rxtx >/dev/null
+ lldptool -L -i $h2 adminStatus=rxtx >/dev/null
+
+ # Enable the transmission of Additional Ethernet Capabilities TLV
+ lldptool -T -i $h1 -V addEthCaps enableTx=yes >/dev/null
+ lldptool -T -i $h2 -V addEthCaps enableTx=yes >/dev/null
+
+ # Wait for TLVs to be received
+ sleep 2
+
+ lldptool -i $h1 -t -n -V addEthCaps | \
+ grep -q "Preemption capability active"
+ check_err "$?" "$h1 pMAC TX is not active"
+
+ lldptool -i $h2 -t -n -V addEthCaps | \
+ grep -q "Preemption capability active"
+ check_err "$?" "$h2 pMAC TX is not active"
+
+ lldp_change_add_frag_size 3
+ check_err "$?" "addFragSize 3"
+
+ lldp_change_add_frag_size 2
+ check_err "$?" "addFragSize 2"
+
+ lldp_change_add_frag_size 1
+ check_err "$?" "addFragSize 1"
+
+ lldp_change_add_frag_size 0
+ check_err "$?" "addFragSize 0"
+
+ traffic_test $h1 "pmac"
+ check_err "$?" "Traffic did not get sent through $h1's pMAC"
+
+ traffic_test $h2 "pmac"
+ check_err "$?" "Traffic did not get sent through $h2's pMAC"
+
+ systemctl stop lldpad
+
+ log_test "LLDP"
+}
+
+h1_create()
+{
+ ip link set dev $h1 up
+
+ tc qdisc add dev $h1 root mqprio num_tc 4 map 0 1 2 3 \
+ queues 1@0 1@1 1@2 1@3 \
+ fp P E E E \
+ hw 1
+
+ ethtool --set-mm $h1 pmac-enabled on tx-enabled off verify-enabled off
+}
+
+h2_create()
+{
+ ip link set dev $h2 up
+
+ ethtool --set-mm $h2 pmac-enabled on tx-enabled off verify-enabled off
+
+ tc qdisc add dev $h2 root mqprio num_tc 4 map 0 1 2 3 \
+ queues 1@0 1@1 1@2 1@3 \
+ fp P E E E \
+ hw 1
+}
+
+h1_destroy()
+{
+ ethtool --set-mm $h1 pmac-enabled off tx-enabled off verify-enabled off
+
+ tc qdisc del dev $h1 root
+
+ ip link set dev $h1 down
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 root
+
+ ethtool --set-mm $h2 pmac-enabled off tx-enabled off verify-enabled off
+
+ ip link set dev $h2 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+}
+
+check_ethtool_mm_support
+check_tc_fp_support
+require_command lldptool
+bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
+
+for netif in ${NETIFS[@]}; do
+ ethtool --show-mm $netif 2>&1 &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: $netif does not support MAC Merge"
+ exit $ksft_skip
+ fi
+
+ if check_ethtool_pmac_std_stats_support $netif eth-mac; then
+ has_pmac_stats[$netif]=true
+ else
+ has_pmac_stats[$netif]=false
+ echo "$netif does not report pMAC statistics, falling back to aggregate"
+ fi
+done
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh
new file mode 100755
index 000000000000..8f60c1685ad4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ rmon_rx_histogram
+ rmon_tx_histogram
+"
+
+NUM_NETIFS=2
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+ETH_FCS_LEN=4
+ETH_HLEN=$((6+6+2))
+
+declare -A netif_mtu
+
+ensure_mtu()
+{
+ local iface=$1; shift
+ local len=$1; shift
+ local current=$(ip -j link show dev $iface | jq -r '.[0].mtu')
+ local required=$((len - ETH_HLEN - ETH_FCS_LEN))
+
+ if [ $current -lt $required ]; then
+ ip link set dev $iface mtu $required || return 1
+ fi
+}
+
+bucket_test()
+{
+ local iface=$1; shift
+ local neigh=$1; shift
+ local set=$1; shift
+ local bucket=$1; shift
+ local len=$1; shift
+ local num_rx=10000
+ local num_tx=20000
+ local expected=
+ local before=
+ local after=
+ local delta=
+
+ # Mausezahn does not include FCS bytes in its length - but the
+ # histogram counters do
+ len=$((len - ETH_FCS_LEN))
+ len=$((len > 0 ? len : 0))
+
+ before=$(ethtool --json -S $iface --groups rmon | \
+ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
+
+ # Send 10k one way and 20k in the other, to detect counters
+ # mapped to the wrong direction
+ $MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us
+ $MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us
+
+ after=$(ethtool --json -S $iface --groups rmon | \
+ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
+
+ delta=$((after - before))
+
+ expected=$([ $set = rx ] && echo $num_rx || echo $num_tx)
+
+ # Allow some extra tolerance for other packets sent by the stack
+ [ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ]
+}
+
+rmon_histogram()
+{
+ local iface=$1; shift
+ local neigh=$1; shift
+ local set=$1; shift
+ local nbuckets=0
+ local step=
+
+ RET=0
+
+ while read -r -a bucket; do
+ step="$set-pkts${bucket[0]}to${bucket[1]} on $iface"
+
+ for if in $iface $neigh; do
+ if ! ensure_mtu $if ${bucket[0]}; then
+ log_test_xfail "$if does not support the required MTU for $step"
+ return
+ fi
+ done
+
+ if ! bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then
+ check_err 1 "$step failed"
+ return 1
+ fi
+ log_test "$step"
+ nbuckets=$((nbuckets + 1))
+ done < <(ethtool --json -S $iface --groups rmon | \
+ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null)
+
+ if [ $nbuckets -eq 0 ]; then
+ log_test_xfail "$iface does not support $set histogram counters"
+ return
+ fi
+}
+
+rmon_rx_histogram()
+{
+ rmon_histogram $h1 $h2 rx
+ rmon_histogram $h2 $h1 rx
+}
+
+rmon_tx_histogram()
+{
+ rmon_histogram $h1 $h2 tx
+ rmon_histogram $h2 $h1 tx
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ for iface in $h1 $h2; do
+ netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu')
+ ip link set dev $iface up
+ done
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ for iface in $h2 $h1; do
+ ip link set dev $iface \
+ mtu ${netif_mtu[$iface]} \
+ down
+ done
+}
+
+check_ethtool_counter_group_support
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh
new file mode 100755
index 000000000000..67fafefc80be
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh
@@ -0,0 +1,334 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+ +----------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.200 + | | + $h2.200 |
+# | 192.0.2.1/28 | | | | 192.0.2.18/28 |
+# | 2001:db8:1::1/64 | | | | 2001:db8:2::1/64 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# | | | | | |
+# +------------------|-+ +-|--------------------+
+# | |
+# +------------------|-------------------------|--------------------+
+# | SW | | |
+# | | | |
+# | $rp1 + + $rp2 |
+# | | | |
+# | $rp1.200 + + $rp2.200 |
+# | 192.0.2.2/28 192.0.2.17/28 |
+# | 2001:db8:1::2/64 2001:db8:2::2/64 |
+# | |
+# +-----------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ test_stats_rx_ipv4
+ test_stats_tx_ipv4
+ test_stats_rx_ipv6
+ test_stats_tx_ipv6
+ respin_enablement
+ test_stats_rx_ipv4
+ test_stats_tx_ipv4
+ test_stats_rx_ipv6
+ test_stats_tx_ipv6
+ reapply_config
+ ping_ipv4
+ ping_ipv6
+ test_stats_rx_ipv4
+ test_stats_tx_ipv4
+ test_stats_rx_ipv6
+ test_stats_tx_ipv6
+ test_stats_report_rx
+ test_stats_report_tx
+ test_destroy_enabled
+ test_double_enable
+"
+NUM_NETIFS=4
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+ ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+ vlan_destroy $h1 200
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64
+ ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+ ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+ vlan_destroy $h2 200
+ simple_if_fini $h2
+}
+
+router_rp1_200_create()
+{
+ ip link add name $rp1.200 link $rp1 type vlan id 200
+ ip link set dev $rp1.200 addrgenmode eui64
+ ip link set dev $rp1.200 up
+ ip address add dev $rp1.200 192.0.2.2/28
+ ip address add dev $rp1.200 2001:db8:1::2/64
+ ip stats set dev $rp1.200 l3_stats on
+}
+
+router_rp1_200_destroy()
+{
+ ip stats set dev $rp1.200 l3_stats off
+ ip address del dev $rp1.200 2001:db8:1::2/64
+ ip address del dev $rp1.200 192.0.2.2/28
+ ip link del dev $rp1.200
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ router_rp1_200_create
+
+ ip link set dev $rp2 up
+ vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+ vlan_destroy $rp2 200
+ ip link set dev $rp2 down
+
+ router_rp1_200_destroy
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1mac=$(mac_get $rp1)
+ rp2mac=$(mac_get $rp2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1.200 192.0.2.18 " IPv4"
+}
+
+ping_ipv6()
+{
+ ping_test $h1.200 2001:db8:2::1 " IPv6"
+}
+
+send_packets_rx_ipv4()
+{
+ # Send 21 packets instead of 20, because the first one might trap and go
+ # through the SW datapath, which might not bump the HW counter.
+ $MZ $h1.200 -c 21 -d 20msec -p 100 \
+ -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \
+ -q -t udp sp=54321,dp=12345
+}
+
+send_packets_rx_ipv6()
+{
+ $MZ $h1.200 -6 -c 21 -d 20msec -p 100 \
+ -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \
+ -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv4()
+{
+ $MZ $h2.200 -c 21 -d 20msec -p 100 \
+ -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \
+ -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv6()
+{
+ $MZ $h2.200 -6 -c 21 -d 20msec -p 100 \
+ -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \
+ -q -t udp sp=54321,dp=12345
+}
+
+___test_stats()
+{
+ local dir=$1; shift
+ local prot=$1; shift
+
+ local a
+ local b
+
+ a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
+ send_packets_${dir}_${prot}
+ "$@"
+ b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+ hw_stats_get l3_stats $rp1.200 ${dir} packets)
+ check_err $? "Traffic not reflected in the counter: $a -> $b"
+}
+
+__test_stats()
+{
+ local dir=$1; shift
+ local prot=$1; shift
+
+ RET=0
+ ___test_stats "$dir" "$prot"
+ log_test "Test $dir packets: $prot"
+}
+
+test_stats_rx_ipv4()
+{
+ __test_stats rx ipv4
+}
+
+test_stats_tx_ipv4()
+{
+ __test_stats tx ipv4
+}
+
+test_stats_rx_ipv6()
+{
+ __test_stats rx ipv6
+}
+
+test_stats_tx_ipv6()
+{
+ __test_stats tx ipv6
+}
+
+# Make sure everything works well even after stats have been disabled and
+# reenabled on the same device without touching the L3 configuration.
+respin_enablement()
+{
+ log_info "Turning stats off and on again"
+ ip stats set dev $rp1.200 l3_stats off
+ ip stats set dev $rp1.200 l3_stats on
+}
+
+# For the initial run, l3_stats is enabled on a completely set up netdevice. Now
+# do it the other way around: enabling the L3 stats on an L2 netdevice, and only
+# then apply the L3 configuration.
+reapply_config()
+{
+ log_info "Reapplying configuration"
+
+ router_rp1_200_destroy
+
+ ip link add name $rp1.200 link $rp1 type vlan id 200
+ ip link set dev $rp1.200 addrgenmode none
+ ip stats set dev $rp1.200 l3_stats on
+ ip link set dev $rp1.200 addrgenmode eui64
+ ip link set dev $rp1.200 up
+ ip address add dev $rp1.200 192.0.2.2/28
+ ip address add dev $rp1.200 2001:db8:1::2/64
+}
+
+__test_stats_report()
+{
+ local dir=$1; shift
+ local prot=$1; shift
+
+ local a
+ local b
+
+ RET=0
+
+ a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
+ send_packets_${dir}_${prot}
+ ip address flush dev $rp1.200
+ b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+ hw_stats_get l3_stats $rp1.200 ${dir} packets)
+ check_err $? "Traffic not reflected in the counter: $a -> $b"
+ log_test "Test ${dir} packets: stats pushed on loss of L3"
+
+ ip stats set dev $rp1.200 l3_stats off
+ ip link del dev $rp1.200
+ router_rp1_200_create
+}
+
+test_stats_report_rx()
+{
+ __test_stats_report rx ipv4
+}
+
+test_stats_report_tx()
+{
+ __test_stats_report tx ipv4
+}
+
+test_destroy_enabled()
+{
+ RET=0
+
+ ip link del dev $rp1.200
+ router_rp1_200_create
+
+ log_test "Destroy l3_stats-enabled netdev"
+}
+
+test_double_enable()
+{
+ RET=0
+ ___test_stats rx ipv4 \
+ ip stats set dev $rp1.200 l3_stats on
+ log_test "Test stat retention across a spurious enablement"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info |
+ jq '.[].info.l3_stats.used')
+[[ $used = true ]]
+check_err $? "hw_stats_info.used=$used"
+log_test "l3_stats offloaded"
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh
new file mode 100755
index 000000000000..a94d92e1abce
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test L3 stats on IP-in-IP GRE tunnel without key.
+
+# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
+# details.
+
+ALL_TESTS="
+ ping_ipv4
+ test_stats_rx
+ test_stats_tx
+"
+NUM_NETIFS=6
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/../../../net/forwarding/ipip_lib.sh
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ ol1mac=$(mac_get $ol1)
+
+ forwarding_enable
+ vrf_prepare
+ h1_create
+ h2_create
+ sw1_flat_create gre $ol1 $ul1
+ sw2_flat_create gre $ol2 $ul2
+ ip stats set dev g1a l3_stats on
+ ip stats set dev g2a l3_stats on
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip stats set dev g1a l3_stats off
+ ip stats set dev g2a l3_stats off
+
+ sw2_flat_destroy $ol2 $ul2
+ sw1_flat_destroy $ol1 $ul1
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+ forwarding_restore
+}
+
+ping_ipv4()
+{
+ RET=0
+
+ ping_test $h1 192.0.2.18 " gre flat"
+}
+
+send_packets_ipv4()
+{
+ # Send 21 packets instead of 20, because the first one might trap and go
+ # through the SW datapath, which might not bump the HW counter.
+ $MZ $h1 -c 21 -d 20msec -p 100 \
+ -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \
+ -q -t udp sp=54321,dp=12345
+}
+
+test_stats()
+{
+ local dev=$1; shift
+ local dir=$1; shift
+
+ local a
+ local b
+
+ RET=0
+
+ a=$(hw_stats_get l3_stats $dev $dir packets)
+ send_packets_ipv4
+ b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+ hw_stats_get l3_stats $dev $dir packets)
+ check_err $? "Traffic not reflected in the counter: $a -> $b"
+
+ log_test "Test $dir packets: $prot"
+}
+
+test_stats_tx()
+{
+ test_stats g1a tx
+}
+
+test_stats_rx()
+{
+ test_stats g2a rx
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
new file mode 100644
index 000000000000..62456df947bc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+
+#include <liburing.h>
+
+static long page_size;
+#define AREA_SIZE (8192 * page_size)
+#define SEND_SIZE (512 * 4096)
+#define min(a, b) \
+ ({ \
+ typeof(a) _a = (a); \
+ typeof(b) _b = (b); \
+ _a < _b ? _a : _b; \
+ })
+#define min_t(t, a, b) \
+ ({ \
+ t _ta = (a); \
+ t _tb = (b); \
+ min(_ta, _tb); \
+ })
+
+#define ALIGN_UP(v, align) (((v) + (align) - 1) & ~((align) - 1))
+
+static int cfg_server;
+static int cfg_client;
+static int cfg_port = 8000;
+static int cfg_payload_len;
+static const char *cfg_ifname;
+static int cfg_queue_id = -1;
+static bool cfg_oneshot;
+static int cfg_oneshot_recvs;
+static int cfg_send_size = SEND_SIZE;
+static struct sockaddr_in6 cfg_addr;
+
+static char *payload;
+static void *area_ptr;
+static void *ring_ptr;
+static size_t ring_size;
+static struct io_uring_zcrx_rq rq_ring;
+static unsigned long area_token;
+static int connfd;
+static bool stop;
+static size_t received;
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+ int ret;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+
+ ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+ if (ret != 1) {
+ /* fallback to plain IPv4 */
+ ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+ if (ret != 1)
+ return -1;
+
+ /* add ::ffff prefix */
+ sin6->sin6_addr.s6_addr32[0] = 0;
+ sin6->sin6_addr.s6_addr32[1] = 0;
+ sin6->sin6_addr.s6_addr16[4] = 0;
+ sin6->sin6_addr.s6_addr16[5] = 0xffff;
+ }
+
+ return 0;
+}
+
+static inline size_t get_refill_ring_size(unsigned int rq_entries)
+{
+ size_t size;
+
+ ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe);
+ /* add space for the header (head/tail/etc.) */
+ ring_size += page_size;
+ return ALIGN_UP(ring_size, page_size);
+}
+
+static void setup_zcrx(struct io_uring *ring)
+{
+ unsigned int ifindex;
+ unsigned int rq_entries = 4096;
+ int ret;
+
+ ifindex = if_nametoindex(cfg_ifname);
+ if (!ifindex)
+ error(1, 0, "bad interface name: %s", cfg_ifname);
+
+ area_ptr = mmap(NULL,
+ AREA_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE,
+ 0,
+ 0);
+ if (area_ptr == MAP_FAILED)
+ error(1, 0, "mmap(): zero copy area");
+
+ ring_size = get_refill_ring_size(rq_entries);
+ ring_ptr = mmap(NULL,
+ ring_size,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE,
+ 0,
+ 0);
+
+ struct io_uring_region_desc region_reg = {
+ .size = ring_size,
+ .user_addr = (__u64)(unsigned long)ring_ptr,
+ .flags = IORING_MEM_REGION_TYPE_USER,
+ };
+
+ struct io_uring_zcrx_area_reg area_reg = {
+ .addr = (__u64)(unsigned long)area_ptr,
+ .len = AREA_SIZE,
+ .flags = 0,
+ };
+
+ struct io_uring_zcrx_ifq_reg reg = {
+ .if_idx = ifindex,
+ .if_rxq = cfg_queue_id,
+ .rq_entries = rq_entries,
+ .area_ptr = (__u64)(unsigned long)&area_reg,
+ .region_ptr = (__u64)(unsigned long)&region_reg,
+ };
+
+ ret = io_uring_register_ifq(ring, &reg);
+ if (ret)
+ error(1, 0, "io_uring_register_ifq(): %d", ret);
+
+ rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head);
+ rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail);
+ rq_ring.rqes = (struct io_uring_zcrx_rqe *)((char *)ring_ptr + reg.offsets.rqes);
+ rq_ring.rq_tail = 0;
+ rq_ring.ring_entries = reg.rq_entries;
+
+ area_token = area_reg.rq_area_token;
+}
+
+static void add_accept(struct io_uring *ring, int sockfd)
+{
+ struct io_uring_sqe *sqe;
+
+ sqe = io_uring_get_sqe(ring);
+
+ io_uring_prep_accept(sqe, sockfd, NULL, NULL, 0);
+ sqe->user_data = 1;
+}
+
+static void add_recvzc(struct io_uring *ring, int sockfd)
+{
+ struct io_uring_sqe *sqe;
+
+ sqe = io_uring_get_sqe(ring);
+
+ io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, 0, 0);
+ sqe->ioprio |= IORING_RECV_MULTISHOT;
+ sqe->user_data = 2;
+}
+
+static void add_recvzc_oneshot(struct io_uring *ring, int sockfd, size_t len)
+{
+ struct io_uring_sqe *sqe;
+
+ sqe = io_uring_get_sqe(ring);
+
+ io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, len, 0);
+ sqe->ioprio |= IORING_RECV_MULTISHOT;
+ sqe->user_data = 2;
+}
+
+static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+ if (cqe->res < 0)
+ error(1, 0, "accept()");
+ if (connfd)
+ error(1, 0, "Unexpected second connection");
+
+ connfd = cqe->res;
+ if (cfg_oneshot)
+ add_recvzc_oneshot(ring, connfd, page_size);
+ else
+ add_recvzc(ring, connfd);
+}
+
+static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
+{
+ unsigned rq_mask = rq_ring.ring_entries - 1;
+ struct io_uring_zcrx_cqe *rcqe;
+ struct io_uring_zcrx_rqe *rqe;
+ struct io_uring_sqe *sqe;
+ uint64_t mask;
+ char *data;
+ ssize_t n;
+ int i;
+
+ if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs == 0) {
+ stop = true;
+ return;
+ }
+
+ if (cqe->res < 0)
+ error(1, 0, "recvzc(): %d", cqe->res);
+
+ if (cfg_oneshot) {
+ if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) {
+ add_recvzc_oneshot(ring, connfd, page_size);
+ cfg_oneshot_recvs--;
+ }
+ } else if (!(cqe->flags & IORING_CQE_F_MORE)) {
+ add_recvzc(ring, connfd);
+ }
+
+ rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1);
+
+ n = cqe->res;
+ mask = (1ULL << IORING_ZCRX_AREA_SHIFT) - 1;
+ data = (char *)area_ptr + (rcqe->off & mask);
+
+ for (i = 0; i < n; i++) {
+ if (*(data + i) != payload[(received + i)])
+ error(1, 0, "payload mismatch at %d", i);
+ }
+ received += n;
+
+ rqe = &rq_ring.rqes[(rq_ring.rq_tail & rq_mask)];
+ rqe->off = (rcqe->off & ~IORING_ZCRX_AREA_MASK) | area_token;
+ rqe->len = cqe->res;
+ io_uring_smp_store_release(rq_ring.ktail, ++rq_ring.rq_tail);
+}
+
+static void server_loop(struct io_uring *ring)
+{
+ struct io_uring_cqe *cqe;
+ unsigned int count = 0;
+ unsigned int head;
+ int i, ret;
+
+ io_uring_submit_and_wait(ring, 1);
+
+ io_uring_for_each_cqe(ring, head, cqe) {
+ if (cqe->user_data == 1)
+ process_accept(ring, cqe);
+ else if (cqe->user_data == 2)
+ process_recvzc(ring, cqe);
+ else
+ error(1, 0, "unknown cqe");
+ count++;
+ }
+ io_uring_cq_advance(ring, count);
+}
+
+static void run_server(void)
+{
+ unsigned int flags = 0;
+ struct io_uring ring;
+ int fd, enable, ret;
+ uint64_t tstop;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, 0, "socket()");
+
+ enable = 1;
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int));
+ if (ret < 0)
+ error(1, 0, "setsockopt(SO_REUSEADDR)");
+
+ ret = bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+ if (ret < 0)
+ error(1, 0, "bind()");
+
+ if (listen(fd, 1024) < 0)
+ error(1, 0, "listen()");
+
+ flags |= IORING_SETUP_COOP_TASKRUN;
+ flags |= IORING_SETUP_SINGLE_ISSUER;
+ flags |= IORING_SETUP_DEFER_TASKRUN;
+ flags |= IORING_SETUP_SUBMIT_ALL;
+ flags |= IORING_SETUP_CQE32;
+
+ io_uring_queue_init(512, &ring, flags);
+
+ setup_zcrx(&ring);
+
+ add_accept(&ring, fd);
+
+ tstop = gettimeofday_ms() + 5000;
+ while (!stop && gettimeofday_ms() < tstop)
+ server_loop(&ring);
+
+ if (!stop)
+ error(1, 0, "test failed\n");
+}
+
+static void run_client(void)
+{
+ ssize_t to_send = cfg_send_size;
+ ssize_t sent = 0;
+ ssize_t chunk, res;
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, 0, "socket()");
+
+ if (connect(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)))
+ error(1, 0, "connect()");
+
+ while (to_send) {
+ void *src = &payload[sent];
+
+ chunk = min_t(ssize_t, cfg_payload_len, to_send);
+ res = send(fd, src, chunk, 0);
+ if (res < 0)
+ error(1, 0, "send(): %zd", sent);
+ sent += res;
+ to_send -= res;
+ }
+
+ close(fd);
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s (-4|-6) (-s|-c) -h<server_ip> -p<port> "
+ "-l<payload_size> -i<ifname> -q<rxq_id>", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ const int max_payload_len = SEND_SIZE -
+ sizeof(struct ipv6hdr) -
+ sizeof(struct tcphdr) -
+ 40 /* max tcp options */;
+ struct sockaddr_in6 *addr6 = (void *) &cfg_addr;
+ char *addr = NULL;
+ int ret;
+ int c;
+
+ if (argc <= 1)
+ usage(argv[0]);
+ cfg_payload_len = max_payload_len;
+
+ while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:")) != -1) {
+ switch (c) {
+ case 's':
+ if (cfg_client)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_server = 1;
+ break;
+ case 'c':
+ if (cfg_server)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_client = 1;
+ break;
+ case 'h':
+ addr = optarg;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 'l':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ case 'q':
+ cfg_queue_id = strtoul(optarg, NULL, 0);
+ break;
+ case 'o': {
+ cfg_oneshot = true;
+ cfg_oneshot_recvs = strtoul(optarg, NULL, 0);
+ break;
+ }
+ case 'z':
+ cfg_send_size = strtoul(optarg, NULL, 0);
+ break;
+ }
+ }
+
+ if (cfg_server && addr)
+ error(1, 0, "Receiver cannot have -h specified");
+
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ addr6->sin6_addr = in6addr_any;
+ if (addr) {
+ ret = parse_address(addr, cfg_port, addr6);
+ if (ret)
+ error(1, 0, "receiver address parse error: %s", addr);
+ }
+
+ if (cfg_payload_len > max_payload_len)
+ error(1, 0, "-l: payload exceeds max (%d)", max_payload_len);
+}
+
+int main(int argc, char **argv)
+{
+ const char *cfg_test = argv[argc - 1];
+ int i;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ if (page_size < 0)
+ return 1;
+
+ if (posix_memalign((void **)&payload, page_size, SEND_SIZE))
+ return 1;
+
+ parse_opts(argc, argv);
+
+ for (i = 0; i < SEND_SIZE; i++)
+ payload[i] = 'a' + (i % 26);
+
+ if (cfg_server)
+ run_server();
+ else if (cfg_client)
+ run_client();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
new file mode 100755
index 000000000000..712c806508b5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import re
+from os import path
+from lib.py import ksft_run, ksft_exit, KsftSkipEx
+from lib.py import NetDrvEpEnv
+from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
+
+
+def _get_current_settings(cfg):
+ output = ethtool(f"-g {cfg.ifname}", json=True)[0]
+ return (output['rx'], output['hds-thresh'])
+
+
+def _get_combined_channels(cfg):
+ output = ethtool(f"-l {cfg.ifname}").stdout
+ values = re.findall(r'Combined:\s+(\d+)', output)
+ return int(values[1])
+
+
+def _create_rss_ctx(cfg, chan):
+ output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout
+ values = re.search(r'New RSS context is (\d+)', output).group(1)
+ ctx_id = int(values)
+ return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}"))
+
+
+def _set_flow_rule(cfg, port, chan):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout
+ values = re.search(r'ID (\d+)', output).group(1)
+ return int(values)
+
+
+def _set_flow_rule_rss(cfg, port, ctx_id):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout
+ values = re.search(r'ID (\d+)', output).group(1)
+ return int(values)
+
+
+def test_zcrx(cfg) -> None:
+ cfg.require_ipver('6')
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def test_zcrx_oneshot(cfg) -> None:
+ cfg.require_ipver('6')
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def test_zcrx_rss(cfg) -> None:
+ cfg.require_ipver('6')
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1)
+ flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx")
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/irq.py b/tools/testing/selftests/drivers/net/hw/irq.py
new file mode 100755
index 000000000000..0699d6a8b4e2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/irq.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_ge, ksft_eq
+from lib.py import KsftSkipEx
+from lib.py import ksft_disruptive
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import NetDrvEnv
+from lib.py import cmd, ip, defer
+
+
+def read_affinity(irq) -> str:
+ with open(f'/proc/irq/{irq}/smp_affinity', 'r') as fp:
+ return fp.read().lstrip("0,").strip()
+
+
+def write_affinity(irq, what) -> str:
+ if what != read_affinity(irq):
+ with open(f'/proc/irq/{irq}/smp_affinity', 'w') as fp:
+ fp.write(what)
+
+
+def check_irqs_reported(cfg) -> None:
+ """ Check that device reports IRQs for NAPI instances """
+ napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+ irqs = sum(['irq' in x for x in napis])
+
+ ksft_ge(irqs, 1)
+ ksft_eq(irqs, len(napis))
+
+
+def _check_reconfig(cfg, reconfig_cb) -> None:
+ napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+ for n in reversed(napis):
+ if 'irq' in n:
+ break
+ else:
+ raise KsftSkipEx(f"Device has no NAPI with IRQ attribute (#napis: {len(napis)}")
+
+ old = read_affinity(n['irq'])
+ # pick an affinity that's not the current one
+ new = "3" if old != "3" else "5"
+ write_affinity(n['irq'], new)
+ defer(write_affinity, n['irq'], old)
+
+ reconfig_cb(cfg)
+
+ ksft_eq(read_affinity(n['irq']), new, comment="IRQ affinity changed after reconfig")
+
+
+def check_reconfig_queues(cfg) -> None:
+ def reconfig(cfg) -> None:
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if channels['combined-count'] == 0:
+ rx_type = 'rx'
+ else:
+ rx_type = 'combined'
+ cur_queue_cnt = channels[f'{rx_type}-count']
+ max_queue_cnt = channels[f'{rx_type}-max']
+
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} 1")
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")
+
+ _check_reconfig(cfg, reconfig)
+
+
+def check_reconfig_xdp(cfg) -> None:
+ def reconfig(cfg) -> None:
+ ip(f"link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
+ ip(f"link set dev %s xdp off" % cfg.ifname)
+
+ _check_reconfig(cfg, reconfig)
+
+
+@ksft_disruptive
+def check_down(cfg) -> None:
+ def reconfig(cfg) -> None:
+ ip("link set dev %s down" % cfg.ifname)
+ ip("link set dev %s up" % cfg.ifname)
+
+ _check_reconfig(cfg, reconfig)
+
+
+def main() -> None:
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+
+ ksft_run([check_irqs_reported, check_reconfig_queues,
+ check_reconfig_xdp, check_down],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
new file mode 100644
index 000000000000..766bfc4ad842
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Driver test environment (hardware-only tests).
+NetDrvEnv and NetDrvEpEnv are the main environment classes.
+Former is for local host only tests, latter creates / connects
+to a remote endpoint. See NIPA wiki for more information about
+running and writing driver tests.
+"""
+
+import sys
+from pathlib import Path
+
+KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve()
+
+try:
+ sys.path.append(KSFT_DIR.as_posix())
+
+ # Import one by one to avoid pylint false positives
+ from net.lib.py import NetNS, NetNSEnter, NetdevSimDev
+ from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
+ NlError, RtnlFamily, DevlinkFamily, PSPFamily
+ from net.lib.py import CmdExitFailure
+ from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
+ fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
+ from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+ from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
+ ksft_setup, ksft_variants, KsftNamedVariant
+ from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
+ ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
+ from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner
+ from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv
+
+ __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev",
+ "EthtoolFamily", "NetdevFamily", "NetshaperFamily",
+ "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily",
+ "CmdExitFailure",
+ "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
+ "fd_read_timeout", "ip", "rand_port",
+ "wait_port_listen", "wait_file",
+ "KsftSkipEx", "KsftFailEx", "KsftXfailEx",
+ "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
+ "ksft_setup", "ksft_variants", "KsftNamedVariant",
+ "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
+ "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
+ "ksft_not_none", "ksft_not_none",
+ "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote",
+ "Iperf3Runner"]
+except ModuleNotFoundError as e:
+ print("Failed importing `net` library from kernel sources")
+ print(str(e))
+ sys.exit(4)
diff --git a/tools/testing/selftests/drivers/net/hw/loopback.sh b/tools/testing/selftests/drivers/net/hw/loopback.sh
new file mode 100755
index 000000000000..5acc3ff820aa
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/loopback.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+ALL_TESTS="loopback_test"
+NUM_NETIFS=2
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+ tc qdisc add dev $h1 clsact
+}
+
+h1_destroy()
+{
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2
+}
+
+loopback_test()
+{
+ RET=0
+
+ tc filter add dev $h1 ingress protocol arp pref 1 handle 101 flower \
+ skip_hw arp_op reply arp_tip 192.0.2.1 action drop
+
+ $MZ $h1 -c 1 -t arp -q
+
+ tc_check_packets "dev $h1 ingress" 101 1
+ check_fail $? "Matched on a filter without loopback setup"
+
+ ethtool -K $h1 loopback on
+ check_err $? "Failed to enable loopback"
+
+ setup_wait_dev $h1
+
+ $MZ $h1 -c 1 -t arp -q
+
+ tc_check_packets "dev $h1 ingress" 101 1
+ check_err $? "Did not match on filter with loopback"
+
+ ethtool -K $h1 loopback off
+ check_err $? "Failed to disable loopback"
+
+ $MZ $h1 -c 1 -t arp -q
+
+ tc_check_packets "dev $h1 ingress" 101 2
+ check_fail $? "Matched on a filter after loopback was removed"
+
+ tc filter del dev $h1 ingress protocol arp pref 1 handle 101 flower
+
+ log_test "loopback"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ if ethtool -k $h1 | grep loopback | grep -q fixed; then
+ log_test "SKIP: dev $h1 does not support loopback feature"
+ exit $ksft_skip
+ fi
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
new file mode 100644
index 000000000000..3288ed04ce08
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -0,0 +1,1524 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tcpdevmem netcat. Works similarly to netcat but does device memory TCP
+ * instead of regular TCP. Uses udmabuf to mock a dmabuf provider.
+ *
+ * Usage:
+ *
+ * On server:
+ * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201
+ *
+ * On client:
+ * echo -n "hello\nworld" | \
+ * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
+ *
+ * Note this is compatible with regular netcat. i.e. the sender or receiver can
+ * be replaced with regular netcat to test the RX or TX path in isolation.
+ *
+ * Test data validation (devmem TCP on RX only):
+ *
+ * On server:
+ * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7
+ *
+ * On client:
+ * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
+ * head -c 1G | \
+ * nc <server IP> 5201 -p 5201
+ *
+ * Test data validation (devmem TCP on RX and TX, validation happens on RX):
+ *
+ * On server:
+ * ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1
+ *
+ * On client:
+ * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \
+ * head -c 1M | \
+ * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
+ */
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <linux/uio.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#define __iovec_defined
+#include <fcntl.h>
+#include <malloc.h>
+#include <error.h>
+#include <poll.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+
+#include <linux/memfd.h>
+#include <linux/dma-buf.h>
+#include <linux/errqueue.h>
+#include <linux/udmabuf.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+#include <linux/genetlink.h>
+#include <linux/netdev.h>
+#include <linux/ethtool_netlink.h>
+#include <time.h>
+#include <net/if.h>
+
+#include "netdev-user.h"
+#include "ethtool-user.h"
+#include <ynl.h>
+
+#define PAGE_SHIFT 12
+#define TEST_PREFIX "ncdevmem"
+#define NUM_PAGES 16000
+
+#ifndef MSG_SOCK_DEVMEM
+#define MSG_SOCK_DEVMEM 0x2000000
+#endif
+
+#define MAX_IOV 1024
+
+static size_t max_chunk;
+static char *server_ip;
+static char *client_ip;
+static char *port;
+static size_t do_validation;
+static int start_queue = -1;
+static int num_queues = -1;
+static char *ifname;
+static unsigned int ifindex;
+static unsigned int dmabuf_id;
+static uint32_t tx_dmabuf_id;
+static int waittime_ms = 500;
+
+/* System state loaded by current_config_load() */
+#define MAX_FLOWS 8
+static int ntuple_ids[MAX_FLOWS] = { -1, -1, -1, -1, -1, -1, -1, -1, };
+
+struct memory_buffer {
+ int fd;
+ size_t size;
+
+ int devfd;
+ int memfd;
+ char *buf_mem;
+};
+
+struct memory_provider {
+ struct memory_buffer *(*alloc)(size_t size);
+ void (*free)(struct memory_buffer *ctx);
+ void (*memcpy_to_device)(struct memory_buffer *dst, size_t off,
+ void *src, int n);
+ void (*memcpy_from_device)(void *dst, struct memory_buffer *src,
+ size_t off, int n);
+};
+
+static void pr_err(const char *fmt, ...)
+{
+ va_list args;
+
+ fprintf(stderr, "%s: ", TEST_PREFIX);
+
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+
+ if (errno != 0)
+ fprintf(stderr, ": %s", strerror(errno));
+ fprintf(stderr, "\n");
+}
+
+static struct memory_buffer *udmabuf_alloc(size_t size)
+{
+ struct udmabuf_create create;
+ struct memory_buffer *ctx;
+ int ret;
+
+ ctx = malloc(sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ ctx->size = size;
+
+ ctx->devfd = open("/dev/udmabuf", O_RDWR);
+ if (ctx->devfd < 0) {
+ pr_err("[skip,no-udmabuf: Unable to access DMA buffer device file]");
+ goto err_free_ctx;
+ }
+
+ ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
+ if (ctx->memfd < 0) {
+ pr_err("[skip,no-memfd]");
+ goto err_close_dev;
+ }
+
+ ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK);
+ if (ret < 0) {
+ pr_err("[skip,fcntl-add-seals]");
+ goto err_close_memfd;
+ }
+
+ ret = ftruncate(ctx->memfd, size);
+ if (ret == -1) {
+ pr_err("[FAIL,memfd-truncate]");
+ goto err_close_memfd;
+ }
+
+ memset(&create, 0, sizeof(create));
+
+ create.memfd = ctx->memfd;
+ create.offset = 0;
+ create.size = size;
+ ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create);
+ if (ctx->fd < 0) {
+ pr_err("[FAIL, create udmabuf]");
+ goto err_close_fd;
+ }
+
+ ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ ctx->fd, 0);
+ if (ctx->buf_mem == MAP_FAILED) {
+ pr_err("[FAIL, map udmabuf]");
+ goto err_close_fd;
+ }
+
+ return ctx;
+
+err_close_fd:
+ close(ctx->fd);
+err_close_memfd:
+ close(ctx->memfd);
+err_close_dev:
+ close(ctx->devfd);
+err_free_ctx:
+ free(ctx);
+ return NULL;
+}
+
+static void udmabuf_free(struct memory_buffer *ctx)
+{
+ munmap(ctx->buf_mem, ctx->size);
+ close(ctx->fd);
+ close(ctx->memfd);
+ close(ctx->devfd);
+ free(ctx);
+}
+
+static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off,
+ void *src, int n)
+{
+ struct dma_buf_sync sync = {};
+
+ sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
+ ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+
+ memcpy(dst->buf_mem + off, src, n);
+
+ sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
+ ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+}
+
+static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
+ size_t off, int n)
+{
+ struct dma_buf_sync sync = {};
+
+ sync.flags = DMA_BUF_SYNC_START;
+ ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync);
+
+ memcpy(dst, src->buf_mem + off, n);
+
+ sync.flags = DMA_BUF_SYNC_END;
+ ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync);
+}
+
+static struct memory_provider udmabuf_memory_provider = {
+ .alloc = udmabuf_alloc,
+ .free = udmabuf_free,
+ .memcpy_to_device = udmabuf_memcpy_to_device,
+ .memcpy_from_device = udmabuf_memcpy_from_device,
+};
+
+static struct memory_provider *provider = &udmabuf_memory_provider;
+
+static void print_nonzero_bytes(void *ptr, size_t size)
+{
+ unsigned char *p = ptr;
+ unsigned int i;
+
+ for (i = 0; i < size; i++)
+ putchar(p[i]);
+}
+
+int validate_buffer(void *line, size_t size)
+{
+ static unsigned char seed = 1;
+ unsigned char *ptr = line;
+ unsigned char expected;
+ static int errors;
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ expected = seed ? seed : '\n';
+ if (ptr[i] != expected) {
+ fprintf(stderr,
+ "Failed validation: expected=%u, actual=%u, index=%lu\n",
+ expected, ptr[i], i);
+ errors++;
+ if (errors > 20) {
+ pr_err("validation failed");
+ return -1;
+ }
+ }
+ seed++;
+ if (seed == do_validation)
+ seed = 0;
+ }
+
+ fprintf(stdout, "Validated buffer\n");
+ return 0;
+}
+
+static int
+__run_command(char *out, size_t outlen, const char *cmd, va_list args)
+{
+ char command[256];
+ FILE *fp;
+
+ vsnprintf(command, sizeof(command), cmd, args);
+
+ fprintf(stderr, "Running: %s\n", command);
+ fp = popen(command, "r");
+ if (!fp)
+ return -1;
+ if (out) {
+ size_t len;
+
+ if (!fgets(out, outlen, fp))
+ return -1;
+
+ /* Remove trailing newline if present */
+ len = strlen(out);
+ if (len && out[len - 1] == '\n')
+ out[len - 1] = '\0';
+ }
+ return pclose(fp);
+}
+
+static int run_command(const char *cmd, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, cmd);
+ ret = __run_command(NULL, 0, cmd, args);
+ va_end(args);
+
+ return ret;
+}
+
+static int ethtool_add_flow(const char *format, ...)
+{
+ char local_output[256], cmd[256];
+ const char *id_start;
+ int flow_idx, ret;
+ char *endptr;
+ long flow_id;
+ va_list args;
+
+ for (flow_idx = 0; flow_idx < MAX_FLOWS; flow_idx++)
+ if (ntuple_ids[flow_idx] == -1)
+ break;
+ if (flow_idx == MAX_FLOWS) {
+ fprintf(stderr, "Error: too many flows\n");
+ return -1;
+ }
+
+ snprintf(cmd, sizeof(cmd), "ethtool -N %s %s", ifname, format);
+
+ va_start(args, format);
+ ret = __run_command(local_output, sizeof(local_output), cmd, args);
+ va_end(args);
+
+ if (ret != 0)
+ return ret;
+
+ /* Extract the ID from the output */
+ id_start = strstr(local_output, "Added rule with ID ");
+ if (!id_start)
+ return -1;
+ id_start += strlen("Added rule with ID ");
+
+ flow_id = strtol(id_start, &endptr, 10);
+ if (endptr == id_start || flow_id < 0 || flow_id > INT_MAX)
+ return -1;
+
+ fprintf(stderr, "Added flow rule with ID %ld\n", flow_id);
+ ntuple_ids[flow_idx] = flow_id;
+ return flow_id;
+}
+
+static int rxq_num(int ifindex)
+{
+ struct ethtool_channels_get_req *req;
+ struct ethtool_channels_get_rsp *rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int num = -1;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ req = ethtool_channels_get_req_alloc();
+ ethtool_channels_get_req_set_header_dev_index(req, ifindex);
+ rsp = ethtool_channels_get(ys, req);
+ if (rsp)
+ num = rsp->rx_count + rsp->combined_count;
+ ethtool_channels_get_req_free(req);
+ ethtool_channels_get_rsp_free(rsp);
+
+ ynl_sock_destroy(ys);
+
+ return num;
+}
+
+static void reset_flow_steering(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_FLOWS; i++) {
+ if (ntuple_ids[i] == -1)
+ continue;
+ run_command("ethtool -N %s delete %d",
+ ifname, ntuple_ids[i]);
+ ntuple_ids[i] = -1;
+ }
+}
+
+static const char *tcp_data_split_str(int val)
+{
+ switch (val) {
+ case 0:
+ return "off";
+ case 1:
+ return "auto";
+ case 2:
+ return "on";
+ default:
+ return "?";
+ }
+}
+
+static struct ethtool_rings_get_rsp *get_ring_config(void)
+{
+ struct ethtool_rings_get_req *get_req;
+ struct ethtool_rings_get_rsp *get_rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return NULL;
+ }
+
+ get_req = ethtool_rings_get_req_alloc();
+ ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+ get_rsp = ethtool_rings_get(ys, get_req);
+ ethtool_rings_get_req_free(get_req);
+
+ ynl_sock_destroy(ys);
+
+ return get_rsp;
+}
+
+static void restore_ring_config(const struct ethtool_rings_get_rsp *config)
+{
+ struct ethtool_rings_get_req *get_req;
+ struct ethtool_rings_get_rsp *get_rsp;
+ struct ethtool_rings_set_req *req;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret;
+
+ if (!config)
+ return;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return;
+ }
+
+ req = ethtool_rings_set_req_alloc();
+ ethtool_rings_set_req_set_header_dev_index(req, ifindex);
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ ETHTOOL_TCP_DATA_SPLIT_UNKNOWN);
+ if (config->_present.hds_thresh)
+ ethtool_rings_set_req_set_hds_thresh(req, config->hds_thresh);
+
+ ret = ethtool_rings_set(ys, req);
+ if (ret < 0)
+ fprintf(stderr, "YNL restoring HDS cfg: %s\n", ys->err.msg);
+
+ get_req = ethtool_rings_get_req_alloc();
+ ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+ get_rsp = ethtool_rings_get(ys, get_req);
+ ethtool_rings_get_req_free(get_req);
+
+ /* use explicit value if UKNOWN didn't give us the previous */
+ if (get_rsp->tcp_data_split != config->tcp_data_split) {
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ config->tcp_data_split);
+ ret = ethtool_rings_set(ys, req);
+ if (ret < 0)
+ fprintf(stderr, "YNL restoring expl HDS cfg: %s\n",
+ ys->err.msg);
+ }
+
+ ethtool_rings_get_rsp_free(get_rsp);
+ ethtool_rings_set_req_free(req);
+
+ ynl_sock_destroy(ys);
+}
+
+static int
+configure_headersplit(const struct ethtool_rings_get_rsp *old, bool on)
+{
+ struct ethtool_rings_get_req *get_req;
+ struct ethtool_rings_get_rsp *get_rsp;
+ struct ethtool_rings_set_req *req;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ req = ethtool_rings_set_req_alloc();
+ ethtool_rings_set_req_set_header_dev_index(req, ifindex);
+ if (on) {
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ ETHTOOL_TCP_DATA_SPLIT_ENABLED);
+ if (old->_present.hds_thresh)
+ ethtool_rings_set_req_set_hds_thresh(req, 0);
+ } else {
+ ethtool_rings_set_req_set_tcp_data_split(req,
+ ETHTOOL_TCP_DATA_SPLIT_UNKNOWN);
+ }
+ ret = ethtool_rings_set(ys, req);
+ if (ret < 0)
+ fprintf(stderr, "YNL failed: %s\n", ys->err.msg);
+ ethtool_rings_set_req_free(req);
+
+ if (ret == 0) {
+ get_req = ethtool_rings_get_req_alloc();
+ ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
+ get_rsp = ethtool_rings_get(ys, get_req);
+ ethtool_rings_get_req_free(get_req);
+ if (get_rsp)
+ fprintf(stderr, "TCP header split: %s\n",
+ tcp_data_split_str(get_rsp->tcp_data_split));
+ ethtool_rings_get_rsp_free(get_rsp);
+ }
+
+ ynl_sock_destroy(ys);
+
+ return ret;
+}
+
+static int configure_rss(void)
+{
+ return run_command("ethtool -X %s equal %d >&2", ifname, start_queue);
+}
+
+static void reset_rss(void)
+{
+ run_command("ethtool -X %s default >&2", ifname, start_queue);
+}
+
+static int check_changing_channels(unsigned int rx, unsigned int tx)
+{
+ struct ethtool_channels_get_req *gchan;
+ struct ethtool_channels_set_req *schan;
+ struct ethtool_channels_get_rsp *chan;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret;
+
+ fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx);
+
+ ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ gchan = ethtool_channels_get_req_alloc();
+ if (!gchan) {
+ ret = -1;
+ goto exit_close_sock;
+ }
+
+ ethtool_channels_get_req_set_header_dev_index(gchan, ifindex);
+ chan = ethtool_channels_get(ys, gchan);
+ ethtool_channels_get_req_free(gchan);
+ if (!chan) {
+ fprintf(stderr, "YNL get channels: %s\n", ys->err.msg);
+ ret = -1;
+ goto exit_close_sock;
+ }
+
+ schan = ethtool_channels_set_req_alloc();
+ if (!schan) {
+ ret = -1;
+ goto exit_free_chan;
+ }
+
+ ethtool_channels_set_req_set_header_dev_index(schan, ifindex);
+
+ if (chan->_present.combined_count) {
+ if (chan->_present.rx_count || chan->_present.tx_count) {
+ ethtool_channels_set_req_set_rx_count(schan, 0);
+ ethtool_channels_set_req_set_tx_count(schan, 0);
+ }
+
+ if (rx == tx) {
+ ethtool_channels_set_req_set_combined_count(schan, rx);
+ } else if (rx > tx) {
+ ethtool_channels_set_req_set_combined_count(schan, tx);
+ ethtool_channels_set_req_set_rx_count(schan, rx - tx);
+ } else {
+ ethtool_channels_set_req_set_combined_count(schan, rx);
+ ethtool_channels_set_req_set_tx_count(schan, tx - rx);
+ }
+
+ } else if (chan->_present.rx_count) {
+ ethtool_channels_set_req_set_rx_count(schan, rx);
+ ethtool_channels_set_req_set_tx_count(schan, tx);
+ } else {
+ fprintf(stderr, "Error: device has neither combined nor rx channels\n");
+ ret = -1;
+ goto exit_free_schan;
+ }
+
+ ret = ethtool_channels_set(ys, schan);
+ if (ret) {
+ fprintf(stderr, "YNL set channels: %s\n", ys->err.msg);
+ } else {
+ /* We were expecting a failure, go back to previous settings */
+ ethtool_channels_set_req_set_combined_count(schan,
+ chan->combined_count);
+ ethtool_channels_set_req_set_rx_count(schan, chan->rx_count);
+ ethtool_channels_set_req_set_tx_count(schan, chan->tx_count);
+
+ ret = ethtool_channels_set(ys, schan);
+ if (ret)
+ fprintf(stderr, "YNL un-setting channels: %s\n",
+ ys->err.msg);
+ }
+
+exit_free_schan:
+ ethtool_channels_set_req_free(schan);
+exit_free_chan:
+ ethtool_channels_get_rsp_free(chan);
+exit_close_sock:
+ ynl_sock_destroy(ys);
+
+ return ret;
+}
+
+static int configure_flow_steering(struct sockaddr_in6 *server_sin)
+{
+ const char *type = "tcp6";
+ const char *server_addr;
+ char buf[40];
+ int flow_id;
+
+ inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf));
+ server_addr = buf;
+
+ if (IN6_IS_ADDR_V4MAPPED(&server_sin->sin6_addr)) {
+ type = "tcp4";
+ server_addr = strrchr(server_addr, ':') + 1;
+ }
+
+ /* Try configure 5-tuple */
+ flow_id = ethtool_add_flow("flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d",
+ type,
+ client_ip ? "src-ip" : "",
+ client_ip ?: "",
+ server_addr,
+ client_ip ? "src-port" : "",
+ client_ip ? port : "",
+ port, start_queue);
+ if (flow_id < 0) {
+ /* If that fails, try configure 3-tuple */
+ flow_id = ethtool_add_flow("flow-type %s dst-ip %s dst-port %s queue %d",
+ type, server_addr, port, start_queue);
+ if (flow_id < 0)
+ /* If that fails, return error */
+ return -1;
+ }
+
+ return 0;
+}
+
+static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
+ struct netdev_queue_id *queues,
+ unsigned int n_queue_index, struct ynl_sock **ys)
+{
+ struct netdev_bind_rx_req *req = NULL;
+ struct netdev_bind_rx_rsp *rsp = NULL;
+ struct ynl_error yerr;
+
+ *ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!*ys) {
+ netdev_queue_id_free(queues);
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ req = netdev_bind_rx_req_alloc();
+ netdev_bind_rx_req_set_ifindex(req, ifindex);
+ netdev_bind_rx_req_set_fd(req, dmabuf_fd);
+ __netdev_bind_rx_req_set_queues(req, queues, n_queue_index);
+
+ rsp = netdev_bind_rx(*ys, req);
+ if (!rsp) {
+ perror("netdev_bind_rx");
+ goto err_close;
+ }
+
+ if (!rsp->_present.id) {
+ perror("id not present");
+ goto err_close;
+ }
+
+ fprintf(stderr, "got dmabuf id=%d\n", rsp->id);
+ dmabuf_id = rsp->id;
+
+ netdev_bind_rx_req_free(req);
+ netdev_bind_rx_rsp_free(rsp);
+
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
+ netdev_bind_rx_req_free(req);
+ ynl_sock_destroy(*ys);
+ return -1;
+}
+
+static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
+ struct ynl_sock **ys)
+{
+ struct netdev_bind_tx_req *req = NULL;
+ struct netdev_bind_tx_rsp *rsp = NULL;
+ struct ynl_error yerr;
+
+ *ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!*ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ req = netdev_bind_tx_req_alloc();
+ netdev_bind_tx_req_set_ifindex(req, ifindex);
+ netdev_bind_tx_req_set_fd(req, dmabuf_fd);
+
+ rsp = netdev_bind_tx(*ys, req);
+ if (!rsp) {
+ perror("netdev_bind_tx");
+ goto err_close;
+ }
+
+ if (!rsp->_present.id) {
+ perror("id not present");
+ goto err_close;
+ }
+
+ fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id);
+ tx_dmabuf_id = rsp->id;
+
+ netdev_bind_tx_req_free(req);
+ netdev_bind_tx_rsp_free(rsp);
+
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
+ netdev_bind_tx_req_free(req);
+ ynl_sock_destroy(*ys);
+ return -1;
+}
+
+static int enable_reuseaddr(int fd)
+{
+ int opt = 1;
+ int ret;
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
+ if (ret) {
+ pr_err("SO_REUSEPORT failed");
+ return -1;
+ }
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+ if (ret) {
+ pr_err("SO_REUSEADDR failed");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+ int ret;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+
+ ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+ if (ret != 1) {
+ /* fallback to plain IPv4 */
+ ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+ if (ret != 1)
+ return -1;
+
+ /* add ::ffff prefix */
+ sin6->sin6_addr.s6_addr32[0] = 0;
+ sin6->sin6_addr.s6_addr32[1] = 0;
+ sin6->sin6_addr.s6_addr16[4] = 0;
+ sin6->sin6_addr.s6_addr16[5] = 0xffff;
+ }
+
+ return 0;
+}
+
+static struct netdev_queue_id *create_queues(void)
+{
+ struct netdev_queue_id *queues;
+ size_t i = 0;
+
+ queues = netdev_queue_id_alloc(num_queues);
+ for (i = 0; i < num_queues; i++) {
+ netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX);
+ netdev_queue_id_set_id(&queues[i], start_queue + i);
+ }
+
+ return queues;
+}
+
+static int do_server(struct memory_buffer *mem)
+{
+ struct ethtool_rings_get_rsp *ring_config;
+ char ctrl_data[sizeof(int) * 20000];
+ size_t non_page_aligned_frags = 0;
+ struct sockaddr_in6 client_addr;
+ struct sockaddr_in6 server_sin;
+ size_t page_aligned_frags = 0;
+ size_t total_received = 0;
+ socklen_t client_addr_len;
+ bool is_devmem = false;
+ char *tmp_mem = NULL;
+ struct ynl_sock *ys;
+ char iobuf[819200];
+ int ret, err = -1;
+ char buffer[256];
+ int socket_fd;
+ int client_fd;
+
+ ret = parse_address(server_ip, atoi(port), &server_sin);
+ if (ret < 0) {
+ pr_err("parse server address");
+ return -1;
+ }
+
+ ring_config = get_ring_config();
+ if (!ring_config) {
+ pr_err("Failed to get current ring configuration");
+ return -1;
+ }
+
+ if (configure_headersplit(ring_config, 1)) {
+ pr_err("Failed to enable TCP header split");
+ goto err_free_ring_config;
+ }
+
+ /* Configure RSS to divert all traffic from our devmem queues */
+ if (configure_rss()) {
+ pr_err("Failed to configure rss");
+ goto err_reset_headersplit;
+ }
+
+ /* Flow steer our devmem flows to start_queue */
+ if (configure_flow_steering(&server_sin)) {
+ pr_err("Failed to configure flow steering");
+ goto err_reset_rss;
+ }
+
+ if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) {
+ pr_err("Failed to bind");
+ goto err_reset_flow_steering;
+ }
+
+ tmp_mem = malloc(mem->size);
+ if (!tmp_mem)
+ goto err_unbind;
+
+ socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (socket_fd < 0) {
+ pr_err("Failed to create socket");
+ goto err_free_tmp;
+ }
+
+ if (enable_reuseaddr(socket_fd))
+ goto err_close_socket;
+
+ fprintf(stderr, "binding to address %s:%d\n", server_ip,
+ ntohs(server_sin.sin6_port));
+
+ ret = bind(socket_fd, &server_sin, sizeof(server_sin));
+ if (ret) {
+ pr_err("Failed to bind");
+ goto err_close_socket;
+ }
+
+ ret = listen(socket_fd, 1);
+ if (ret) {
+ pr_err("Failed to listen");
+ goto err_close_socket;
+ }
+
+ client_addr_len = sizeof(client_addr);
+
+ inet_ntop(AF_INET6, &server_sin.sin6_addr, buffer,
+ sizeof(buffer));
+ fprintf(stderr, "Waiting or connection on %s:%d\n", buffer,
+ ntohs(server_sin.sin6_port));
+ client_fd = accept(socket_fd, &client_addr, &client_addr_len);
+ if (client_fd < 0) {
+ pr_err("Failed to accept");
+ goto err_close_socket;
+ }
+
+ inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer,
+ sizeof(buffer));
+ fprintf(stderr, "Got connection from %s:%d\n", buffer,
+ ntohs(client_addr.sin6_port));
+
+ while (1) {
+ struct iovec iov = { .iov_base = iobuf,
+ .iov_len = sizeof(iobuf) };
+ struct dmabuf_cmsg *dmabuf_cmsg = NULL;
+ struct cmsghdr *cm = NULL;
+ struct msghdr msg = { 0 };
+ struct dmabuf_token token;
+ ssize_t ret;
+
+ is_devmem = false;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = ctrl_data;
+ msg.msg_controllen = sizeof(ctrl_data);
+ ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM);
+ fprintf(stderr, "recvmsg ret=%ld\n", ret);
+ if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
+ continue;
+ if (ret < 0) {
+ perror("recvmsg");
+ if (errno == EFAULT) {
+ pr_err("received EFAULT, won't recover");
+ goto err_close_client;
+ }
+ continue;
+ }
+ if (ret == 0) {
+ errno = 0;
+ pr_err("client exited");
+ goto cleanup;
+ }
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if (cm->cmsg_level != SOL_SOCKET ||
+ (cm->cmsg_type != SCM_DEVMEM_DMABUF &&
+ cm->cmsg_type != SCM_DEVMEM_LINEAR)) {
+ fprintf(stderr, "skipping non-devmem cmsg\n");
+ continue;
+ }
+
+ dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm);
+ is_devmem = true;
+
+ if (cm->cmsg_type == SCM_DEVMEM_LINEAR) {
+ /* TODO: process data copied from skb's linear
+ * buffer.
+ */
+ fprintf(stderr,
+ "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n",
+ dmabuf_cmsg->frag_size);
+
+ continue;
+ }
+
+ token.token_start = dmabuf_cmsg->frag_token;
+ token.token_count = 1;
+
+ total_received += dmabuf_cmsg->frag_size;
+ fprintf(stderr,
+ "received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n",
+ dmabuf_cmsg->frag_offset >> PAGE_SHIFT,
+ dmabuf_cmsg->frag_offset % getpagesize(),
+ dmabuf_cmsg->frag_offset,
+ dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token,
+ total_received, dmabuf_cmsg->dmabuf_id);
+
+ if (dmabuf_cmsg->dmabuf_id != dmabuf_id) {
+ pr_err("received on wrong dmabuf_id: flow steering error");
+ goto err_close_client;
+ }
+
+ if (dmabuf_cmsg->frag_size % getpagesize())
+ non_page_aligned_frags++;
+ else
+ page_aligned_frags++;
+
+ provider->memcpy_from_device(tmp_mem, mem,
+ dmabuf_cmsg->frag_offset,
+ dmabuf_cmsg->frag_size);
+
+ if (do_validation) {
+ if (validate_buffer(tmp_mem,
+ dmabuf_cmsg->frag_size))
+ goto err_close_client;
+ } else {
+ print_nonzero_bytes(tmp_mem,
+ dmabuf_cmsg->frag_size);
+ }
+
+ ret = setsockopt(client_fd, SOL_SOCKET,
+ SO_DEVMEM_DONTNEED, &token,
+ sizeof(token));
+ if (ret != 1) {
+ pr_err("SO_DEVMEM_DONTNEED not enough tokens");
+ goto err_close_client;
+ }
+ }
+ if (!is_devmem) {
+ pr_err("flow steering error");
+ goto err_close_client;
+ }
+
+ fprintf(stderr, "total_received=%lu\n", total_received);
+ }
+
+ fprintf(stderr, "%s: ok\n", TEST_PREFIX);
+
+ fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
+ page_aligned_frags, non_page_aligned_frags);
+
+cleanup:
+ err = 0;
+
+err_close_client:
+ close(client_fd);
+err_close_socket:
+ close(socket_fd);
+err_free_tmp:
+ free(tmp_mem);
+err_unbind:
+ ynl_sock_destroy(ys);
+err_reset_flow_steering:
+ reset_flow_steering();
+err_reset_rss:
+ reset_rss();
+err_reset_headersplit:
+ restore_ring_config(ring_config);
+err_free_ring_config:
+ ethtool_rings_get_rsp_free(ring_config);
+ return err;
+}
+
+int run_devmem_tests(void)
+{
+ struct ethtool_rings_get_rsp *ring_config;
+ struct netdev_queue_id *queues;
+ struct memory_buffer *mem;
+ struct ynl_sock *ys;
+ int err = -1;
+
+ mem = provider->alloc(getpagesize() * NUM_PAGES);
+ if (!mem) {
+ pr_err("Failed to allocate memory buffer");
+ return -1;
+ }
+
+ ring_config = get_ring_config();
+ if (!ring_config) {
+ pr_err("Failed to get current ring configuration");
+ goto err_free_mem;
+ }
+
+ /* Configure RSS to divert all traffic from our devmem queues */
+ if (configure_rss()) {
+ pr_err("rss error");
+ goto err_free_ring_config;
+ }
+
+ if (configure_headersplit(ring_config, 1)) {
+ pr_err("Failed to configure header split");
+ goto err_reset_rss;
+ }
+
+ queues = netdev_queue_id_alloc(num_queues);
+ if (!queues) {
+ pr_err("Failed to allocate empty queues array");
+ goto err_reset_headersplit;
+ }
+
+ if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+ pr_err("Binding empty queues array should have failed");
+ goto err_unbind;
+ }
+
+ if (configure_headersplit(ring_config, 0)) {
+ pr_err("Failed to configure header split");
+ goto err_reset_headersplit;
+ }
+
+ queues = create_queues();
+ if (!queues) {
+ pr_err("Failed to create queues");
+ goto err_reset_headersplit;
+ }
+
+ if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+ pr_err("Configure dmabuf with header split off should have failed");
+ goto err_unbind;
+ }
+
+ if (configure_headersplit(ring_config, 1)) {
+ pr_err("Failed to configure header split");
+ goto err_reset_headersplit;
+ }
+
+ queues = create_queues();
+ if (!queues) {
+ pr_err("Failed to create queues");
+ goto err_reset_headersplit;
+ }
+
+ if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) {
+ pr_err("Failed to bind");
+ goto err_reset_headersplit;
+ }
+
+ /* Deactivating a bound queue should not be legal */
+ if (!check_changing_channels(num_queues, num_queues)) {
+ pr_err("Deactivating a bound queue should be illegal");
+ goto err_unbind;
+ }
+
+ err = 0;
+ goto err_unbind;
+
+err_unbind:
+ ynl_sock_destroy(ys);
+err_reset_headersplit:
+ restore_ring_config(ring_config);
+err_reset_rss:
+ reset_rss();
+err_free_ring_config:
+ ethtool_rings_get_rsp_free(ring_config);
+err_free_mem:
+ provider->free(mem);
+ return err;
+}
+
+static uint64_t gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL);
+}
+
+static int do_poll(int fd)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.revents = 0;
+ pfd.fd = fd;
+
+ ret = poll(&pfd, 1, waittime_ms);
+ if (ret == -1) {
+ pr_err("poll");
+ return -1;
+ }
+
+ return ret && (pfd.revents & POLLERR);
+}
+
+static int wait_compl(int fd)
+{
+ int64_t tstop = gettimeofday_ms() + waittime_ms;
+ char control[CMSG_SPACE(100)] = {};
+ struct sock_extended_err *serr;
+ struct msghdr msg = {};
+ struct cmsghdr *cm;
+ __u32 hi, lo;
+ int ret;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ while (gettimeofday_ms() < tstop) {
+ ret = do_poll(fd);
+ if (ret < 0)
+ return ret;
+ if (!ret)
+ continue;
+
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret < 0) {
+ if (errno == EAGAIN)
+ continue;
+ pr_err("recvmsg(MSG_ERRQUEUE)");
+ return -1;
+ }
+ if (msg.msg_flags & MSG_CTRUNC) {
+ pr_err("MSG_CTRUNC");
+ return -1;
+ }
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if (cm->cmsg_level != SOL_IP &&
+ cm->cmsg_level != SOL_IPV6)
+ continue;
+ if (cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type != IP_RECVERR)
+ continue;
+ if (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type != IPV6_RECVERR)
+ continue;
+
+ serr = (void *)CMSG_DATA(cm);
+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
+ pr_err("wrong origin %u", serr->ee_origin);
+ return -1;
+ }
+ if (serr->ee_errno != 0) {
+ pr_err("wrong errno %d", serr->ee_errno);
+ return -1;
+ }
+
+ hi = serr->ee_data;
+ lo = serr->ee_info;
+
+ fprintf(stderr, "tx complete [%d,%d]\n", lo, hi);
+ return 0;
+ }
+ }
+
+ pr_err("did not receive tx completion");
+ return -1;
+}
+
+static int do_client(struct memory_buffer *mem)
+{
+ char ctrl_data[CMSG_SPACE(sizeof(__u32))];
+ struct sockaddr_in6 server_sin;
+ struct sockaddr_in6 client_sin;
+ struct ynl_sock *ys = NULL;
+ struct iovec iov[MAX_IOV];
+ struct msghdr msg = {};
+ ssize_t line_size = 0;
+ struct cmsghdr *cmsg;
+ char *line = NULL;
+ int ret, err = -1;
+ size_t len = 0;
+ int socket_fd;
+ __u32 ddmabuf;
+ int opt = 1;
+
+ ret = parse_address(server_ip, atoi(port), &server_sin);
+ if (ret < 0) {
+ pr_err("parse server address");
+ return -1;
+ }
+
+ if (client_ip) {
+ ret = parse_address(client_ip, atoi(port), &client_sin);
+ if (ret < 0) {
+ pr_err("parse client address");
+ return ret;
+ }
+ }
+
+ socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (socket_fd < 0) {
+ pr_err("create socket");
+ return -1;
+ }
+
+ if (enable_reuseaddr(socket_fd))
+ goto err_close_socket;
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname,
+ strlen(ifname) + 1);
+ if (ret) {
+ pr_err("bindtodevice");
+ goto err_close_socket;
+ }
+
+ if (bind_tx_queue(ifindex, mem->fd, &ys)) {
+ pr_err("Failed to bind");
+ goto err_close_socket;
+ }
+
+ if (client_ip) {
+ ret = bind(socket_fd, &client_sin, sizeof(client_sin));
+ if (ret) {
+ pr_err("bind");
+ goto err_unbind;
+ }
+ }
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt));
+ if (ret) {
+ pr_err("set sock opt");
+ goto err_unbind;
+ }
+
+ fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip,
+ ntohs(server_sin.sin6_port), ifname);
+
+ ret = connect(socket_fd, &server_sin, sizeof(server_sin));
+ if (ret) {
+ pr_err("connect");
+ goto err_unbind;
+ }
+
+ while (1) {
+ free(line);
+ line = NULL;
+ line_size = getline(&line, &len, stdin);
+
+ if (line_size < 0)
+ break;
+
+ if (max_chunk) {
+ msg.msg_iovlen =
+ (line_size + max_chunk - 1) / max_chunk;
+ if (msg.msg_iovlen > MAX_IOV) {
+ pr_err("can't partition %zd bytes into maximum of %d chunks",
+ line_size, MAX_IOV);
+ goto err_free_line;
+ }
+
+ for (int i = 0; i < msg.msg_iovlen; i++) {
+ iov[i].iov_base = (void *)(i * max_chunk);
+ iov[i].iov_len = max_chunk;
+ }
+
+ iov[msg.msg_iovlen - 1].iov_len =
+ line_size - (msg.msg_iovlen - 1) * max_chunk;
+ } else {
+ iov[0].iov_base = 0;
+ iov[0].iov_len = line_size;
+ msg.msg_iovlen = 1;
+ }
+
+ msg.msg_iov = iov;
+ provider->memcpy_to_device(mem, 0, line, line_size);
+
+ msg.msg_control = ctrl_data;
+ msg.msg_controllen = sizeof(ctrl_data);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_DEVMEM_DMABUF;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+
+ ddmabuf = tx_dmabuf_id;
+
+ *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf;
+
+ ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY);
+ if (ret < 0) {
+ pr_err("Failed sendmsg");
+ goto err_free_line;
+ }
+
+ fprintf(stderr, "sendmsg_ret=%d\n", ret);
+
+ if (ret != line_size) {
+ pr_err("Did not send all bytes %d vs %zd", ret, line_size);
+ goto err_free_line;
+ }
+
+ if (wait_compl(socket_fd))
+ goto err_free_line;
+ }
+
+ fprintf(stderr, "%s: tx ok\n", TEST_PREFIX);
+
+ err = 0;
+
+err_free_line:
+ free(line);
+err_unbind:
+ ynl_sock_destroy(ys);
+err_close_socket:
+ close(socket_fd);
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ struct memory_buffer *mem;
+ int is_server = 0, opt;
+ int ret, err = 1;
+
+ while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) {
+ switch (opt) {
+ case 'l':
+ is_server = 1;
+ break;
+ case 's':
+ server_ip = optarg;
+ break;
+ case 'c':
+ client_ip = optarg;
+ break;
+ case 'p':
+ port = optarg;
+ break;
+ case 'v':
+ do_validation = atoll(optarg);
+ break;
+ case 'q':
+ num_queues = atoi(optarg);
+ break;
+ case 't':
+ start_queue = atoi(optarg);
+ break;
+ case 'f':
+ ifname = optarg;
+ break;
+ case 'z':
+ max_chunk = atoi(optarg);
+ break;
+ case '?':
+ fprintf(stderr, "unknown option: %c\n", optopt);
+ break;
+ }
+ }
+
+ if (!ifname) {
+ pr_err("Missing -f argument");
+ return 1;
+ }
+
+ ifindex = if_nametoindex(ifname);
+
+ fprintf(stderr, "using ifindex=%u\n", ifindex);
+
+ if (!server_ip && !client_ip) {
+ if (start_queue < 0 && num_queues < 0) {
+ num_queues = rxq_num(ifindex);
+ if (num_queues < 0) {
+ pr_err("couldn't detect number of queues");
+ return 1;
+ }
+ if (num_queues < 2) {
+ pr_err("number of device queues is too low");
+ return 1;
+ }
+ /* make sure can bind to multiple queues */
+ start_queue = num_queues / 2;
+ num_queues /= 2;
+ }
+
+ if (start_queue < 0 || num_queues < 0) {
+ pr_err("Both -t and -q are required");
+ return 1;
+ }
+
+ return run_devmem_tests();
+ }
+
+ if (start_queue < 0 && num_queues < 0) {
+ num_queues = rxq_num(ifindex);
+ if (num_queues < 2) {
+ pr_err("number of device queues is too low");
+ return 1;
+ }
+
+ num_queues = 1;
+ start_queue = rxq_num(ifindex) - num_queues;
+
+ if (start_queue < 0) {
+ pr_err("couldn't detect number of queues");
+ return 1;
+ }
+
+ fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues);
+ }
+
+ for (; optind < argc; optind++)
+ fprintf(stderr, "extra arguments: %s\n", argv[optind]);
+
+ if (start_queue < 0) {
+ pr_err("Missing -t argument");
+ return 1;
+ }
+
+ if (num_queues < 0) {
+ pr_err("Missing -q argument");
+ return 1;
+ }
+
+ if (!server_ip) {
+ pr_err("Missing -s argument");
+ return 1;
+ }
+
+ if (!port) {
+ pr_err("Missing -p argument");
+ return 1;
+ }
+
+ mem = provider->alloc(getpagesize() * NUM_PAGES);
+ if (!mem) {
+ pr_err("Failed to allocate memory buffer");
+ return 1;
+ }
+
+ ret = is_server ? do_server(mem) : do_client(mem);
+ if (ret)
+ goto err_free_mem;
+
+ err = 0;
+
+err_free_mem:
+ provider->free(mem);
+ return err;
+}
diff --git a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
new file mode 100755
index 000000000000..c1e943d53f19
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Tests related to configuration of HW timestamping
+"""
+
+import errno
+from lib.py import ksft_run, ksft_exit, ksft_ge, ksft_eq, KsftSkipEx
+from lib.py import NetDrvEnv, EthtoolFamily, NlError
+
+
+def __get_hwtimestamp_support(cfg):
+ """ Retrieve supported configuration information """
+
+ try:
+ tsinfo = cfg.ethnl.tsinfo_get({'header': {'dev-name': cfg.ifname}})
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("timestamping configuration is not supported") from e
+ raise
+
+ ctx = {}
+ tx = tsinfo.get('tx-types', {})
+ rx = tsinfo.get('rx-filters', {})
+
+ bits = tx.get('bits', {})
+ ctx['tx'] = bits.get('bit', [])
+ bits = rx.get('bits', {})
+ ctx['rx'] = bits.get('bit', [])
+ return ctx
+
+
+def __get_hwtimestamp_config(cfg):
+ """ Retrieve current TS configuration information """
+
+ try:
+ tscfg = cfg.ethnl.tsconfig_get({'header': {'dev-name': cfg.ifname}})
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("timestamping configuration is not supported via netlink") from e
+ raise
+ return tscfg
+
+
+def __set_hwtimestamp_config(cfg, ts):
+ """ Setup new TS configuration information """
+
+ ts['header'] = {'dev-name': cfg.ifname}
+ try:
+ res = cfg.ethnl.tsconfig_set(ts)
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("timestamping configuration is not supported via netlink") from e
+ raise
+ return res
+
+
+def test_hwtstamp_tx(cfg):
+ """
+ Test TX timestamp configuration.
+ The driver should apply provided config and report back proper state.
+ """
+
+ orig_tscfg = __get_hwtimestamp_config(cfg)
+ ts = __get_hwtimestamp_support(cfg)
+ tx = ts['tx']
+ for t in tx:
+ tscfg = orig_tscfg
+ tscfg['tx-types']['bits']['bit'] = [t]
+ res = __set_hwtimestamp_config(cfg, tscfg)
+ if res is None:
+ res = __get_hwtimestamp_config(cfg)
+ ksft_eq(res['tx-types']['bits']['bit'], [t])
+ __set_hwtimestamp_config(cfg, orig_tscfg)
+
+
+def test_hwtstamp_rx(cfg):
+ """
+ Test RX timestamp configuration.
+ The filter configuration is taken from the list of supported filters.
+ The driver should apply the config without error and report back proper state.
+ Some extension of the timestamping scope is allowed for PTP filters.
+ """
+
+ orig_tscfg = __get_hwtimestamp_config(cfg)
+ ts = __get_hwtimestamp_support(cfg)
+ rx = ts['rx']
+ for r in rx:
+ tscfg = orig_tscfg
+ tscfg['rx-filters']['bits']['bit'] = [r]
+ res = __set_hwtimestamp_config(cfg, tscfg)
+ if res is None:
+ res = __get_hwtimestamp_config(cfg)
+ if r['index'] == 0 or r['index'] == 1:
+ ksft_eq(res['rx-filters']['bits']['bit'][0]['index'], r['index'])
+ else:
+ # the driver can fallback to some value which has higher coverage for timestamping
+ ksft_ge(res['rx-filters']['bits']['bit'][0]['index'], r['index'])
+ __set_hwtimestamp_config(cfg, orig_tscfg)
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ ksft_run([test_hwtstamp_tx, test_hwtstamp_rx], args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
new file mode 100755
index 000000000000..2a51b60df8a1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Test driver resilience vs page pool allocation failures.
+"""
+
+import errno
+import time
+import math
+import os
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetdevFamily, NlError
+from lib.py import NetDrvEpEnv
+from lib.py import cmd, tool, GenerateTraffic
+
+
+def _write_fail_config(config):
+ for key, value in config.items():
+ path = "/sys/kernel/debug/fail_function/"
+ with open(path + key, "w", encoding='ascii') as fp:
+ fp.write(str(value) + "\n")
+
+
+def _enable_pp_allocation_fail():
+ if not os.path.exists("/sys/kernel/debug/fail_function"):
+ raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")
+
+ if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
+ _write_fail_config({"inject": "page_pool_alloc_netmems"})
+
+ _write_fail_config({
+ "verbose": 0,
+ "interval": 511,
+ "probability": 100,
+ "times": -1,
+ })
+
+
+def _disable_pp_allocation_fail():
+ if not os.path.exists("/sys/kernel/debug/fail_function"):
+ return
+
+ if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
+ _write_fail_config({"inject": ""})
+
+ _write_fail_config({
+ "probability": 0,
+ "times": 0,
+ })
+
+
+def test_pp_alloc(cfg, netdevnl):
+ """
+ Configure page pool allocation fail injection while traffic is running.
+ """
+
+ def get_stats():
+ return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ def check_traffic_flowing():
+ stat1 = get_stats()
+ time.sleep(1)
+ stat2 = get_stats()
+ if stat2['rx-packets'] - stat1['rx-packets'] < 4000:
+ raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets'])
+
+
+ try:
+ stats = get_stats()
+ except NlError as e:
+ if e.nl_msg.error == -errno.EOPNOTSUPP:
+ stats = {}
+ else:
+ raise
+ if 'rx-alloc-fail' not in stats:
+ raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats")
+
+ set_g = False
+ traffic = None
+ try:
+ traffic = GenerateTraffic(cfg)
+
+ check_traffic_flowing()
+
+ _enable_pp_allocation_fail()
+
+ s1 = get_stats()
+ time.sleep(3)
+ s2 = get_stats()
+
+ seen_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail']
+ if seen_fails < 1:
+ raise KsftSkipEx("Allocation failures not increasing")
+ pkts = s2['rx-packets'] - s1['rx-packets']
+ # Expecting one failure per 512 buffers, 3.1x safety margin
+ want_fails = math.floor(pkts / 512 / 3.1)
+ if seen_fails < want_fails:
+ raise KsftSkipEx("Allocation increasing too slowly", seen_fails,
+ "packets:", pkts)
+ ksft_pr(f"Seen: pkts:{pkts} fails:{seen_fails} (pass thrs:{want_fails})")
+
+ # Basic failures are fine, try to wobble some settings to catch extra failures
+ check_traffic_flowing()
+ g = tool("ethtool", "-g " + cfg.ifname, json=True)[0]
+ if 'rx' in g and g["rx"] * 2 <= g["rx-max"]:
+ new_g = g['rx'] * 2
+ elif 'rx' in g:
+ new_g = g['rx'] // 2
+ else:
+ new_g = None
+
+ if new_g:
+ set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0
+ if set_g:
+ ksft_pr("ethtool -G change retval: success")
+ else:
+ ksft_pr("ethtool -G change retval: did not succeed", new_g)
+ else:
+ ksft_pr("ethtool -G change retval: did not try")
+
+ time.sleep(0.1)
+ check_traffic_flowing()
+ finally:
+ _disable_pp_allocation_fail()
+ if traffic:
+ traffic.stop()
+ time.sleep(0.1)
+ if set_g:
+ cmd(f"ethtool -G {cfg.ifname} rx {g['rx']}")
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+ netdevnl = NetdevFamily()
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+
+ ksft_run([test_pp_alloc], args=(cfg, netdevnl, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py
new file mode 100755
index 000000000000..19847f3d4a00
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_api.py
@@ -0,0 +1,476 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+API level tests for RSS (mostly Netlink vs IOCTL).
+"""
+
+import errno
+import glob
+import random
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import defer, ethtool, CmdExitFailure
+from lib.py import EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+
+
+def _require_2qs(cfg):
+ qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*"))
+ if qcnt < 2:
+ raise KsftSkipEx(f"Local has only {qcnt} queues")
+ return qcnt
+
+
+def _ethtool_create(cfg, act, opts):
+ output = ethtool(f"{act} {cfg.ifname} {opts}").stdout
+ # Output will be something like: "New RSS context is 1" or
+ # "Added rule with ID 7", we want the integer from the end
+ return int(output.split()[-1])
+
+
+def _ethtool_get_cfg(cfg, fl_type, to_nl=False):
+ descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+
+ if to_nl:
+ converter = {
+ "IP SA": "ip-src",
+ "IP DA": "ip-dst",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3",
+ }
+
+ ret = set()
+ else:
+ converter = {
+ "IP SA": "s",
+ "IP DA": "d",
+ "L3 proto": "t",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "f",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "n",
+ }
+
+ ret = ""
+
+ for line in descr.split("\n")[1:-2]:
+ # if this raises we probably need to add more keys to converter above
+ if to_nl:
+ ret.add(converter[line])
+ else:
+ ret += converter[line]
+ return ret
+
+
+def test_rxfh_nl_set_fail(cfg):
+ """
+ Test error path of Netlink SET.
+ """
+ _require_2qs(cfg)
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ with ksft_raises(NlError):
+ ethnl.rss_set({"header": {"dev-name": "lo"},
+ "indir": None})
+
+ with ksft_raises(NlError):
+ ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [100000]})
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ ksft_is(ntf, None)
+
+
+def test_rxfh_nl_set_indir(cfg):
+ """
+ Test setting indirection table via Netlink.
+ """
+ qcnt = _require_2qs(cfg)
+
+ # Test some SETs with a value
+ reset = defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex}, "indir": None})
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), {1})
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [0, 1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+ # Make sure we can't set the queue count below max queue used
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+ # Test reset back to default
+ reset.exec()
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), set(range(qcnt)))
+
+
+def test_rxfh_nl_set_indir_ctx(cfg):
+ """
+ Test setting indirection table for a custom context via Netlink.
+ """
+ _require_2qs(cfg)
+
+ # Get setting for ctx 0, we'll make sure they don't get clobbered
+ dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+
+ # Create context
+ ctx_id = _ethtool_create(cfg, "-X", "context new")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id, "indir": [1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id})
+ ksft_eq(set(rss.get("indir", [-1])), {1})
+
+ ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(ctx0, dflt)
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id, "indir": [0, 1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id})
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+ ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(ctx0, dflt)
+
+ # Make sure we can't set the queue count below max queue used
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+
+def test_rxfh_indir_ntf(cfg):
+ """
+ Check that Netlink notifications are generated when RSS indirection
+ table was modified.
+ """
+ _require_2qs(cfg)
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethtool(f"--disable-netlink -X {cfg.ifname} weight 0 1")
+ reset = defer(ethtool, f"-X {cfg.ifname} default")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+ reset.exec()
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after reset")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_is(ntf["msg"].get("context"), None)
+ ksft_ne(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_indir_ctx_ntf(cfg):
+ """
+ Check that Netlink notifications are generated when RSS indirection
+ table was modified on an additional RSS context.
+ """
+ _require_2qs(cfg)
+
+ ctx_id = _ethtool_create(cfg, "-X", "context new")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} weight 0 1")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(ntf["msg"].get("context"), ctx_id)
+ ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_nl_set_key(cfg):
+ """
+ Test setting hashing key via Netlink.
+ """
+
+ dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex},
+ "hkey": dflt["hkey"], "indir": None})
+
+ # Empty key should error out
+ with ksft_raises(NlError) as cm:
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hkey": None})
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.hkey')
+
+ # Set key to random
+ mod = random.randbytes(len(dflt["hkey"]))
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hkey": mod})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(rss.get("hkey", [-1]), mod)
+
+ # Set key to random and indir tbl to something at once
+ mod = random.randbytes(len(dflt["hkey"]))
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [0, 1], "hkey": mod})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(rss.get("hkey", [-1]), mod)
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+
+def test_rxfh_fields(cfg):
+ """
+ Test reading Rx Flow Hash over Netlink.
+ """
+
+ flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+ ethnl = EthtoolFamily()
+
+ cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ for fl_type in flow_types:
+ one = _ethtool_get_cfg(cfg, fl_type, to_nl=True)
+ ksft_eq(one, cfg_nl["flow-hash"][fl_type],
+ comment="Config for " + fl_type)
+
+
+def test_rxfh_fields_set(cfg):
+ """ Test configuring Rx Flow Hash over Netlink. """
+
+ flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+
+ # Collect current settings
+ cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # symmetric hashing is config-order-sensitive make sure we leave
+ # symmetric mode, or make the flow-hash sym-compatible first
+ changes = [{"flow-hash": cfg_old["flow-hash"],},
+ {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+ if cfg_old.get("input-xfrm"):
+ changes = list(reversed(changes))
+ for old in changes:
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+ # symmetric hashing prevents some of the configs below
+ if cfg_old.get("input-xfrm"):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": {}})
+
+ for fl_type in flow_types:
+ cur = _ethtool_get_cfg(cfg, fl_type)
+ if cur == "sdfn":
+ change_nl = {"ip-src", "ip-dst"}
+ change_ic = "sd"
+ else:
+ change_nl = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+ change_ic = "sdfn"
+
+ cfg.ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {fl_type: change_nl}
+ })
+ reset = defer(ethtool, f"--disable-netlink -N {cfg.ifname} "
+ f"rx-flow-hash {fl_type} {cur}")
+
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(change_nl, cfg_nl["flow-hash"][fl_type],
+ comment=f"Config for {fl_type} over Netlink")
+ cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+ ksft_eq(change_ic, cfg_ic,
+ comment=f"Config for {fl_type} over IOCTL")
+
+ reset.exec()
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(cfg_old["flow-hash"][fl_type], cfg_nl["flow-hash"][fl_type],
+ comment=f"Un-config for {fl_type} over Netlink")
+ cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+ ksft_eq(cur, cfg_ic, comment=f"Un-config for {fl_type} over IOCTL")
+
+ # Try to set multiple at once, the defer was already installed at the start
+ change = {"ip-src"}
+ if change == cfg_old["flow-hash"]["tcp4"]:
+ change = {"ip-dst"}
+ cfg.ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {x: change for x in flow_types}
+ })
+
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ for fl_type in flow_types:
+ ksft_eq(change, cfg_nl["flow-hash"][fl_type],
+ comment=f"multi-config for {fl_type} over Netlink")
+
+
+def test_rxfh_fields_set_xfrm(cfg):
+ """ Test changing Rx Flow Hash vs xfrm_input at once. """
+
+ def set_rss(cfg, xfrm, fh):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": xfrm, "flow-hash": fh})
+
+ # Install the reset handler
+ cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # symmetric hashing is config-order-sensitive make sure we leave
+ # symmetric mode, or make the flow-hash sym-compatible first
+ changes = [{"flow-hash": cfg_old["flow-hash"],},
+ {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+ if cfg_old.get("input-xfrm"):
+ changes = list(reversed(changes))
+ for old in changes:
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+ # Make sure we start with input-xfrm off, and tcp4 config non-sym
+ set_rss(cfg, {}, {})
+ set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+ # Setting sym and fixing tcp4 config not expected to pass right now
+ with ksft_raises(NlError):
+ set_rss(cfg, {"sym-xor"}, {"tcp4": {"ip-src", "ip-dst"}})
+ # One at a time should work, hopefully
+ set_rss(cfg, 0, {"tcp4": {"ip-src", "ip-dst"}})
+ no_support = False
+ try:
+ set_rss(cfg, {"sym-xor"}, {})
+ except NlError:
+ try:
+ set_rss(cfg, {"sym-or-xor"}, {})
+ except NlError:
+ no_support = True
+ if no_support:
+ raise KsftSkipEx("no input-xfrm supported")
+ # Disabling two at once should not work either without kernel changes
+ with ksft_raises(NlError):
+ set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+
+def test_rxfh_fields_ntf(cfg):
+ """ Test Rx Flow Hash notifications. """
+
+ cur = _ethtool_get_cfg(cfg, "tcp4")
+ if cur == "sdfn":
+ change = {"ip-src", "ip-dst"}
+ else:
+ change = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {"tcp4": change}
+ })
+ reset = defer(ethtool,
+ f"--disable-netlink -N {cfg.ifname} rx-flow-hash tcp4 {cur}")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after IOCTL change")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(ntf["msg"]["flow-hash"]["tcp4"], change)
+ ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+ reset.exec()
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after Netlink change")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_ne(ntf["msg"]["flow-hash"]["tcp4"], change)
+ ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+
+def test_rss_ctx_add(cfg):
+ """ Test creating an additional RSS context via Netlink """
+
+ _require_2qs(cfg)
+
+ # Test basic creation
+ ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+ d = defer(ethtool, f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ ksft_ne(ctx.get("context", 0), 0)
+ ksft_ne(set(ctx.get("indir", [0])), {0},
+ comment="Driver should init the indirection table")
+
+ # Try requesting the ID we just got allocated
+ with ksft_raises(NlError) as cm:
+ ctx = cfg.ethnl.rss_create_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx.get("context"),
+ })
+ ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ d.exec()
+ ksft_eq(cm.exception.nl_msg.error, -errno.EBUSY)
+
+ # Test creating with a specified RSS table, and context ID
+ ctx_id = ctx.get("context")
+ ctx = cfg.ethnl.rss_create_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx_id,
+ "indir": [1],
+ })
+ ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ ksft_eq(ctx.get("context"), ctx_id)
+ ksft_eq(set(ctx.get("indir", [0])), {1})
+
+
+def test_rss_ctx_ntf(cfg):
+ """ Test notifications for creating additional RSS contexts """
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ # Create / delete via Netlink
+ ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+ cfg.ethnl.rss_delete_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx["context"],
+ })
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[NL] No notification after context creation")
+ ksft_eq(ntf["name"], "rss-create-ntf")
+ ksft_eq(ctx, ntf["msg"])
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[NL] No notification after context deletion")
+ ksft_eq(ntf["name"], "rss-delete-ntf")
+
+ # Create / deleve via IOCTL
+ ctx_id = _ethtool_create(cfg, "--disable-netlink -X", "context new")
+ ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} delete")
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[IOCTL] No notification after context creation")
+ ksft_eq(ntf["name"], "rss-create-ntf")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[IOCTL] No notification after context deletion")
+ ksft_eq(ntf["name"], "rss-delete-ntf")
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
new file mode 100755
index 000000000000..ed7e405682f0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -0,0 +1,832 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import datetime
+import random
+import re
+from lib.py import ksft_run, ksft_pr, ksft_exit
+from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises
+from lib.py import NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import rand_port
+from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure
+
+
+def _rss_key_str(key):
+ return ":".join(["{:02x}".format(x) for x in key])
+
+
+def _rss_key_rand(length):
+ return [random.randint(0, 255) for _ in range(length)]
+
+
+def _rss_key_check(cfg, data=None, context=0):
+ if data is None:
+ data = get_rss(cfg, context=context)
+ if 'rss-hash-key' not in data:
+ return
+ non_zero = [x for x in data['rss-hash-key'] if x != 0]
+ ksft_eq(bool(non_zero), True, comment=f"RSS key is all zero {data['rss-hash-key']}")
+
+
+def get_rss(cfg, context=0):
+ return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0]
+
+
+def get_drop_err_sum(cfg):
+ stats = ip("-s -s link show dev " + cfg.ifname, json=True)[0]
+ cnt = 0
+ for key in ['errors', 'dropped', 'over_errors', 'fifo_errors',
+ 'length_errors', 'crc_errors', 'missed_errors',
+ 'frame_errors']:
+ cnt += stats["stats64"]["rx"][key]
+ return cnt, stats["stats64"]["tx"]["carrier_changes"]
+
+
+def ethtool_create(cfg, act, opts):
+ output = ethtool(f"{act} {cfg.ifname} {opts}").stdout
+ # Output will be something like: "New RSS context is 1" or
+ # "Added rule with ID 7", we want the integer from the end
+ return int(output.split()[-1])
+
+
+def require_ntuple(cfg):
+ features = ethtool(f"-k {cfg.ifname}", json=True)[0]
+ if not features["ntuple-filters"]["active"]:
+ # ntuple is more of a capability than a config knob, don't bother
+ # trying to enable it (until some driver actually needs it).
+ raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"]))
+
+
+def require_context_cnt(cfg, need_cnt):
+ # There's no good API to get the context count, so the tests
+ # which try to add a lot opportunisitically set the count they
+ # discovered. Careful with test ordering!
+ if need_cnt and cfg.context_cnt and cfg.context_cnt < need_cnt:
+ raise KsftSkipEx(f"Test requires at least {need_cnt} contexts, but device only has {cfg.context_cnt}")
+
+
+# Get Rx packet counts for all queues, as a simple list of integers
+# if @prev is specified the prev counts will be subtracted
+def _get_rx_cnts(cfg, prev=None):
+ cfg.wait_hw_stats_settle()
+ data = cfg.netdevnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, dump=True)
+ data = [x for x in data if x['queue-type'] == "rx"]
+ max_q = max([x["queue-id"] for x in data])
+ queue_stats = [0] * (max_q + 1)
+ for q in data:
+ queue_stats[q["queue-id"]] = q["rx-packets"]
+ if prev and q["queue-id"] < len(prev):
+ queue_stats[q["queue-id"]] -= prev[q["queue-id"]]
+ return queue_stats
+
+
+def _send_traffic_check(cfg, port, name, params):
+ # params is a dict with 3 possible keys:
+ # - "target": required, which queues we expect to get iperf traffic
+ # - "empty": optional, which queues should see no traffic at all
+ # - "noise": optional, which queues we expect to see low traffic;
+ # used for queues of the main context, since some background
+ # OS activity may use those queues while we're testing
+ # the value for each is a list, or some other iterable containing queue ids.
+
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+
+ directed = sum(cnts[i] for i in params['target'])
+
+ ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts))
+ if params.get('noise'):
+ ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2,
+ f"traffic on other queues ({name})':" + str(cnts))
+ if params.get('empty'):
+ ksft_eq(sum(cnts[i] for i in params['empty']), 0,
+ f"traffic on inactive queues ({name}): " + str(cnts))
+
+
+def _ntuple_rule_check(cfg, rule_id, ctx_id):
+ """Check that ntuple rule references RSS context ID"""
+ text = ethtool(f"-n {cfg.ifname} rule {rule_id}").stdout
+ pattern = f"RSS Context (ID: )?{ctx_id}"
+ ksft_true(re.search(pattern, text), "RSS context not referenced in ntuple rule")
+
+
+def test_rss_key_indir(cfg):
+ """Test basics like updating the main RSS key and indirection table."""
+
+ qcnt = len(_get_rx_cnts(cfg))
+ if qcnt < 3:
+ raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)")
+
+ data = get_rss(cfg)
+ want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table']
+ for k in want_keys:
+ if k not in data:
+ raise KsftFailEx("ethtool results missing key: " + k)
+ if not data[k]:
+ raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}")
+
+ _rss_key_check(cfg, data=data)
+ key_len = len(data['rss-hash-key'])
+
+ # Set the key
+ key = _rss_key_rand(key_len)
+ ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key))
+
+ data = get_rss(cfg)
+ ksft_eq(key, data['rss-hash-key'])
+
+ # Set the indirection table and the key together
+ key = _rss_key_rand(key_len)
+ ethtool(f"-X {cfg.ifname} equal 3 hkey " + _rss_key_str(key))
+ reset_indir = defer(ethtool, f"-X {cfg.ifname} default")
+
+ data = get_rss(cfg)
+ _rss_key_check(cfg, data=data)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(2, max(data['rss-indirection-table']))
+
+ # Reset indirection table and set the key
+ key = _rss_key_rand(key_len)
+ ethtool(f"-X {cfg.ifname} default hkey " + _rss_key_str(key))
+ data = get_rss(cfg)
+ _rss_key_check(cfg, data=data)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(qcnt - 1, max(data['rss-indirection-table']))
+
+ # Set the indirection table
+ ethtool(f"-X {cfg.ifname} equal 2")
+ data = get_rss(cfg)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(1, max(data['rss-indirection-table']))
+
+ # Check we only get traffic on the first 2 queues
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+ # 2 queues, 20k packets, must be at least 5k per queue
+ ksft_ge(cnts[0], 5000, "traffic on main context (1/2): " + str(cnts))
+ ksft_ge(cnts[1], 5000, "traffic on main context (2/2): " + str(cnts))
+ # The other queues should be unused
+ ksft_eq(sum(cnts[2:]), 0, "traffic on unused queues: " + str(cnts))
+
+ # Restore, and check traffic gets spread again
+ reset_indir.exec()
+
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+ if qcnt > 4:
+ # First two queues get less traffic than all the rest
+ ksft_lt(sum(cnts[:2]), sum(cnts[2:]),
+ "traffic distributed: " + str(cnts))
+ else:
+ # When queue count is low make sure third queue got significant pkts
+ ksft_ge(cnts[2], 3500, "traffic distributed: " + str(cnts))
+
+
+def test_rss_queue_reconfigure(cfg, main_ctx=True):
+ """Make sure queue changes can't override requested RSS config.
+
+ By default main RSS table should change to include all queues.
+ When user sets a specific RSS config the driver should preserve it,
+ even when queue count changes. Driver should refuse to deactivate
+ queues used in the user-set RSS config.
+ """
+
+ if not main_ctx:
+ require_ntuple(cfg)
+
+ # Start with 4 queues, an arbitrary known number.
+ try:
+ qcnt = len(_get_rx_cnts(cfg))
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test or qstat not supported")
+
+ if main_ctx:
+ ctx_id = 0
+ ctx_ref = ""
+ else:
+ ctx_id = ethtool_create(cfg, "-X", "context new")
+ ctx_ref = f"context {ctx_id}"
+ defer(ethtool, f"-X {cfg.ifname} {ctx_ref} delete")
+
+ # Indirection table should be distributing to all queues.
+ data = get_rss(cfg, context=ctx_id)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(3, max(data['rss-indirection-table']))
+
+ # Increase queues, indirection table should be distributing to all queues.
+ # It's unclear whether tables of additional contexts should be reset, too.
+ if main_ctx:
+ ethtool(f"-L {cfg.ifname} combined 5")
+ data = get_rss(cfg)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(4, max(data['rss-indirection-table']))
+ ethtool(f"-L {cfg.ifname} combined 4")
+
+ # Configure the table explicitly
+ port = rand_port()
+ ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 0 1")
+ if main_ctx:
+ other_key = 'empty'
+ defer(ethtool, f"-X {cfg.ifname} default")
+ else:
+ other_key = 'noise'
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}"
+ ntuple = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")
+
+ _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
+ other_key: (1, 2) })
+
+ # We should be able to increase queues, but table should be left untouched
+ ethtool(f"-L {cfg.ifname} combined 5")
+ data = get_rss(cfg, context=ctx_id)
+ ksft_eq({0, 3}, set(data['rss-indirection-table']))
+
+ _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
+ other_key: (1, 2, 4) })
+
+ # Setting queue count to 3 should fail, queue 3 is used
+ try:
+ ethtool(f"-L {cfg.ifname} combined 3")
+ except CmdExitFailure:
+ pass
+ else:
+ raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})")
+
+ if not main_ctx:
+ ethtool(f"-L {cfg.ifname} combined 4")
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 1"
+ try:
+ # this targets queue 4, which doesn't exist
+ ntuple2 = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}")
+ except CmdExitFailure:
+ pass
+ else:
+ raise Exception(f"Driver didn't prevent us from targeting a nonexistent queue (context {ctx_id})")
+ # change the table to target queues 0 and 2
+ ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0")
+ # ntuple rule therefore targets queues 1 and 3
+ try:
+ ntuple2 = ethtool_create(cfg, "-N", flow)
+ except CmdExitFailure:
+ ksft_pr("Driver does not support rss + queue offset")
+ return
+
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}")
+ # should replace existing filter
+ ksft_eq(ntuple, ntuple2)
+ _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3),
+ 'noise' : (0, 2) })
+ # Setting queue count to 3 should fail, queue 3 is used
+ try:
+ ethtool(f"-L {cfg.ifname} combined 3")
+ except CmdExitFailure:
+ pass
+ else:
+ raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})")
+
+
+def test_rss_resize(cfg):
+ """Test resizing of the RSS table.
+
+ Some devices dynamically increase and decrease the size of the RSS
+ indirection table based on the number of enabled queues.
+ When that happens driver must maintain the balance of entries
+ (preferably duplicating the smaller table).
+ """
+
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ ch_max = channels['combined-max']
+ qcnt = channels['combined-count']
+
+ if ch_max < 2:
+ raise KsftSkipEx(f"Not enough queues for the test: {ch_max}")
+
+ ethtool(f"-L {cfg.ifname} combined 2")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+
+ ethtool(f"-X {cfg.ifname} weight 1 7")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ ethtool(f"-L {cfg.ifname} combined {ch_max}")
+ data = get_rss(cfg)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(1, max(data['rss-indirection-table']))
+
+ ksft_eq(7,
+ data['rss-indirection-table'].count(1) /
+ data['rss-indirection-table'].count(0),
+ f"Table imbalance after resize: {data['rss-indirection-table']}")
+
+
+def test_hitless_key_update(cfg):
+ """Test that flows may be rehashed without impacting traffic.
+
+ Some workloads may want to rehash the flows in response to an imbalance.
+ Most effective way to do that is changing the RSS key. Check that changing
+ the key does not cause link flaps or traffic disruption.
+
+ Disrupting traffic for key update is not a bug, but makes the key
+ update unusable for rehashing under load.
+ """
+ data = get_rss(cfg)
+ key_len = len(data['rss-hash-key'])
+
+ ethnl = EthtoolFamily()
+ key = random.randbytes(key_len)
+
+ tgen = GenerateTraffic(cfg)
+ try:
+ errors0, carrier0 = get_drop_err_sum(cfg)
+ t0 = datetime.datetime.now()
+ ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, "hkey": key})
+ t1 = datetime.datetime.now()
+ errors1, carrier1 = get_drop_err_sum(cfg)
+ finally:
+ tgen.wait_pkts_and_stop(5000)
+
+ ksft_lt((t1 - t0).total_seconds(), 0.15)
+ ksft_eq(errors1 - errors1, 0)
+ ksft_eq(carrier1 - carrier0, 0)
+
+
+def test_rss_context_dump(cfg):
+ """
+ Test dumping RSS contexts. This tests mostly exercises the kernel APIs.
+ """
+
+ # Get a random key of the right size
+ data = get_rss(cfg)
+ if 'rss-hash-key' in data:
+ key_data = _rss_key_rand(len(data['rss-hash-key']))
+ key = _rss_key_str(key_data)
+ else:
+ key_data = []
+ key = "ba:ad"
+
+ ids = []
+ try:
+ ids.append(ethtool_create(cfg, "-X", f"context new"))
+ defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+
+ ids.append(ethtool_create(cfg, "-X", f"context new weight 1 1"))
+ defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+
+ ids.append(ethtool_create(cfg, "-X", f"context new hkey {key}"))
+ defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+ except CmdExitFailure:
+ if not ids:
+ raise KsftSkipEx("Unable to add any contexts")
+ ksft_pr(f"Added only {len(ids)} out of 3 contexts")
+
+ expect_tuples = set([(cfg.ifname, -1)] + [(cfg.ifname, ctx_id) for ctx_id in ids])
+
+ # Dump all
+ ctxs = cfg.ethnl.rss_get({}, dump=True)
+ tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+ ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump")
+ ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname])
+ ksft_eq(expect_tuples, ctx_tuples)
+
+ # Sanity-check the results
+ for data in ctxs:
+ ksft_ne(set(data.get('indir', [1])), {0}, "indir table is all zero")
+ ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero")
+
+ # More specific checks
+ if len(ids) > 1 and data.get('context') == ids[1]:
+ ksft_eq(set(data['indir']), {0, 1},
+ "ctx1 - indir table mismatch")
+ if len(ids) > 2 and data.get('context') == ids[2]:
+ ksft_eq(data['hkey'], bytes(key_data), "ctx2 - key mismatch")
+
+ # Ifindex filter
+ ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}, dump=True)
+ tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+ ctx_tuples = set(tuples)
+ ksft_eq(len(tuples), len(ctx_tuples), "duplicates in context dump")
+ ksft_eq(expect_tuples, ctx_tuples)
+
+ # Skip ctx 0
+ expect_tuples.remove((cfg.ifname, -1))
+
+ ctxs = cfg.ethnl.rss_get({'start-context': 1}, dump=True)
+ tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+ ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump")
+ ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname])
+ ksft_eq(expect_tuples, ctx_tuples)
+
+ # And finally both with ifindex and skip main
+ ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}, 'start-context': 1}, dump=True)
+ ctx_tuples = set([(c['header']['dev-name'], c.get('context', -1)) for c in ctxs])
+ ksft_eq(expect_tuples, ctx_tuples)
+
+
+def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
+ """
+ Test separating traffic into RSS contexts.
+ The queues will be allocated 2 for each context:
+ ctx0 ctx1 ctx2 ctx3
+ [0 1] [2 3] [4 5] [6 7] ...
+ """
+
+ require_ntuple(cfg)
+
+ requested_ctx_cnt = ctx_cnt
+
+ # Try to allocate more queues when necessary
+ qcnt = len(_get_rx_cnts(cfg))
+ if qcnt < 2 + 2 * ctx_cnt:
+ try:
+ ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
+ ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test")
+
+ ports = []
+
+ # Use queues 0 and 1 for normal traffic
+ ethtool(f"-X {cfg.ifname} equal 2")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ for i in range(ctx_cnt):
+ want_cfg = f"start {2 + i * 2} equal 2"
+ create_cfg = want_cfg if create_with_cfg else ""
+
+ try:
+ ctx_id = ethtool_create(cfg, "-X", f"context new {create_cfg}")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+ except CmdExitFailure:
+ # try to carry on and skip at the end
+ if i == 0:
+ raise
+ ksft_pr(f"Failed to create context {i + 1}, trying to test what we got")
+ ctx_cnt = i
+ if cfg.context_cnt is None:
+ cfg.context_cnt = ctx_cnt
+ break
+
+ _rss_key_check(cfg, context=ctx_id)
+
+ if not create_with_cfg:
+ ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}")
+ _rss_key_check(cfg, context=ctx_id)
+
+ # Sanity check the context we just created
+ data = get_rss(cfg, ctx_id)
+ ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data))
+ ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data))
+
+ ports.append(rand_port())
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}"
+ ntuple = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")
+
+ _ntuple_rule_check(cfg, ntuple, ctx_id)
+
+ for i in range(ctx_cnt):
+ _send_traffic_check(cfg, ports[i], f"context {i}",
+ { 'target': (2+i*2, 3+i*2),
+ 'noise': (0, 1),
+ 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) })
+
+ if requested_ctx_cnt != ctx_cnt:
+ raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
+
+
+def test_rss_context4(cfg):
+ test_rss_context(cfg, 4)
+
+
+def test_rss_context32(cfg):
+ test_rss_context(cfg, 32)
+
+
+def test_rss_context4_create_with_cfg(cfg):
+ test_rss_context(cfg, 4, create_with_cfg=True)
+
+
+def test_rss_context_queue_reconfigure(cfg):
+ test_rss_queue_reconfigure(cfg, main_ctx=False)
+
+
+def test_rss_context_out_of_order(cfg, ctx_cnt=4):
+ """
+ Test separating traffic into RSS contexts.
+ Contexts are removed in semi-random order, and steering re-tested
+ to make sure removal doesn't break steering to surviving contexts.
+ Test requires 3 contexts to work.
+ """
+
+ require_ntuple(cfg)
+ require_context_cnt(cfg, 4)
+
+ # Try to allocate more queues when necessary
+ qcnt = len(_get_rx_cnts(cfg))
+ if qcnt < 2 + 2 * ctx_cnt:
+ try:
+ ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
+ ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test")
+
+ ntuple = []
+ ctx = []
+ ports = []
+
+ def remove_ctx(idx):
+ ntuple[idx].exec()
+ ntuple[idx] = None
+ ctx[idx].exec()
+ ctx[idx] = None
+
+ def check_traffic():
+ for i in range(ctx_cnt):
+ if ctx[i]:
+ expected = {
+ 'target': (2+i*2, 3+i*2),
+ 'noise': (0, 1),
+ 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt))
+ }
+ else:
+ expected = {
+ 'target': (0, 1),
+ 'empty': range(2, 2+2*ctx_cnt)
+ }
+
+ _send_traffic_check(cfg, ports[i], f"context {i}", expected)
+
+ # Use queues 0 and 1 for normal traffic
+ ethtool(f"-X {cfg.ifname} equal 2")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ for i in range(ctx_cnt):
+ ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2")
+ ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete"))
+
+ ports.append(rand_port())
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}"))
+
+ check_traffic()
+
+ # Remove middle context
+ remove_ctx(ctx_cnt // 2)
+ check_traffic()
+
+ # Remove first context
+ remove_ctx(0)
+ check_traffic()
+
+ # Remove last context
+ remove_ctx(-1)
+ check_traffic()
+
+
+def test_rss_context_overlap(cfg, other_ctx=0):
+ """
+ Test contexts overlapping with each other.
+ Use 4 queues for the main context, but only queues 2 and 3 for context 1.
+ """
+
+ require_ntuple(cfg)
+ if other_ctx:
+ require_context_cnt(cfg, 2)
+
+ queue_cnt = len(_get_rx_cnts(cfg))
+ if queue_cnt < 4:
+ try:
+ ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test")
+
+ if other_ctx == 0:
+ ethtool(f"-X {cfg.ifname} equal 4")
+ defer(ethtool, f"-X {cfg.ifname} default")
+ else:
+ other_ctx = ethtool_create(cfg, "-X", "context new")
+ ethtool(f"-X {cfg.ifname} context {other_ctx} equal 4")
+ defer(ethtool, f"-X {cfg.ifname} context {other_ctx} delete")
+
+ ctx_id = ethtool_create(cfg, "-X", "context new")
+ ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ port = rand_port()
+ if other_ctx:
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {other_ctx}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+ # Test the main context
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+
+ ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts))
+ ksft_ge(sum(cnts[ :2]), 7000, "traffic on main context (1/2): " + str(cnts))
+ ksft_ge(sum(cnts[2:4]), 7000, "traffic on main context (2/2): " + str(cnts))
+ if other_ctx == 0:
+ ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts))
+
+ # Now create a rule for context 1 and make sure traffic goes to a subset
+ if other_ctx:
+ ntuple.exec()
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+
+ directed = sum(cnts[2:4])
+ ksft_lt(sum(cnts[ :2]), directed / 2, "traffic on main context: " + str(cnts))
+ ksft_ge(directed, 20000, "traffic on extra context: " + str(cnts))
+ if other_ctx == 0:
+ ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts))
+
+
+def test_rss_context_overlap2(cfg):
+ test_rss_context_overlap(cfg, True)
+
+
+def test_flow_add_context_missing(cfg):
+ """
+ Test that we are not allowed to add a rule pointing to an RSS context
+ which was never created.
+ """
+
+ require_ntuple(cfg)
+
+ # Find a context which doesn't exist
+ for ctx_id in range(1, 100):
+ try:
+ get_rss(cfg, context=ctx_id)
+ except CmdExitFailure:
+ break
+
+ with ksft_raises(CmdExitFailure) as cm:
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port 1234 context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ ethtool(f"-N {cfg.ifname} delete {ntuple_id}")
+ if cm.exception:
+ ksft_in('Invalid argument', cm.exception.cmd.stderr)
+
+
+def test_delete_rss_context_busy(cfg):
+ """
+ Test that deletion returns -EBUSY when an rss context is being used
+ by an ntuple filter.
+ """
+
+ require_ntuple(cfg)
+
+ # create additional rss context
+ ctx_id = ethtool_create(cfg, "-X", "context new")
+ ctx_deleter = defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ # utilize context from ntuple filter
+ port = rand_port()
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+ # attempt to delete in-use context
+ try:
+ ctx_deleter.exec_only()
+ ctx_deleter.cancel()
+ raise KsftFailEx(f"deleted context {ctx_id} used by rule {ntuple_id}")
+ except CmdExitFailure:
+ pass
+
+
+def test_rss_ntuple_addition(cfg):
+ """
+ Test that the queue offset (ring_cookie) of an ntuple rule is added
+ to the queue number read from the indirection table.
+ """
+
+ require_ntuple(cfg)
+
+ queue_cnt = len(_get_rx_cnts(cfg))
+ if queue_cnt < 4:
+ try:
+ ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test")
+
+ # Use queue 0 for normal traffic
+ ethtool(f"-X {cfg.ifname} equal 1")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ # create additional rss context
+ ctx_id = ethtool_create(cfg, "-X", "context new equal 2")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ # utilize context from ntuple filter
+ port = rand_port()
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 2"
+ try:
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ except CmdExitFailure:
+ raise KsftSkipEx("Ntuple filter with RSS and nonzero action not supported")
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+ _send_traffic_check(cfg, port, f"context {ctx_id}", { 'target': (2, 3),
+ 'empty' : (1,),
+ 'noise' : (0,) })
+
+
+def test_rss_default_context_rule(cfg):
+ """
+ Allocate a port, direct this port to context 0, then create a new RSS
+ context and steer all TCP traffic to it (context 1). Verify that:
+ * Traffic to the specific port continues to use queues of the main
+ context (0/1).
+ * Traffic to any other TCP port is redirected to the new context
+ (queues 2/3).
+ """
+
+ require_ntuple(cfg)
+
+ queue_cnt = len(_get_rx_cnts(cfg))
+ if queue_cnt < 4:
+ try:
+ ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
+ except Exception as exc:
+ raise KsftSkipEx("Not enough queues for the test") from exc
+
+ # Use queues 0 and 1 for the main context
+ ethtool(f"-X {cfg.ifname} equal 2")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ # Create a new RSS context that uses queues 2 and 3
+ ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 2")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ # Generic low-priority rule: redirect all TCP traffic to the new context.
+ # Give it an explicit higher location number (lower priority).
+ flow_generic = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} context {ctx_id} loc 1"
+ ethtool(f"-N {cfg.ifname} {flow_generic}")
+ defer(ethtool, f"-N {cfg.ifname} delete 1")
+
+ # Specific high-priority rule for a random port that should stay on context 0.
+ # Assign loc 0 so it is evaluated before the generic rule.
+ port_main = rand_port()
+ flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0"
+ ethtool(f"-N {cfg.ifname} {flow_main}")
+ defer(ethtool, f"-N {cfg.ifname} delete 0")
+
+ _ntuple_rule_check(cfg, 1, ctx_id)
+
+ # Verify that traffic matching the specific rule still goes to queues 0/1
+ _send_traffic_check(cfg, port_main, "context 0",
+ { 'target': (0, 1),
+ 'empty' : (2, 3) })
+
+ # And that traffic for any other port is steered to the new context
+ port_other = rand_port()
+ _send_traffic_check(cfg, port_other, f"context {ctx_id}",
+ { 'target': (2, 3),
+ 'noise' : (0, 1) })
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.context_cnt = None
+ cfg.ethnl = EthtoolFamily()
+ cfg.netdevnl = NetdevFamily()
+
+ ksft_run([test_rss_key_indir, test_rss_queue_reconfigure,
+ test_rss_resize, test_hitless_key_update,
+ test_rss_context, test_rss_context4, test_rss_context32,
+ test_rss_context_dump, test_rss_context_queue_reconfigure,
+ test_rss_context_overlap, test_rss_context_overlap2,
+ test_rss_context_out_of_order, test_rss_context4_create_with_cfg,
+ test_flow_add_context_missing,
+ test_delete_rss_context_busy, test_rss_ntuple_addition,
+ test_rss_default_context_rule],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
new file mode 100755
index 000000000000..6fa95fe27c47
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Tests for RSS hashing on IPv6 Flow Label.
+"""
+
+import glob
+import os
+import socket
+from lib.py import CmdExitFailure
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \
+ ksft_not_in, ksft_raises, KsftSkipEx
+from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port
+from lib.py import NetDrvEpEnv
+
+
+def _check_system(cfg):
+ if not hasattr(socket, "SO_INCOMING_CPU"):
+ raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
+ qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*"))
+ if qcnt < 2:
+ raise KsftSkipEx(f"Local has only {qcnt} queues")
+
+ for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt",
+ f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]:
+ try:
+ with open(f, 'r') as fp:
+ setting = fp.read().strip()
+ # CPU mask will be zeros and commas
+ if setting.replace("0", "").replace(",", ""):
+ raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}")
+ except FileNotFoundError:
+ pass
+
+ # 1 is the default, if someone changed it we probably shouldn"t mess with it
+ af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout
+ if af.strip() != "1":
+ raise KsftSkipEx("Remote does not have auto_flowlabels enabled")
+
+
+def _ethtool_get_cfg(cfg, fl_type):
+ descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+
+ converter = {
+ "IP SA": "s",
+ "IP DA": "d",
+ "L3 proto": "t",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "f",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "n",
+ "IPv6 Flow Label": "l",
+ }
+
+ ret = ""
+ for line in descr.split("\n")[1:-2]:
+ # if this raises we probably need to add more keys to converter above
+ ret += converter[line]
+ return ret
+
+
+def _traffic(cfg, one_sock, one_cpu):
+ local_port = rand_port(socket.SOCK_DGRAM)
+ remote_port = rand_port(socket.SOCK_DGRAM)
+
+ sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
+ sock.bind(("", local_port))
+ sock.connect((cfg.remote_addr_v["6"], 0))
+ if one_sock:
+ send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \
+ "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-"
+ else:
+ send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \
+ f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done"
+
+ cpus = set()
+ with bkg(send, shell=True, host=cfg.remote, exit_wait=True):
+ for _ in range(20):
+ fd_read_timeout(sock.fileno(), 1)
+ cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
+ cpus.add(cpu)
+
+ if one_cpu:
+ ksft_eq(len(cpus), 1,
+ f"{one_sock=} - expected one CPU, got traffic on: {cpus=}")
+ else:
+ ksft_ge(len(cpus), 2,
+ f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}")
+
+
+def test_rss_flow_label(cfg):
+ """
+ Test hashing on IPv6 flow label. Send traffic over a single socket
+ and over multiple sockets. Depend on the remote having auto-label
+ enabled so that it randomizes the label per socket.
+ """
+
+ cfg.require_ipver("6")
+ cfg.require_cmd("socat", remote=True)
+ _check_system(cfg)
+
+ # Enable flow label hashing for UDP6
+ initial = _ethtool_get_cfg(cfg, "udp6")
+ no_lbl = initial.replace("l", "")
+ if "l" not in initial:
+ try:
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}")
+ except CmdExitFailure as exc:
+ raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc
+
+ defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}")
+
+ _traffic(cfg, one_sock=True, one_cpu=True)
+ _traffic(cfg, one_sock=False, one_cpu=False)
+
+ # Disable it, we should see no hashing (reset was already defer()ed)
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}")
+
+ _traffic(cfg, one_sock=False, one_cpu=True)
+
+
+def _check_v4_flow_types(cfg):
+ for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]:
+ try:
+ cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+ ksft_not_in("Flow Label", cur,
+ comment=f"{fl_type=} has Flow Label:" + cur)
+ except CmdExitFailure:
+ # Probably does not support this flow type
+ pass
+
+
+def test_rss_flow_label_6only(cfg):
+ """
+ Test interactions with IPv4 flow types. It should not be possible to set
+ IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also
+ not appear in the IPv4 "current config".
+ """
+
+ with ksft_raises(CmdExitFailure) as cm:
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl")
+ ksft_in("Invalid argument", cm.exception.cmd.stderr)
+
+ _check_v4_flow_types(cfg)
+
+ # Try to enable Flow Labels and check again, in case it leaks thru
+ initial = _ethtool_get_cfg(cfg, "udp6")
+ changed = initial.replace("l", "") if "l" in initial else initial + "l"
+
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}")
+ restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}")
+
+ _check_v4_flow_types(cfg)
+ restore.exec()
+ _check_v4_flow_types(cfg)
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ ksft_run([test_rss_flow_label,
+ test_rss_flow_label_6only],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
new file mode 100755
index 000000000000..72880e388478
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import multiprocessing
+import socket
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout
+from lib.py import NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import rand_port
+
+
+def traffic(cfg, local_port, remote_port, ipver):
+ af_inet = socket.AF_INET if ipver == "4" else socket.AF_INET6
+ sock = socket.socket(af_inet, socket.SOCK_DGRAM)
+ sock.bind(("", local_port))
+ sock.connect((cfg.remote_addr_v[ipver], remote_port))
+ tgt = f"{ipver}:[{cfg.addr_v[ipver]}]:{local_port},sourceport={remote_port}"
+ cmd("echo a | socat - UDP" + tgt, host=cfg.remote)
+ fd_read_timeout(sock.fileno(), 5)
+ return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
+
+
+def test_rss_input_xfrm(cfg, ipver):
+ """
+ Test symmetric input_xfrm.
+ If symmetric RSS hash is configured, send traffic twice, swapping the
+ src/dst UDP ports, and verify that the same queue is receiving the traffic
+ in both cases (IPs are constant).
+ """
+
+ if multiprocessing.cpu_count() < 2:
+ raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")
+
+ cfg.require_cmd("socat", local=False, remote=True)
+
+ if not hasattr(socket, "SO_INCOMING_CPU"):
+ raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
+ rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}})
+ input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {})))
+
+ # Check for symmetric xor/or-xor
+ if not input_xfrm:
+ raise KsftSkipEx("Symmetric RSS hash not requested")
+
+ cpus = set()
+ successful = 0
+ for _ in range(100):
+ try:
+ port1 = rand_port(socket.SOCK_DGRAM)
+ port2 = rand_port(socket.SOCK_DGRAM)
+ cpu1 = traffic(cfg, port1, port2, ipver)
+ cpu2 = traffic(cfg, port2, port1, ipver)
+ cpus.update([cpu1, cpu2])
+ ksft_eq(
+ cpu1, cpu2, comment=f"Received traffic on different cpus with ports ({port1 = }, {port2 = }) while symmetric hash is configured")
+
+ successful += 1
+ if successful == 10:
+ break
+ except:
+ continue
+ else:
+ raise KsftFailEx("Failed to run traffic")
+
+ ksft_ge(len(cpus), 2,
+ comment=f"Received traffic on less than two cpus {cpus = }")
+
+
+def test_rss_input_xfrm_ipv4(cfg):
+ cfg.require_ipver("4")
+ test_rss_input_xfrm(cfg, "4")
+
+
+def test_rss_input_xfrm_ipv6(cfg):
+ cfg.require_ipver("6")
+ test_rss_input_xfrm(cfg, "6")
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netdevnl = NetdevFamily()
+
+ ksft_run([test_rss_input_xfrm_ipv4, test_rss_input_xfrm_ipv6],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/settings b/tools/testing/selftests/drivers/net/hw/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c
new file mode 100644
index 000000000000..d23b3b0c20a3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c
@@ -0,0 +1,655 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <ynl.h>
+#include "ethtool-user.h"
+
+#include "kselftest.h"
+#include "../../../net/lib/ksft.h"
+
+#define TOEPLITZ_KEY_MIN_LEN 40
+#define TOEPLITZ_KEY_MAX_LEN 60
+
+#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
+#define RSS_MAX_INDIR (1 << 16)
+
+#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport = 8000;
+static int cfg_family = AF_INET6;
+static char *cfg_ifname = "eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type = SOCK_STREAM;
+static int cfg_timeout_msec = 1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+ int fd;
+ char *mmap;
+ int idx;
+ int cpu;
+};
+
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static unsigned int rss_indir_tbl[RSS_MAX_INDIR];
+static unsigned int rss_indir_tbl_size;
+static struct ring_state rings[RSS_MAX_CPUS];
+
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+ const unsigned char *key)
+{
+ int i, bit, ret = 0;
+ uint32_t key32;
+
+ key32 = ntohl(*((uint32_t *)key));
+ key += 4;
+
+ for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+ for (bit = 7; bit >= 0; bit--) {
+ if (four_tuple[i] & (1 << bit))
+ ret ^= key32;
+
+ key32 <<= 1;
+ key32 |= !!(key[0] & (1 << bit));
+ }
+ key++;
+ }
+
+ return ret;
+}
+
+/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+ int queue;
+
+ if (rss_indir_tbl_size)
+ queue = rss_indir_tbl[rx_hash % rss_indir_tbl_size];
+ else
+ queue = rx_hash % cfg_num_queues;
+
+ log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+ if (rx_irq_cpus[queue] != cpu) {
+ log_verbose(". error: rss cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
+ int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
+
+ log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+ if (rps_silo_to_cpu[silo] != cpu) {
+ log_verbose(". error: rps cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void log_rxhash(int cpu, uint32_t rx_hash,
+ const char *addrs, int addr_len)
+{
+ char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+ uint16_t *ports;
+
+ if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+ !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
+ error(1, 0, "address parse error");
+
+ ports = (void *)addrs + (addr_len * 2);
+ log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+ cpu, rx_hash, saddr, daddr,
+ ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Compare computed rxhash with rxhash received from tpacket_v3 */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+ unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+ uint32_t rx_hash_sw;
+ const char *addrs;
+ int addr_len;
+
+ if (cfg_family == AF_INET) {
+ addr_len = sizeof(struct in_addr);
+ addrs = pkt + offsetof(struct iphdr, saddr);
+ } else {
+ addr_len = sizeof(struct in6_addr);
+ addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
+ }
+
+ memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+ rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+ if (cfg_verbose)
+ log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+ if (rx_hash != rx_hash_sw) {
+ log_verbose(" != expected 0x%x\n", rx_hash_sw);
+ frames_error++;
+ return;
+ }
+
+ log_verbose(" OK");
+ if (cfg_num_queues)
+ verify_rss(rx_hash, cpu);
+ else if (cfg_num_rps_cpus)
+ verify_rps(rx_hash, cpu);
+ log_verbose("\n");
+}
+
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+ struct tpacket3_hdr *hdr = (void *)frame;
+
+ if (hdr->hv1.tp_rxhash)
+ verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+ ring->cpu);
+ else
+ frames_nohash++;
+
+ return frame + hdr->tp_next_offset;
+}
+
+/* A single TPACKET_V3 block can hold multiple frames */
+static bool recv_block(struct ring_state *ring)
+{
+ struct tpacket_block_desc *block;
+ char *frame;
+ int i;
+
+ block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+ if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+ return false;
+
+ frame = (char *)block;
+ frame += block->hdr.bh1.offset_to_first_pkt;
+
+ for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
+ frame = recv_frame(ring, frame);
+ frames_received++;
+ }
+
+ block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+ ring->idx = (ring->idx + 1) % ring_block_nr;
+
+ return true;
+}
+
+/* simple test: sleep once unconditionally and then process all rings */
+static void process_rings(void)
+{
+ int i;
+
+ usleep(1000 * cfg_timeout_msec);
+
+ for (i = 0; i < num_cpus; i++)
+ do {} while (recv_block(&rings[i]));
+
+ fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+ frames_received - frames_nohash - frames_error,
+ frames_nohash, frames_error);
+}
+
+static char *setup_ring(int fd)
+{
+ struct tpacket_req3 req3 = {0};
+ void *ring;
+
+ req3.tp_retire_blk_tov = cfg_timeout_msec / 8;
+ req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+ req3.tp_frame_size = 2048;
+ req3.tp_frame_nr = 1 << 10;
+ req3.tp_block_nr = 16;
+
+ req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+ req3.tp_block_size /= req3.tp_block_nr;
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+ error(1, errno, "setsockopt PACKET_RX_RING");
+
+ ring_block_sz = req3.tp_block_size;
+ ring_block_nr = req3.tp_block_nr;
+
+ ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+ if (ring == MAP_FAILED)
+ error(1, 0, "mmap failed");
+
+ return ring;
+}
+
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = ARRAY_SIZE(filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol and destination port */
+static void set_filter(int fd)
+{
+ const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
+ uint8_t proto;
+
+ proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+ if (cfg_family == AF_INET)
+ __set_filter(fd, offsetof(struct iphdr, protocol), proto,
+ sizeof(struct iphdr) + off_dport);
+ else
+ __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
+ sizeof(struct ip6_hdr) + off_dport);
+}
+
+/* drop everything: used temporarily during setup */
+static void set_filter_null(int fd)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = ARRAY_SIZE(filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+static int create_ring(char **ring)
+{
+ struct fanout_args args = {
+ .id = 1,
+ .type_flags = PACKET_FANOUT_CPU,
+ .max_num_members = RSS_MAX_CPUS
+ };
+ struct sockaddr_ll ll = { 0 };
+ int fd, val;
+
+ fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket creation failed");
+
+ val = TPACKET_V3;
+ if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+ error(1, errno, "setsockopt PACKET_VERSION");
+ *ring = setup_ring(fd);
+
+ /* block packets until all rings are added to the fanout group:
+ * else packets can arrive during setup and get misclassified
+ */
+ set_filter_null(fd);
+
+ ll.sll_family = AF_PACKET;
+ ll.sll_ifindex = if_nametoindex(cfg_ifname);
+ ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+ htons(ETH_P_IPV6);
+ if (bind(fd, (void *)&ll, sizeof(ll)))
+ error(1, errno, "bind");
+
+ /* must come after bind: verifies all programs in group match */
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+ /* on failure, retry using old API if that is sufficient:
+ * it has a hard limit of 256 sockets, so only try if
+ * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
+ * in this API, the third argument is left implicit.
+ */
+ if (cfg_num_queues || num_cpus > 256 ||
+ setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+ &args, sizeof(uint32_t)))
+ error(1, errno, "setsockopt PACKET_FANOUT cpu");
+ }
+
+ return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
+static int setup_sink(void)
+{
+ int fd, val;
+
+ fd = socket(cfg_family, cfg_type, 0);
+ if (fd == -1)
+ error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+ val = 1 << 20;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+ error(1, errno, "setsockopt rcvbuf");
+
+ return fd;
+}
+
+static void setup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ rings[i].cpu = i;
+ rings[i].fd = create_ring(&rings[i].mmap);
+ }
+
+ /* accept packets once all rings in the fanout group are up */
+ for (i = 0; i < num_cpus; i++)
+ set_filter(rings[i].fd);
+}
+
+static void cleanup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
+ error(1, errno, "munmap");
+ if (close(rings[i].fd))
+ error(1, errno, "close");
+ }
+}
+
+static void parse_cpulist(const char *arg)
+{
+ do {
+ rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+ arg = strchr(arg, ',');
+ if (!arg)
+ break;
+ arg++; // skip ','
+ } while (1);
+}
+
+static void show_cpulist(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_queues; i++)
+ fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
+}
+
+static void show_silos(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_rps_cpus; i++)
+ fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
+}
+
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+ int i, ret, off;
+
+ if (slen < TOEPLITZ_STR_MIN_LEN ||
+ slen > TOEPLITZ_STR_MAX_LEN + 1)
+ error(1, 0, "invalid toeplitz key");
+
+ for (i = 0, off = 0; off < slen; i++, off += 3) {
+ ret = sscanf(str + off, "%hhx", &key[i]);
+ if (ret != 1)
+ error(1, 0, "key parse error at %d off %d len %d",
+ i, off, slen);
+ }
+}
+
+static void parse_rps_bitmap(const char *arg)
+{
+ unsigned long bitmap;
+ int i;
+
+ bitmap = strtoul(arg, NULL, 0);
+
+ if (bitmap & ~(RPS_MAX_CPUS - 1))
+ error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+ bitmap, RPS_MAX_CPUS - 1);
+
+ for (i = 0; i < RPS_MAX_CPUS; i++)
+ if (bitmap & 1UL << i)
+ rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+static void read_rss_dev_info_ynl(void)
+{
+ struct ethtool_rss_get_req *req;
+ struct ethtool_rss_get_rsp *rsp;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, NULL);
+ if (!ys)
+ error(1, errno, "ynl_sock_create failed");
+
+ req = ethtool_rss_get_req_alloc();
+ if (!req)
+ error(1, errno, "ethtool_rss_get_req_alloc failed");
+
+ ethtool_rss_get_req_set_header_dev_name(req, cfg_ifname);
+
+ rsp = ethtool_rss_get(ys, req);
+ if (!rsp)
+ error(1, ys->err.code, "YNL: %s", ys->err.msg);
+
+ if (!rsp->_len.hkey)
+ error(1, 0, "RSS key not available for %s", cfg_ifname);
+
+ if (rsp->_len.hkey < TOEPLITZ_KEY_MIN_LEN ||
+ rsp->_len.hkey > TOEPLITZ_KEY_MAX_LEN)
+ error(1, 0, "RSS key length %u out of bounds [%u, %u]",
+ rsp->_len.hkey, TOEPLITZ_KEY_MIN_LEN,
+ TOEPLITZ_KEY_MAX_LEN);
+
+ memcpy(toeplitz_key, rsp->hkey, rsp->_len.hkey);
+
+ if (rsp->_count.indir > RSS_MAX_INDIR)
+ error(1, 0, "RSS indirection table too large (%u > %u)",
+ rsp->_count.indir, RSS_MAX_INDIR);
+
+ /* If indir table not available we'll fallback to simple modulo math */
+ if (rsp->_count.indir) {
+ memcpy(rss_indir_tbl, rsp->indir,
+ rsp->_count.indir * sizeof(rss_indir_tbl[0]));
+ rss_indir_tbl_size = rsp->_count.indir;
+
+ log_verbose("RSS indirection table size: %u\n",
+ rss_indir_tbl_size);
+ }
+
+ ethtool_rss_get_rsp_free(rsp);
+ ethtool_rss_get_req_free(req);
+ ynl_sock_destroy(ys);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ static struct option long_options[] = {
+ {"dport", required_argument, 0, 'd'},
+ {"cpus", required_argument, 0, 'C'},
+ {"key", required_argument, 0, 'k'},
+ {"iface", required_argument, 0, 'i'},
+ {"ipv4", no_argument, 0, '4'},
+ {"ipv6", no_argument, 0, '6'},
+ {"sink", no_argument, 0, 's'},
+ {"tcp", no_argument, 0, 't'},
+ {"timeout", required_argument, 0, 'T'},
+ {"udp", no_argument, 0, 'u'},
+ {"verbose", no_argument, 0, 'v'},
+ {"rps", required_argument, 0, 'r'},
+ {0, 0, 0, 0}
+ };
+ bool have_toeplitz = false;
+ int index, c;
+
+ while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = AF_INET;
+ break;
+ case '6':
+ cfg_family = AF_INET6;
+ break;
+ case 'C':
+ parse_cpulist(optarg);
+ break;
+ case 'd':
+ cfg_dport = strtol(optarg, NULL, 0);
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ case 'k':
+ parse_toeplitz_key(optarg, strlen(optarg),
+ toeplitz_key);
+ have_toeplitz = true;
+ break;
+ case 'r':
+ parse_rps_bitmap(optarg);
+ break;
+ case 's':
+ cfg_sink = true;
+ break;
+ case 't':
+ cfg_type = SOCK_STREAM;
+ break;
+ case 'T':
+ cfg_timeout_msec = strtol(optarg, NULL, 0);
+ break;
+ case 'u':
+ cfg_type = SOCK_DGRAM;
+ break;
+ case 'v':
+ cfg_verbose = true;
+ break;
+
+ default:
+ error(1, 0, "unknown option %c", optopt);
+ break;
+ }
+ }
+
+ if (!have_toeplitz)
+ read_rss_dev_info_ynl();
+
+ num_cpus = get_nprocs();
+ if (num_cpus > RSS_MAX_CPUS)
+ error(1, 0, "increase RSS_MAX_CPUS");
+
+ if (cfg_num_queues && cfg_num_rps_cpus)
+ error(1, 0,
+ "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+ if (cfg_verbose) {
+ show_cpulist();
+ show_silos();
+ }
+}
+
+int main(int argc, char **argv)
+{
+ const int min_tests = 10;
+ int fd_sink = -1;
+
+ parse_opts(argc, argv);
+
+ if (cfg_sink)
+ fd_sink = setup_sink();
+
+ setup_rings();
+
+ /* Signal to test framework that we're ready to receive */
+ ksft_ready();
+
+ process_rings();
+ cleanup_rings();
+
+ if (cfg_sink && close(fd_sink))
+ error(1, errno, "close sink");
+
+ if (frames_received - frames_nohash < min_tests)
+ error(1, 0, "too few frames for verification");
+
+ return frames_error;
+}
diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.py b/tools/testing/selftests/drivers/net/hw/toeplitz.py
new file mode 100755
index 000000000000..d2db5ee9e358
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Toeplitz Rx hashing test:
+ - rxhash (the hash value calculation itself);
+ - RSS mapping from rxhash to rx queue;
+ - RPS mapping from rxhash to cpu.
+"""
+
+import glob
+import os
+import socket
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import NetDrvEpEnv, EthtoolFamily, NetdevFamily
+from lib.py import cmd, bkg, rand_port, defer
+from lib.py import ksft_in
+from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, KsftFailEx
+
+# "define" for the ID of the Toeplitz hash function
+ETH_RSS_HASH_TOP = 1
+
+
+def _check_rps_and_rfs_not_configured(cfg):
+ """Verify that RPS is not already configured."""
+
+ for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"):
+ with open(rps_file, "r", encoding="utf-8") as fp:
+ val = fp.read().strip()
+ if set(val) - {"0", ","}:
+ raise KsftSkipEx(f"RPS already configured on {rps_file}: {val}")
+
+ rfs_file = "/proc/sys/net/core/rps_sock_flow_entries"
+ with open(rfs_file, "r", encoding="utf-8") as fp:
+ val = fp.read().strip()
+ if val != "0":
+ raise KsftSkipEx(f"RFS already configured {rfs_file}: {val}")
+
+
+def _get_cpu_for_irq(irq):
+ with open(f"/proc/irq/{irq}/smp_affinity_list", "r",
+ encoding="utf-8") as fp:
+ data = fp.read().strip()
+ if "," in data or "-" in data:
+ raise KsftFailEx(f"IRQ{irq} is not mapped to a single core: {data}")
+ return int(data)
+
+
+def _get_irq_cpus(cfg):
+ """
+ Read the list of IRQs for the device Rx queues.
+ """
+ queues = cfg.netnl.queue_get({"ifindex": cfg.ifindex}, dump=True)
+ napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+
+ # Remap into ID-based dicts
+ napis = {n["id"]: n for n in napis}
+ queues = {f"{q['type']}{q['id']}": q for q in queues}
+
+ cpus = []
+ for rx in range(9999):
+ name = f"rx{rx}"
+ if name not in queues:
+ break
+ cpus.append(_get_cpu_for_irq(napis[queues[name]["napi-id"]]["irq"]))
+
+ return cpus
+
+
+def _get_unused_cpus(cfg, count=2):
+ """
+ Get CPUs that are not used by Rx queues.
+ Returns a list of at least 'count' CPU numbers.
+ """
+
+ # Get CPUs used by Rx queues
+ rx_cpus = set(_get_irq_cpus(cfg))
+
+ # Get total number of CPUs
+ num_cpus = os.cpu_count()
+
+ # Find unused CPUs
+ unused_cpus = [cpu for cpu in range(num_cpus) if cpu not in rx_cpus]
+
+ if len(unused_cpus) < count:
+ raise KsftSkipEx(f"Need at {count} CPUs not used by Rx queues, found {len(unused_cpus)}")
+
+ return unused_cpus[:count]
+
+
+def _configure_rps(cfg, rps_cpus):
+ """Configure RPS for all Rx queues."""
+
+ mask = 0
+ for cpu in rps_cpus:
+ mask |= (1 << cpu)
+ mask = hex(mask)[2:]
+
+ # Set RPS bitmap for all rx queues
+ for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"):
+ with open(rps_file, "w", encoding="utf-8") as fp:
+ fp.write(mask)
+
+ return mask
+
+
+def _send_traffic(cfg, proto_flag, ipver, port):
+ """Send 20 packets of requested type."""
+
+ # Determine protocol and IP version for socat
+ if proto_flag == "-u":
+ proto = "UDP"
+ else:
+ proto = "TCP"
+
+ baddr = f"[{cfg.addr_v['6']}]" if ipver == "6" else cfg.addr_v["4"]
+
+ # Run socat in a loop to send traffic periodically
+ # Use sh -c with a loop similar to toeplitz_client.sh
+ socat_cmd = f"""
+ for i in `seq 20`; do
+ echo "msg $i" | socat -{ipver} -t 0.1 - {proto}:{baddr}:{port};
+ sleep 0.001;
+ done
+ """
+
+ cmd(socat_cmd, shell=True, host=cfg.remote)
+
+
+def _test_variants():
+ for grp in ["", "rss", "rps"]:
+ for l4 in ["tcp", "udp"]:
+ for l3 in ["4", "6"]:
+ name = f"{l4}_ipv{l3}"
+ if grp:
+ name = f"{grp}_{name}"
+ yield KsftNamedVariant(name, "-" + l4[0], l3, grp)
+
+
+@ksft_variants(_test_variants())
+def test(cfg, proto_flag, ipver, grp):
+ """Run a single toeplitz test."""
+
+ cfg.require_ipver(ipver)
+
+ # Check that rxhash is enabled
+ ksft_in("receive-hashing: on", cmd(f"ethtool -k {cfg.ifname}").stdout)
+
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # Make sure NIC is configured to use Toeplitz hash, and no key xfrm.
+ if rss.get('hfunc') != ETH_RSS_HASH_TOP or rss.get('input-xfrm'):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hfunc": ETH_RSS_HASH_TOP,
+ "input-xfrm": {}})
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},
+ "hfunc": rss.get('hfunc'),
+ "input-xfrm": rss.get('input-xfrm', {})
+ })
+
+ port = rand_port(socket.SOCK_DGRAM)
+
+ toeplitz_path = cfg.test_dir / "toeplitz"
+ rx_cmd = [
+ str(toeplitz_path),
+ "-" + ipver,
+ proto_flag,
+ "-d", str(port),
+ "-i", cfg.ifname,
+ "-T", "4000",
+ "-s",
+ "-v"
+ ]
+
+ if grp:
+ _check_rps_and_rfs_not_configured(cfg)
+ if grp == "rss":
+ irq_cpus = ",".join([str(x) for x in _get_irq_cpus(cfg)])
+ rx_cmd += ["-C", irq_cpus]
+ ksft_pr(f"RSS using CPUs: {irq_cpus}")
+ elif grp == "rps":
+ # Get CPUs not used by Rx queues and configure them for RPS
+ rps_cpus = _get_unused_cpus(cfg, count=2)
+ rps_mask = _configure_rps(cfg, rps_cpus)
+ defer(_configure_rps, cfg, [])
+ rx_cmd += ["-r", rps_mask]
+ ksft_pr(f"RPS using CPUs: {rps_cpus}, mask: {rps_mask}")
+
+ # Run rx in background, it will exit once it has seen enough packets
+ with bkg(" ".join(rx_cmd), ksft_ready=True, exit_wait=True) as rx_proc:
+ while rx_proc.proc.poll() is None:
+ _send_traffic(cfg, proto_flag, ipver, port)
+
+ # Check rx result
+ ksft_pr("Receiver output:")
+ ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# '))
+ if rx_proc.stderr:
+ ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# '))
+
+
+def main() -> None:
+ """Ksft boilerplate main."""
+
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+ ksft_run(cases=[test], args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
new file mode 100755
index 000000000000..0998e68ebaf0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Run the tools/testing/selftests/net/csum testsuite."""
+
+import fcntl
+import socket
+import struct
+import termios
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit, KsftSkipEx, KsftXfailEx
+from lib.py import ksft_eq, ksft_ge, ksft_lt
+from lib.py import EthtoolFamily, NetdevFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen
+
+
+def sock_wait_drain(sock, max_wait=1000):
+ """Wait for all pending write data on the socket to get ACKed."""
+ for _ in range(max_wait):
+ one = b'\0' * 4
+ outq = fcntl.ioctl(sock.fileno(), termios.TIOCOUTQ, one)
+ outq = struct.unpack("I", outq)[0]
+ if outq == 0:
+ break
+ time.sleep(0.01)
+ ksft_eq(outq, 0)
+
+
+def tcp_sock_get_retrans(sock):
+ """Get the number of retransmissions for the TCP socket."""
+ info = sock.getsockopt(socket.SOL_TCP, socket.TCP_INFO, 512)
+ return struct.unpack("I", info[100:104])[0]
+
+
+def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso):
+ cfg.require_cmd("socat", local=False, remote=True)
+
+ port = rand_port()
+ listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof"
+
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
+ wait_port_listen(port, host=cfg.remote)
+
+ if ipver == "4":
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.connect((remote_v4, port))
+ else:
+ sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+ sock.connect((remote_v6, port))
+
+ # Small send to make sure the connection is working.
+ sock.send("ping".encode())
+ sock_wait_drain(sock)
+
+ # Send 4MB of data, record the LSO packet count.
+ qstat_old = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+ buf = b"0" * 1024 * 1024 * 4
+ sock.send(buf)
+ sock_wait_drain(sock)
+ qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ # Check that at least 90% of the data was sent as LSO packets.
+ # System noise may cause false negatives. Also header overheads
+ # will add up to 5% of extra packes... The check is best effort.
+ total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"]
+ total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"]
+
+ # Make sure we have order of magnitude more LSO packets than
+ # retransmits, in case TCP retransmitted all the LSO packets.
+ ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4)
+ sock.close()
+
+ if should_lso:
+ if cfg.have_stat_super_count:
+ ksft_ge(qstat_new['tx-hw-gso-packets'] -
+ qstat_old['tx-hw-gso-packets'],
+ total_lso_super,
+ comment="Number of LSO super-packets with LSO enabled")
+ if cfg.have_stat_wire_count:
+ ksft_ge(qstat_new['tx-hw-gso-wire-packets'] -
+ qstat_old['tx-hw-gso-wire-packets'],
+ total_lso_wire,
+ comment="Number of LSO wire-packets with LSO enabled")
+ else:
+ if cfg.have_stat_super_count:
+ ksft_lt(qstat_new['tx-hw-gso-packets'] -
+ qstat_old['tx-hw-gso-packets'],
+ 15, comment="Number of LSO super-packets with LSO disabled")
+ if cfg.have_stat_wire_count:
+ ksft_lt(qstat_new['tx-hw-gso-wire-packets'] -
+ qstat_old['tx-hw-gso-wire-packets'],
+ 500, comment="Number of LSO wire-packets with LSO disabled")
+
+
+def build_tunnel(cfg, outer_ipver, tun_info):
+ local_v4 = NetDrvEpEnv.nsim_v4_pfx + "1"
+ local_v6 = NetDrvEpEnv.nsim_v6_pfx + "1"
+ remote_v4 = NetDrvEpEnv.nsim_v4_pfx + "2"
+ remote_v6 = NetDrvEpEnv.nsim_v6_pfx + "2"
+
+ local_addr = cfg.addr_v[outer_ipver]
+ remote_addr = cfg.remote_addr_v[outer_ipver]
+
+ tun_type = tun_info[0]
+ tun_arg = tun_info[1]
+ ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}")
+ defer(ip, f"link del {tun_type}-ksft")
+ ip(f"link set dev {tun_type}-ksft up")
+ ip(f"addr add {local_v4}/24 dev {tun_type}-ksft")
+ ip(f"addr add {local_v6}/64 dev {tun_type}-ksft")
+
+ ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}",
+ host=cfg.remote)
+ defer(ip, f"link del {tun_type}-ksft", host=cfg.remote)
+ ip(f"link set dev {tun_type}-ksft up", host=cfg.remote)
+ ip(f"addr add {remote_v4}/24 dev {tun_type}-ksft", host=cfg.remote)
+ ip(f"addr add {remote_v6}/64 dev {tun_type}-ksft", host=cfg.remote)
+
+ return remote_v4, remote_v6
+
+
+def restore_wanted_features(cfg):
+ features_cmd = ""
+ for feature in cfg.hw_features:
+ setting = "on" if feature in cfg.wanted_features else "off"
+ features_cmd += f" {feature} {setting}"
+ try:
+ ethtool(f"-K {cfg.ifname} {features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure restoring wanted features: {e}")
+
+
+def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
+ """Construct specific tests from the common template."""
+ def f(cfg):
+ cfg.require_ipver(outer_ipver)
+ defer(restore_wanted_features, cfg)
+
+ if not cfg.have_stat_super_count and \
+ not cfg.have_stat_wire_count:
+ raise KsftSkipEx(f"Device does not support LSO queue stats")
+
+ if feature not in cfg.hw_features:
+ raise KsftSkipEx(f"Device does not support {feature}")
+
+ ipver = outer_ipver
+ if tun:
+ remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun)
+ ipver = inner_ipver
+ else:
+ remote_v4 = cfg.remote_addr_v["4"]
+ remote_v6 = cfg.remote_addr_v["6"]
+
+ # First test without the feature enabled.
+ ethtool(f"-K {cfg.ifname} {feature} off")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False)
+
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
+ if feature in cfg.partial_features:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+ if ipver == "4":
+ ksft_pr("Testing with mangleid enabled")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
+
+ # Full feature enabled.
+ ethtool(f"-K {cfg.ifname} {feature} on")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
+
+ f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver
+ return f
+
+
+def query_nic_features(cfg) -> None:
+ """Query and cache the NIC features."""
+ cfg.have_stat_super_count = False
+ cfg.have_stat_wire_count = False
+
+ features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+
+ cfg.wanted_features = set()
+ for f in features["wanted"]["bits"]["bit"]:
+ cfg.wanted_features.add(f["name"])
+
+ cfg.hw_features = set()
+ hw_all_features_cmd = ""
+ for f in features["hw"]["bits"]["bit"]:
+ if f.get("value", False):
+ feature = f["name"]
+ cfg.hw_features.add(feature)
+ hw_all_features_cmd += f" {feature} on"
+ try:
+ ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure enabling all hw features: {e}")
+ ksft_pr("partial gso feature detection may be impacted")
+
+ # Check which features are supported via GSO partial
+ cfg.partial_features = set()
+ if 'tx-gso-partial' in cfg.hw_features:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+
+ no_partial = set()
+ features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+ for f in features["active"]["bits"]["bit"]:
+ no_partial.add(f["name"])
+ cfg.partial_features = cfg.hw_features - no_partial
+ ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+
+ restore_wanted_features(cfg)
+
+ stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)
+ if stats:
+ if 'tx-hw-gso-packets' in stats[0]:
+ ksft_pr("Detected qstat for LSO super-packets")
+ cfg.have_stat_super_count = True
+ if 'tx-hw-gso-wire-packets' in stats[0]:
+ ksft_pr("Detected qstat for LSO wire-packets")
+ cfg.have_stat_wire_count = True
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+
+ query_nic_features(cfg)
+
+ test_info = (
+ # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions)
+ ("", "4", "tx-tcp-segmentation", None),
+ ("", "6", "tx-tcp6-segmentation", None),
+ ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))),
+ ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))),
+ ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))),
+ ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))),
+ ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))),
+ )
+
+ cases = []
+ for outer_ipver in ["4", "6"]:
+ for info in test_info:
+ # Skip if test which only works for a specific IP version
+ if info[1] and outer_ipver != info[1]:
+ continue
+
+ if info[3]:
+ cases += [
+ test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver)
+ for inner_ipver in info[3][2]
+ ]
+ else:
+ cases.append(test_builder(info[0], cfg, outer_ipver, info[2], None, outer_ipver))
+
+ ksft_run(cases=cases, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
new file mode 100755
index 000000000000..d19d1d518208
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+# This is intended to be run on a virtio-net guest interface.
+# The test binds the XDP socket to the interface without setting
+# the fill ring to trigger delayed refill_work. This helps to
+# make it easier to reproduce the deadlock when XDP program,
+# XDP socket bind/unbind, rx ring resize race with refill_work on
+# the buggy kernel.
+#
+# The Qemu command to setup virtio-net
+# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no
+# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on
+
+from lib.py import ksft_exit, ksft_run
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetDrvEnv
+from lib.py import bkg, ip, cmd, ethtool
+import time
+
+def _get_rx_ring_entries(cfg):
+ output = ethtool(f"-g {cfg.ifname}", json=True)
+ return output[0]["rx"]
+
+def setup_xsk(cfg, xdp_queue_id = 0) -> bkg:
+ # Probe for support
+ xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
+ if xdp.ret == 255:
+ raise KsftSkipEx('AF_XDP unsupported')
+ elif xdp.ret > 0:
+ raise KsftFailEx('unable to create AF_XDP socket')
+
+ try:
+ return bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} ' \
+ '{xdp_queue_id} -z', ksft_wait=3)
+ except:
+ raise KsftSkipEx('Failed to bind XDP socket in zerocopy.\n' \
+ 'Please consider adding iommu_platform=on ' \
+ 'when setting up virtio-net-pci')
+
+def check_xdp_bind(cfg):
+ with setup_xsk(cfg):
+ ip(f"link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
+ ip(f"link set dev %s xdp off" % cfg.ifname)
+
+def check_rx_resize(cfg):
+ with setup_xsk(cfg):
+ rx_ring = _get_rx_ring_entries(cfg)
+ ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring // 2))
+ ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring))
+
+def main():
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ ksft_run([check_xdp_bind, check_rx_resize],
+ args=(cfg, ))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()