diff options
Diffstat (limited to 'tools/testing/selftests/drivers/net/hw')
31 files changed, 7892 insertions, 0 deletions
diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore new file mode 100644 index 000000000000..46540468a775 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/.gitignore @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +iou-zcrx +ncdevmem +toeplitz diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile new file mode 100644 index 000000000000..9c163ba6feee --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +# Check if io_uring supports zero-copy receive +HAS_IOURING_ZCRX := $(shell \ + echo -e '#include <liburing.h>\n' \ + 'void *func = (void *)io_uring_register_ifq;\n' \ + 'int main() {return 0;}' | \ + $(CC) -luring -x c - -o /dev/null 2>&1 && echo y) + +ifeq ($(HAS_IOURING_ZCRX),y) +COND_GEN_FILES += iou-zcrx +else +$(warning excluding iouring tests, liburing not installed or too old) +endif + +TEST_GEN_FILES := \ + $(COND_GEN_FILES) \ +# end of TEST_GEN_FILES + +TEST_PROGS = \ + csum.py \ + devlink_port_split.py \ + devlink_rate_tc_bw.py \ + devmem.py \ + ethtool.sh \ + ethtool_extended_state.sh \ + ethtool_mm.sh \ + ethtool_rmon.sh \ + hw_stats_l3.sh \ + hw_stats_l3_gre.sh \ + iou-zcrx.py \ + irq.py \ + loopback.sh \ + nic_timestamp.py \ + pp_alloc_fail.py \ + rss_api.py \ + rss_ctx.py \ + rss_flow_label.py \ + rss_input_xfrm.py \ + toeplitz.py \ + tso.py \ + xsk_reconfig.py \ + # + +TEST_FILES := \ + ethtool_lib.sh \ + # + +TEST_INCLUDES := \ + $(wildcard lib/py/*.py ../lib/py/*.py) \ + ../../../net/lib.sh \ + ../../../net/forwarding/ipip_lib.sh \ + ../../../net/forwarding/lib.sh \ + ../../../net/forwarding/tc_common.sh \ + # + +# YNL files, must be before "include ..lib.mk" +YNL_GEN_FILES := \ + ncdevmem \ + toeplitz \ +# end of YNL_GEN_FILES +TEST_GEN_FILES += $(YNL_GEN_FILES) +TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) + +include ../../../lib.mk + +# YNL build +YNL_GENS := \ + ethtool \ + netdev \ +# end of YNL_GENS + +include ../../../net/ynl.mk + +include ../../../net/bpf.mk + +ifeq ($(HAS_IOURING_ZCRX),y) +$(OUTPUT)/iou-zcrx: LDLIBS += -luring +endif diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config new file mode 100644 index 000000000000..2307aa001be1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/config @@ -0,0 +1,11 @@ +CONFIG_FAIL_FUNCTION=y +CONFIG_FAULT_INJECTION=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FUNCTION_ERROR_INJECTION=y +CONFIG_IO_URING=y +CONFIG_IPV6=y +CONFIG_IPV6_GRE=y +CONFIG_NET_IPGRE=y +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_UDMABUF=y +CONFIG_VXLAN=y diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py new file mode 100755 index 000000000000..3e3a89a34afe --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Run the tools/testing/selftests/net/csum testsuite.""" + +from os import path + +from lib.py import ksft_run, ksft_exit, KsftSkipEx +from lib.py import EthtoolFamily, NetDrvEpEnv +from lib.py import bkg, cmd, wait_port_listen + +def test_receive(cfg, ipver="6", extra_args=None): + """Test local nic checksum receive. Remote host sends crafted packets.""" + if not cfg.have_rx_csum: + raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}") + + ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}" + + rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}" + tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}" + + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(34000, proto="udp") + cmd(tx_cmd, host=cfg.remote) + + +def test_transmit(cfg, ipver="6", extra_args=None): + """Test local nic checksum transmit. Remote host verifies packets.""" + if (not cfg.have_tx_csum_generic and + not (cfg.have_tx_csum_ipv4 and ipver == "4") and + not (cfg.have_tx_csum_ipv6 and ipver == "6")): + raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}") + + ip_args = f"-{ipver} -S {cfg.addr_v[ipver]} -D {cfg.remote_addr_v[ipver]}" + + # Cannot randomize input when calculating zero checksum + if extra_args != "-U -Z": + extra_args += " -r 1" + + rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}" + tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}" + + with bkg(rx_cmd, host=cfg.remote, exit_wait=True): + wait_port_listen(34000, proto="udp", host=cfg.remote) + cmd(tx_cmd) + + +def test_builder(name, cfg, ipver="6", tx=False, extra_args=""): + """Construct specific tests from the common template. + + Most tests follow the same basic pattern, differing only in + Direction of the test and optional flags passed to csum.""" + def f(cfg): + cfg.require_ipver(ipver) + + if tx: + test_transmit(cfg, ipver, extra_args) + else: + test_receive(cfg, ipver, extra_args) + + f.__name__ = f"ipv{ipver}_" + name + return f + + +def check_nic_features(cfg) -> None: + """Test whether Tx and Rx checksum offload are enabled. + + If the device under test has either off, then skip the relevant tests.""" + cfg.have_tx_csum_generic = False + cfg.have_tx_csum_ipv4 = False + cfg.have_tx_csum_ipv6 = False + cfg.have_rx_csum = False + + ethnl = EthtoolFamily() + features = ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + for f in features["active"]["bits"]["bit"]: + if f["name"] == "tx-checksum-ip-generic": + cfg.have_tx_csum_generic = True + elif f["name"] == "tx-checksum-ipv4": + cfg.have_tx_csum_ipv4 = True + elif f["name"] == "tx-checksum-ipv6": + cfg.have_tx_csum_ipv6 = True + elif f["name"] == "rx-checksum": + cfg.have_rx_csum = True + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + check_nic_features(cfg) + + cfg.bin_local = cfg.net_lib_dir / "csum" + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + cases = [] + for ipver in ["4", "6"]: + cases.append(test_builder("rx_tcp", cfg, ipver, False, "-t")) + cases.append(test_builder("rx_tcp_invalid", cfg, ipver, False, "-t -E")) + + cases.append(test_builder("rx_udp", cfg, ipver, False, "")) + cases.append(test_builder("rx_udp_invalid", cfg, ipver, False, "-E")) + + cases.append(test_builder("tx_udp_csum_offload", cfg, ipver, True, "-U")) + cases.append(test_builder("tx_udp_zero_checksum", cfg, ipver, True, "-U -Z")) + + ksft_run(cases=cases, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/devlink_port_split.py b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py new file mode 100755 index 000000000000..2d84c7a0be6b --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from subprocess import PIPE, Popen +import json +import time +import argparse +import collections +import sys + +# +# Test port split configuration using devlink-port lanes attribute. +# The test is skipped in case the attribute is not available. +# +# First, check that all the ports with 1 lane fail to split. +# Second, check that all the ports with more than 1 lane can be split +# to all valid configurations (e.g., split to 2, split to 4 etc.) +# + + +# Kselftest framework requirement - SKIP code is 4 +KSFT_SKIP=4 +Port = collections.namedtuple('Port', 'bus_info name') + + +def run_command(cmd, should_fail=False): + """ + Run a command in subprocess. + Return: Tuple of (stdout, stderr). + """ + + p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) + stdout, stderr = p.communicate() + stdout, stderr = stdout.decode(), stderr.decode() + + if stderr != "" and not should_fail: + print("Error sending command: %s" % cmd) + print(stdout) + print(stderr) + return stdout, stderr + + +class devlink_ports(object): + """ + Class that holds information on the devlink ports, required to the tests; + if_names: A list of interfaces in the devlink ports. + """ + + def get_if_names(dev): + """ + Get a list of physical devlink ports. + Return: Array of tuples (bus_info/port, if_name). + """ + + arr = [] + + cmd = "devlink -j port show" + stdout, stderr = run_command(cmd) + assert stderr == "" + ports = json.loads(stdout)['port'] + + validate_devlink_output(ports, 'flavour') + + for port in ports: + if dev in port: + if ports[port]['flavour'] == 'physical': + arr.append(Port(bus_info=port, name=ports[port]['netdev'])) + + return arr + + def __init__(self, dev): + self.if_names = devlink_ports.get_if_names(dev) + + +def get_max_lanes(port): + """ + Get the $port's maximum number of lanes. + Return: number of lanes, e.g. 1, 2, 4 and 8. + """ + + cmd = "devlink -j port show %s" % port + stdout, stderr = run_command(cmd) + assert stderr == "" + values = list(json.loads(stdout)['port'].values())[0] + + if 'lanes' in values: + lanes = values['lanes'] + else: + lanes = 0 + return lanes + + +def get_split_ability(port): + """ + Get the $port split ability. + Return: split ability, true or false. + """ + + cmd = "devlink -j port show %s" % port.name + stdout, stderr = run_command(cmd) + assert stderr == "" + values = list(json.loads(stdout)['port'].values())[0] + + return values['splittable'] + + +def split(k, port, should_fail=False): + """ + Split $port into $k ports. + If should_fail == True, the split should fail. Otherwise, should pass. + Return: Array of sub ports after splitting. + If the $port wasn't split, the array will be empty. + """ + + cmd = "devlink port split %s count %s" % (port.bus_info, k) + stdout, stderr = run_command(cmd, should_fail=should_fail) + + if should_fail: + if not test(stderr != "", "%s is unsplittable" % port.name): + print("split an unsplittable port %s" % port.name) + return create_split_group(port, k) + else: + if stderr == "": + return create_split_group(port, k) + print("didn't split a splittable port %s" % port.name) + + return [] + + +def unsplit(port): + """ + Unsplit $port. + """ + + cmd = "devlink port unsplit %s" % port + stdout, stderr = run_command(cmd) + test(stderr == "", "Unsplit port %s" % port) + + +def exists(port, dev): + """ + Check if $port exists in the devlink ports. + Return: True is so, False otherwise. + """ + + return any(dev_port.name == port + for dev_port in devlink_ports.get_if_names(dev)) + + +def exists_and_lanes(ports, lanes, dev): + """ + Check if every port in the list $ports exists in the devlink ports and has + $lanes number of lanes after splitting. + Return: True if both are True, False otherwise. + """ + + for port in ports: + max_lanes = get_max_lanes(port) + if not exists(port, dev): + print("port %s doesn't exist in devlink ports" % port) + return False + if max_lanes != lanes: + print("port %s has %d lanes, but %s were expected" + % (port, lanes, max_lanes)) + return False + return True + + +def test(cond, msg): + """ + Check $cond and print a message accordingly. + Return: True is pass, False otherwise. + """ + + if cond: + print("TEST: %-60s [ OK ]" % msg) + else: + print("TEST: %-60s [FAIL]" % msg) + + return cond + + +def create_split_group(port, k): + """ + Create the split group for $port. + Return: Array with $k elements, which are the split port group. + """ + + return list(port.name + "s" + str(i) for i in range(k)) + + +def split_unsplittable_port(port, k): + """ + Test that splitting of unsplittable port fails. + """ + + # split to max + new_split_group = split(k, port, should_fail=True) + + if new_split_group != []: + unsplit(port.bus_info) + + +def split_splittable_port(port, k, lanes, dev): + """ + Test that splitting of splittable port passes correctly. + """ + + new_split_group = split(k, port) + + # Once the split command ends, it takes some time to the sub ifaces' + # to get their names. Use udevadm to continue only when all current udev + # events are handled. + cmd = "udevadm settle" + stdout, stderr = run_command(cmd) + assert stderr == "" + + if new_split_group != []: + test(exists_and_lanes(new_split_group, lanes/k, dev), + "split port %s into %s" % (port.name, k)) + + unsplit(port.bus_info) + + +def validate_devlink_output(devlink_data, target_property=None): + """ + Determine if test should be skipped by checking: + 1. devlink_data contains values + 2. The target_property exist in devlink_data + """ + skip_reason = None + if any(devlink_data.values()): + if target_property: + skip_reason = "{} not found in devlink output, test skipped".format(target_property) + for key in devlink_data: + if target_property in devlink_data[key]: + skip_reason = None + else: + skip_reason = 'devlink output is empty, test skipped' + + if skip_reason: + print(skip_reason) + sys.exit(KSFT_SKIP) + + +def make_parser(): + parser = argparse.ArgumentParser(description='A test for port splitting.') + parser.add_argument('--dev', + help='The devlink handle of the device under test. ' + + 'The default is the first registered devlink ' + + 'handle.') + + return parser + + +def main(cmdline=None): + parser = make_parser() + args = parser.parse_args(cmdline) + + dev = args.dev + if not dev: + cmd = "devlink -j dev show" + stdout, stderr = run_command(cmd) + assert stderr == "" + + validate_devlink_output(json.loads(stdout)) + devs = json.loads(stdout)['dev'] + dev = list(devs.keys())[0] + + cmd = "devlink dev show %s" % dev + stdout, stderr = run_command(cmd) + if stderr != "": + print("devlink device %s can not be found" % dev) + sys.exit(1) + + ports = devlink_ports(dev) + + found_max_lanes = False + for port in ports.if_names: + max_lanes = get_max_lanes(port.name) + + # If max lanes is 0, do not test port splitting at all + if max_lanes == 0: + continue + + # If 1 lane, shouldn't be able to split + elif max_lanes == 1: + test(not get_split_ability(port), + "%s should not be able to split" % port.name) + split_unsplittable_port(port, max_lanes) + + # Else, splitting should pass and all the split ports should exist. + else: + lane = max_lanes + test(get_split_ability(port), + "%s should be able to split" % port.name) + while lane > 1: + split_splittable_port(port, lane, max_lanes, dev) + + lane //= 2 + found_max_lanes = True + + if not found_max_lanes: + print(f"Test not started, no port of device {dev} reports max_lanes") + sys.exit(KSFT_SKIP) + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py new file mode 100755 index 000000000000..4e4faa9275bb --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py @@ -0,0 +1,439 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Devlink Rate TC Bandwidth Test Suite +=================================== + +This test suite verifies the functionality of devlink-rate traffic class (TC) +bandwidth distribution in a virtualized environment. The tests validate that +bandwidth can be properly allocated between different traffic classes and +that TC mapping works as expected. + +Test Environment: +---------------- +- Creates 1 VF +- Establishes a bridge connecting the VF representor and the uplink representor +- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102) +- Configures different traffic classes (TC3 and TC4) for each VLAN + +Test Cases: +---------- +1. test_no_tc_mapping_bandwidth: + - Verifies that without TC mapping, bandwidth is NOT distributed according to + the configured 20/80 split between TC3 and TC4 + - This test should fail if bandwidth matches the 20/80 split without TC + mapping + - Expected: Bandwidth should NOT be distributed as 20/80 + +2. test_tc_mapping_bandwidth: + - Configures TC mapping using mqprio qdisc + - Verifies that with TC mapping, bandwidth IS distributed according to the + configured 20/80 split between TC3 and TC4 + - Expected: Bandwidth should be distributed as 20/80 + +Bandwidth Distribution: +---------------------- +- TC3 (VLAN 101): Configured for 20% of total bandwidth +- TC4 (VLAN 102): Configured for 80% of total bandwidth +- Total bandwidth: 1Gbps +- Tolerance: +-12% + +Hardware-Specific Behavior (mlx5): +-------------------------- +mlx5 hardware enforces traffic class separation by ensuring that each transmit +queue (SQ) is associated with a single TC. If a packet is sent on a queue that +doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set +mapping), the hardware moves the queue to the correct TC scheduler to preserve +traffic isolation. + +This behavior means that even without explicit TC-to-queue mapping, bandwidth +enforcement may still appear to workâbecause the hardware dynamically adjusts +the scheduling context. However, this can lead to performance issues in high +rates and HOL blocking if traffic from different TCs is mixed on the same queue. +""" + +import json +import os +import subprocess +import threading +import time + +from lib.py import ksft_pr, ksft_run, ksft_exit +from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx +from lib.py import NetDrvEpEnv, DevlinkFamily +from lib.py import NlError +from lib.py import cmd, defer, ethtool, ip +from lib.py import Iperf3Runner + + +class BandwidthValidator: + """ + Validates total bandwidth and individual shares with tolerance + relative to the overall total. + """ + + def __init__(self, shares): + self.tolerance_percent = 12 + self.expected_total = sum(shares.values()) + self.bounds = {} + + for name, exp in shares.items(): + self.bounds[name] = (self.min_expected(exp), self.max_expected(exp)) + + def min_expected(self, value): + """Calculates the minimum acceptable value based on tolerance.""" + return value - (self.expected_total * self.tolerance_percent / 100) + + def max_expected(self, value): + """Calculates the maximum acceptable value based on tolerance.""" + return value + (self.expected_total * self.tolerance_percent / 100) + + def bound(self, values): + """ + Return True if all given values fall within tolerance. + """ + for name, value in values.items(): + low, high = self.bounds[name] + if not low <= value <= high: + return False + return True + + +def setup_vf(cfg, set_tc_mapping=True): + """ + Sets up a VF on the given network interface. + + Enables SR-IOV and switchdev mode, brings the VF interface up, + and optionally configures TC mapping using mqprio. + """ + try: + cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev") + defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy") + except Exception as exc: + raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc + try: + cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True) + defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True) + except Exception as exc: + raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc + + time.sleep(2) + vf_ifc = (os.listdir( + f"/sys/class/net/{cfg.ifname}/device/virtfn0/net") or [None])[0] + if vf_ifc: + ip(f"link set dev {vf_ifc} up") + else: + raise KsftSkipEx("VF interface not found") + if set_tc_mapping: + cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8") + + return vf_ifc + + +def setup_vlans_on_vf(vf_ifc): + """ + Sets up two VLAN interfaces on the given VF, each mapped to a different TC. + """ + vlan_configs = [ + {"vlan_id": 101, "tc": 3, "ip": "198.51.100.1"}, + {"vlan_id": 102, "tc": 4, "ip": "198.51.100.9"}, + ] + + for config in vlan_configs: + vlan_dev = f"{vf_ifc}.{config['vlan_id']}" + ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {config['vlan_id']}") + ip(f"addr add {config['ip']}/29 dev {vlan_dev}") + ip(f"link set dev {vlan_dev} up") + ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{config['tc']}") + ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {config['tc']} and IP {config['ip']}") + + +def get_vf_info(cfg): + """ + Finds the VF representor interface and devlink port index + for the given PCI device used in the test environment. + """ + cfg.vf_representor = None + cfg.vf_port_index = None + out = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8") + ports = json.loads(out)["port"] + + for port_name, props in ports.items(): + netdev = props.get("netdev") + + if (port_name.startswith(f"pci/{cfg.pci}/") and + props.get("vfnum") == 0): + cfg.vf_representor = netdev + cfg.vf_port_index = int(port_name.split("/")[-1]) + break + + +def setup_bridge(cfg): + """ + Creates and configures a Linux bridge, with both the uplink + and VF representor interfaces attached to it. + """ + bridge_name = f"br_{os.getpid()}" + ip(f"link add name {bridge_name} type bridge") + defer(cmd, f"ip link del name {bridge_name} type bridge") + + ip(f"link set dev {cfg.ifname} master {bridge_name}") + + rep_name = cfg.vf_representor + if rep_name: + ip(f"link set dev {rep_name} master {bridge_name}") + ip(f"link set dev {rep_name} up") + ksft_pr(f"Set representor {rep_name} up and added to bridge") + else: + raise KsftSkipEx("Could not find representor for the VF") + + ip(f"link set dev {bridge_name} up") + + +def setup_devlink_rate(cfg): + """ + Configures devlink rate tx_max and traffic class bandwidth for the VF. + """ + port_index = cfg.vf_port_index + if port_index is None: + raise KsftSkipEx("Could not find VF port index") + try: + cfg.devnl.rate_set({ + "bus-name": "pci", + "dev-name": cfg.pci, + "port-index": port_index, + "rate-tx-max": 125000000, + "rate-tc-bws": [ + {"index": 0, "bw": 0}, + {"index": 1, "bw": 0}, + {"index": 2, "bw": 0}, + {"index": 3, "bw": 20}, + {"index": 4, "bw": 80}, + {"index": 5, "bw": 0}, + {"index": 6, "bw": 0}, + {"index": 7, "bw": 0}, + ] + }) + except NlError as exc: + if exc.error == 95: # EOPNOTSUPP + raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc + raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc + + +def setup_remote_vlans(cfg): + """ + Sets up VLAN interfaces on the remote side. + """ + remote_dev = cfg.remote_ifname + vlan_ids = [101, 102] + remote_ips = ["198.51.100.2", "198.51.100.10"] + + for vlan_id, ip_addr in zip(vlan_ids, remote_ips): + vlan_dev = f"{remote_dev}.{vlan_id}" + cmd(f"ip link add link {remote_dev} name {vlan_dev} " + f"type vlan id {vlan_id}", host=cfg.remote) + cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote) + cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote) + defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote) + + +def setup_test_environment(cfg, set_tc_mapping=True): + """ + Sets up the complete test environment including VF creation, VLANs, + bridge configuration and devlink rate setup. + """ + vf_ifc = setup_vf(cfg, set_tc_mapping) + ksft_pr(f"Created VF interface: {vf_ifc}") + + setup_vlans_on_vf(vf_ifc) + + get_vf_info(cfg) + setup_bridge(cfg) + + setup_devlink_rate(cfg) + setup_remote_vlans(cfg) + + +def measure_bandwidth(cfg, server_ip, client_ip, barrier): + """ + Synchronizes with peers and runs an iperf3-based bandwidth measurement + between the given endpoints. Returns average Gbps. + """ + runner = Iperf3Runner(cfg, server_ip=server_ip, client_ip=client_ip) + try: + barrier.wait(timeout=10) + except Exception as exc: + raise KsftFailEx("iperf3 barrier wait timed") from exc + + try: + bw_gbps = runner.measure_bandwidth(reverse=True) + except Exception as exc: + raise KsftFailEx("iperf3 bandwidth measurement failed") from exc + + return bw_gbps + + +def run_bandwidth_test(cfg): + """ + Runs parallel bandwidth measurements for each VLAN/TC pair and collects results. + """ + def _run_measure_bandwidth_thread(local_ip, remote_ip, results, barrier, tc_ix): + results[tc_ix] = measure_bandwidth(cfg, local_ip, remote_ip, barrier) + + vf_vlan_data = [ + # (local_ip, remote_ip, TC) + ("198.51.100.1", "198.51.100.2", 3), + ("198.51.100.9", "198.51.100.10", 4), + ] + + results = {} + threads = [] + start_barrier = threading.Barrier(len(vf_vlan_data)) + + for local_ip, remote_ip, tc_ix in vf_vlan_data: + thread = threading.Thread( + target=_run_measure_bandwidth_thread, + args=(local_ip, remote_ip, results, start_barrier, tc_ix) + ) + thread.start() + threads.append(thread) + + for thread in threads: + thread.join() + + for tc_ix, tc_bw in results.items(): + if tc_bw is None: + raise KsftFailEx("iperf3 failed; cannot evaluate bandwidth") + + return results + + +def calculate_bandwidth_percentages(results): + """ + Calculates the percentage of total bandwidth received by TC3 and TC4. + """ + if 3 not in results or 4 not in results: + raise KsftFailEx(f"Missing expected TC results in {results}") + + tc3_bw = results[3] + tc4_bw = results[4] + total_bw = tc3_bw + tc4_bw + tc3_percentage = (tc3_bw / total_bw) * 100 + tc4_percentage = (tc4_bw / total_bw) * 100 + + return { + 'tc3_bw': tc3_bw, + 'tc4_bw': tc4_bw, + 'tc3_percentage': tc3_percentage, + 'tc4_percentage': tc4_percentage, + 'total_bw': total_bw + } + + +def print_bandwidth_results(bw_data, test_name): + """ + Prints bandwidth measurements and TC usage summary for a given test. + """ + ksft_pr(f"Bandwidth check results {test_name}:") + ksft_pr(f"TC 3: {bw_data['tc3_bw']:.2f} Gbits/sec") + ksft_pr(f"TC 4: {bw_data['tc4_bw']:.2f} Gbits/sec") + ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec") + ksft_pr(f"TC 3 percentage: {bw_data['tc3_percentage']:.1f}%") + ksft_pr(f"TC 4 percentage: {bw_data['tc4_percentage']:.1f}%") + + +def verify_total_bandwidth(bw_data, validator): + """ + Ensures the total measured bandwidth falls within the acceptable tolerance. + """ + total = bw_data['total_bw'] + + if validator.bound({"total": total}): + return + + low, high = validator.bounds["total"] + + if total < low: + raise KsftSkipEx( + f"Total bandwidth {total:.2f} Gbps < minimum " + f"{low:.2f} Gbps; " + f"parent tx_max ({validator.expected_total:.1f} G) " + f"not reached, cannot validate share" + ) + + raise KsftFailEx( + f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling " + f"{high:.2f} Gbps " + f"(VF tx_max set to {validator.expected_total:.1f} G)" + ) + + +def run_bandwidth_distribution_test(cfg, set_tc_mapping): + """ + Runs parallel bandwidth measurements for both TCs and collects results. + """ + setup_test_environment(cfg, set_tc_mapping) + bandwidths = run_bandwidth_test(cfg) + bw_data = calculate_bandwidth_percentages(bandwidths) + test_name = "with TC mapping" if set_tc_mapping else "without TC mapping" + print_bandwidth_results(bw_data, test_name) + + verify_total_bandwidth(bw_data, cfg.traffic_bw_validator) + + return cfg.tc_bw_validator.bound({"tc3": bw_data['tc3_percentage'], + "tc4": bw_data['tc4_percentage']}) + + +def test_no_tc_mapping_bandwidth(cfg): + """ + Verifies that bandwidth is not split 20/80 without traffic class mapping. + """ + pass_bw_msg = "Bandwidth is NOT distributed as 20/80 without TC mapping" + fail_bw_msg = "Bandwidth matched 20/80 split without TC mapping" + is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout + + if run_bandwidth_distribution_test(cfg, set_tc_mapping=False): + if is_mlx5: + raise KsftXfailEx(fail_bw_msg) + raise KsftFailEx(fail_bw_msg) + if is_mlx5: + raise KsftFailEx("mlx5 behavior changed:" + pass_bw_msg) + ksft_pr(pass_bw_msg) + + +def test_tc_mapping_bandwidth(cfg): + """ + Verifies that bandwidth is correctly split 20/80 between TC3 and TC4 + when traffic class mapping is set. + """ + if run_bandwidth_distribution_test(cfg, set_tc_mapping=True): + ksft_pr("Bandwidth is distributed as 20/80 with TC mapping") + else: + raise KsftFailEx("Bandwidth did not match 20/80 split with TC mapping") + + +def main() -> None: + """ + Main entry point for running the test cases. + """ + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.devnl = DevlinkFamily() + + cfg.pci = os.path.basename( + os.path.realpath(f"/sys/class/net/{cfg.ifname}/device") + ) + if not cfg.pci: + raise KsftSkipEx("Could not get PCI address of the interface") + + cfg.traffic_bw_validator = BandwidthValidator({"total": 1}) + cfg.tc_bw_validator = BandwidthValidator({"tc3": 20, "tc4": 80}) + + cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth] + + ksft_run(cases=cases, args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py new file mode 100755 index 000000000000..45c2d49d55b6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from os import path +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, KsftSkipEx +from lib.py import NetDrvEpEnv +from lib.py import bkg, cmd, rand_port, wait_port_listen +from lib.py import ksft_disruptive + + +def require_devmem(cfg): + if not hasattr(cfg, "_devmem_probed"): + probe_command = f"{cfg.bin_local} -f {cfg.ifname}" + cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0 + cfg._devmem_probed = True + + if not cfg._devmem_supported: + raise KsftSkipEx("Test requires devmem support") + + +@ksft_disruptive +def check_rx(cfg) -> None: + require_devmem(cfg) + + port = rand_port() + socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}" + listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7" + + with bkg(listen_cmd, exit_wait=True) as ncdevmem: + wait_port_listen(port) + cmd(f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | \ + head -c 1K | {socat}", host=cfg.remote, shell=True) + + ksft_eq(ncdevmem.ret, 0) + + +@ksft_disruptive +def check_tx(cfg) -> None: + require_devmem(cfg) + + port = rand_port() + listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" + + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True) + + ksft_eq(socat.stdout.strip(), "hello\nworld") + + +@ksft_disruptive +def check_tx_chunks(cfg) -> None: + require_devmem(cfg) + + port = rand_port() + listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" + + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True) + + ksft_eq(socat.stdout.strip(), "hello\nworld") + + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem") + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + ksft_run([check_rx, check_tx, check_tx_chunks], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/ethtool.sh b/tools/testing/selftests/drivers/net/hw/ethtool.sh new file mode 100755 index 000000000000..fa6953de6b6d --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ethtool.sh @@ -0,0 +1,297 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + same_speeds_autoneg_off + different_speeds_autoneg_off + combination_of_neg_on_and_off + advertise_subset_of_speeds + check_highest_speed_is_chosen + different_speeds_autoneg_on +" +NUM_NETIFS=2 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh +source ethtool_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy +} + +same_speeds_autoneg_off() +{ + # Check that when each of the reported speeds is forced, the links come + # up and are operational. + local -a speeds_arr=($(common_speeds_get $h1 $h2 0 0)) + + for speed in "${speeds_arr[@]}"; do + RET=0 + ethtool_set $h1 speed $speed autoneg off + ethtool_set $h2 speed $speed autoneg off + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_err $? "ping with speed $speed autoneg off" + log_test "force speed $speed on both ends" + done + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +different_speeds_autoneg_off() +{ + # Test that when we force different speeds, links are not up and ping + # fails. + RET=0 + + local -a speeds_arr=($(different_speeds_get $h1 $h2 0 0)) + local speed1=${speeds_arr[0]} + local speed2=${speeds_arr[1]} + + ethtool_set $h1 speed $speed1 autoneg off + ethtool_set $h2 speed $speed2 autoneg off + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_fail $? "ping with different speeds" + + log_test "force of different speeds autoneg off" + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +combination_of_neg_on_and_off() +{ + # Test that when one device is forced to a speed supported by both + # endpoints and the other device is configured to autoneg on, the links + # are up and ping passes. + local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1)) + + for speed in "${speeds_arr[@]}"; do + RET=0 + ethtool_set $h1 speed $speed autoneg off + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_err $? "ping with h1-speed=$speed autoneg off, h2 autoneg on" + log_test "force speed $speed vs. autoneg" + done + + ethtool -s $h1 autoneg on +} + +hex_speed_value_get() +{ + local speed=$1; shift + + local shift_size=${speed_values[$speed]} + speed=$((0x1 << $"shift_size")) + printf "%#x" "$speed" +} + +subset_of_common_speeds_get() +{ + local dev1=$1; shift + local dev2=$1; shift + local adver=$1; shift + + local -a speeds_arr=($(common_speeds_get $dev1 $dev2 0 $adver)) + local speed_to_advertise=0 + local speed_to_remove=${speeds_arr[0]} + speed_to_remove+='base' + + local -a speeds_mode_arr=($(common_speeds_get $dev1 $dev2 1 $adver)) + + for speed in ${speeds_mode_arr[@]}; do + if [[ $speed != $speed_to_remove* ]]; then + speed=$(hex_speed_value_get $speed) + speed_to_advertise=$(($speed_to_advertise | \ + $speed)) + fi + + done + + # Convert to hex. + printf "%#x" "$speed_to_advertise" +} + +speed_to_advertise_get() +{ + # The function returns the hex number that is composed by OR-ing all + # the modes corresponding to the provided speed. + local speed_without_mode=$1; shift + local supported_speeds=("$@"); shift + local speed_to_advertise=0 + + speed_without_mode+='base' + + for speed in ${supported_speeds[@]}; do + if [[ $speed == $speed_without_mode* ]]; then + speed=$(hex_speed_value_get $speed) + speed_to_advertise=$(($speed_to_advertise | \ + $speed)) + fi + + done + + # Convert to hex. + printf "%#x" "$speed_to_advertise" +} + +advertise_subset_of_speeds() +{ + # Test that when one device advertises a subset of speeds and another + # advertises a specific speed (but all modes of this speed), the links + # are up and ping passes. + RET=0 + + local speed_1_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1) + ethtool_set $h1 advertise $speed_1_to_advertise + + if [ $RET != 0 ]; then + log_test "advertise subset of speeds" + return + fi + + local -a speeds_arr_without_mode=($(common_speeds_get $h1 $h2 0 1)) + # Check only speeds that h1 advertised. Remove the first speed. + unset speeds_arr_without_mode[0] + local -a speeds_arr_with_mode=($(common_speeds_get $h1 $h2 1 1)) + + for speed_value in ${speeds_arr_without_mode[@]}; do + RET=0 + local speed_2_to_advertise=$(speed_to_advertise_get $speed_value \ + "${speeds_arr_with_mode[@]}") + ethtool_set $h2 advertise $speed_2_to_advertise + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_err $? "ping with h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)" + + log_test "advertise $speed_1_to_advertise vs. $speed_2_to_advertise" + done + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +check_highest_speed_is_chosen() +{ + # Test that when one device advertises a subset of speeds, the other + # chooses the highest speed. This test checks configuration without + # traffic. + RET=0 + + local max_speed + local chosen_speed + local speed_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1) + + ethtool_set $h1 advertise $speed_to_advertise + + if [ $RET != 0 ]; then + log_test "check highest speed" + return + fi + + local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1)) + + max_speed=${speeds_arr[0]} + for current in ${speeds_arr[@]}; do + if [[ $current -gt $max_speed ]]; then + max_speed=$current + fi + done + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + chosen_speed=$(ethtool $h1 | grep 'Speed:') + chosen_speed=${chosen_speed%"Mb/s"*} + chosen_speed=${chosen_speed#*"Speed: "} + ((chosen_speed == max_speed)) + check_err $? "h1 advertise $speed_to_advertise, h2 sync to speed $chosen_speed" + + log_test "check highest speed" + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +different_speeds_autoneg_on() +{ + # Test that when we configure links to advertise different speeds, + # links are not up and ping fails. + RET=0 + + local -a speeds=($(different_speeds_get $h1 $h2 1 1)) + local speed1=${speeds[0]} + local speed2=${speeds[1]} + + speed1=$(hex_speed_value_get $speed1) + speed2=$(hex_speed_value_get $speed2) + + ethtool_set $h1 advertise $speed1 + ethtool_set $h2 advertise $speed2 + + if (($RET)); then + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_fail $? "ping with different speeds autoneg on" + fi + + log_test "advertise different speeds autoneg on" + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +declare -gA speed_values +eval "speed_values=($(speeds_arr_get))" + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh new file mode 100755 index 000000000000..a7584448416e --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh @@ -0,0 +1,116 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + autoneg + autoneg_force_mode + no_cable +" + +NUM_NETIFS=2 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh +source ethtool_lib.sh + +TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + swp3=$NETIF_NO_CABLE +} + +ethtool_ext_state() +{ + local dev=$1; shift + local expected_ext_state=$1; shift + local expected_ext_substate=${1:-""}; shift + + local ext_state=$(ethtool $dev | grep "Link detected" \ + | cut -d "(" -f2 | cut -d ")" -f1) + local ext_substate=$(echo $ext_state | cut -sd "," -f2 \ + | sed -e 's/^[[:space:]]*//') + ext_state=$(echo $ext_state | cut -d "," -f1) + + if [[ $ext_state != $expected_ext_state ]]; then + echo "Expected \"$expected_ext_state\", got \"$ext_state\"" + return 1 + fi + if [[ $ext_substate != $expected_ext_substate ]]; then + echo "Expected \"$expected_ext_substate\", got \"$ext_substate\"" + return 1 + fi +} + +autoneg() +{ + local msg + + RET=0 + + ip link set dev $swp1 up + + msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ + "Autoneg" "No partner detected") + check_err $? "$msg" + + log_test "Autoneg, No partner detected" + + ip link set dev $swp1 down +} + +autoneg_force_mode() +{ + local msg + + RET=0 + + ip link set dev $swp1 up + ip link set dev $swp2 up + + local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0)) + local speed1=${speeds_arr[0]} + local speed2=${speeds_arr[1]} + + ethtool_set $swp1 speed $speed1 autoneg off + ethtool_set $swp2 speed $speed2 autoneg off + + msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ + "Autoneg" "No partner detected during force mode") + check_err $? "$msg" + + msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \ + "Autoneg" "No partner detected during force mode") + check_err $? "$msg" + + log_test "Autoneg, No partner detected during force mode" + + ethtool -s $swp2 autoneg on + ethtool -s $swp1 autoneg on + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +no_cable() +{ + local msg + + RET=0 + + ip link set dev $swp3 up + + msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable") + check_err $? "$msg" + + log_test "No cable" + + ip link set dev $swp3 down +} + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh new file mode 100644 index 000000000000..b9bfb45085af --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +speeds_arr_get() +{ + cmd='/ETHTOOL_LINK_MODE_[^[:space:]]*_BIT[[:space:]]+=[[:space:]]+/ \ + {sub(/,$/, "") \ + sub(/ETHTOOL_LINK_MODE_/,"") \ + sub(/_BIT/,"") \ + sub(/_Full/,"/Full") \ + sub(/_Half/,"/Half");\ + print "["$1"]="$3}' + + awk "${cmd}" /usr/include/linux/ethtool.h +} + +ethtool_set() +{ + local cmd="$@" + local out=$(ethtool -s $cmd 2>&1 | wc -l) + + check_err $out "error in configuration. $cmd" +} + +dev_linkmodes_params_get() +{ + local dev=$1; shift + local adver=$1; shift + local -a linkmodes_params + local param_count + local arr + + if (($adver)); then + mode="Advertised link modes" + else + mode="Supported link modes" + fi + + local -a dev_linkmodes=($(dev_speeds_get $dev 1 $adver)) + for ((i=0; i<${#dev_linkmodes[@]}; i++)); do + linkmodes_params[$i]=$(echo -e "${dev_linkmodes[$i]}" | \ + # Replaces all non numbers with spaces + sed -e 's/[^0-9]/ /g' | \ + # Squeeze spaces in sequence to 1 space + tr -s ' ') + # Count how many numbers were found in the linkmode + param_count=$(echo "${linkmodes_params[$i]}" | wc -w) + if [[ $param_count -eq 1 ]]; then + linkmodes_params[$i]="${linkmodes_params[$i]} 1" + elif [[ $param_count -ge 3 ]]; then + arr=(${linkmodes_params[$i]}) + # Take only first two params + linkmodes_params[$i]=$(echo "${arr[@]:0:2}") + fi + done + echo ${linkmodes_params[@]} +} + +dev_speeds_get() +{ + local dev=$1; shift + local with_mode=$1; shift + local adver=$1; shift + local speeds_str + + if (($adver)); then + mode="Advertised link modes" + else + mode="Supported link modes" + fi + + speeds_str=$(ethtool "$dev" | \ + # Snip everything before the link modes section. + sed -n '/'"$mode"':/,$p' | \ + # Quit processing the rest at the start of the next section. + # When checking, skip the header of this section (hence the 2,). + sed -n '2,${/^[\t][^ \t]/q};p' | \ + # Drop the section header of the current section. + cut -d':' -f2) + + local -a speeds_arr=($speeds_str) + if [[ $with_mode -eq 0 ]]; then + for ((i=0; i<${#speeds_arr[@]}; i++)); do + speeds_arr[$i]=${speeds_arr[$i]%base*} + done + fi + echo ${speeds_arr[@]} +} + +common_speeds_get() +{ + dev1=$1; shift + dev2=$1; shift + with_mode=$1; shift + adver=$1; shift + + local -a dev1_speeds=($(dev_speeds_get $dev1 $with_mode $adver)) + local -a dev2_speeds=($(dev_speeds_get $dev2 $with_mode $adver)) + + comm -12 \ + <(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \ + <(printf '%s\n' "${dev2_speeds[@]}" | sort -u) +} + +different_speeds_get() +{ + local dev1=$1; shift + local dev2=$1; shift + local with_mode=$1; shift + local adver=$1; shift + + local -a speeds_arr + + speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver)) + if [[ ${#speeds_arr[@]} < 2 ]]; then + check_err 1 "cannot check different speeds. There are not enough speeds" + fi + + echo ${speeds_arr[0]} ${speeds_arr[1]} +} diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh new file mode 100755 index 000000000000..c301e735c8ab --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh @@ -0,0 +1,341 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + manual_with_verification_h1_to_h2 + manual_with_verification_h2_to_h1 + manual_without_verification_h1_to_h2 + manual_without_verification_h2_to_h1 + manual_failed_verification_h1_to_h2 + manual_failed_verification_h2_to_h1 + lldp +" + +NUM_NETIFS=2 +REQUIRE_MZ=no +PREEMPTIBLE_PRIO=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +traffic_test() +{ + local if=$1; shift + local src=$1; shift + local num_pkts=10000 + local before= + local after= + local delta= + + if [ ${has_pmac_stats[$if]} = false ]; then + src="aggregate" + fi + + before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src) + + $MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO + + after=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src) + + delta=$((after - before)) + + # Allow an extra 1% tolerance for random packets sent by the stack + [ $delta -ge $num_pkts ] && [ $delta -le $((num_pkts + 100)) ] +} + +manual_with_verification() +{ + local tx=$1; shift + local rx=$1; shift + + RET=0 + + # It isn't completely clear from IEEE 802.3-2018 Figure 99-5: Transmit + # Processing state diagram whether the "send_r" variable (send response + # to verification frame) should be taken into consideration while the + # MAC Merge TX direction is disabled. That being said, at least the + # NXP ENETC does not, and requires tx-enabled on in order to respond to + # the link partner's verification frames. + ethtool --set-mm $rx tx-enabled on + ethtool --set-mm $tx verify-enabled on tx-enabled on + + # Wait for verification to finish + sleep 1 + + ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \ + grep -q 'SUCCEEDED' + check_err "$?" "Verification did not succeed" + + ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true' + check_err "$?" "pMAC TX is not active" + + traffic_test $tx "pmac" + check_err "$?" "Traffic did not get sent through $tx's pMAC" + + ethtool --set-mm $tx verify-enabled off tx-enabled off + ethtool --set-mm $rx tx-enabled off + + log_test "Manual configuration with verification: $tx to $rx" +} + +manual_with_verification_h1_to_h2() +{ + manual_with_verification $h1 $h2 +} + +manual_with_verification_h2_to_h1() +{ + manual_with_verification $h2 $h1 +} + +manual_without_verification() +{ + local tx=$1; shift + local rx=$1; shift + + RET=0 + + ethtool --set-mm $tx verify-enabled off tx-enabled on + + ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \ + grep -q 'DISABLED' + check_err "$?" "Verification is not disabled" + + ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true' + check_err "$?" "pMAC TX is not active" + + traffic_test $tx "pmac" + check_err "$?" "Traffic did not get sent through $tx's pMAC" + + ethtool --set-mm $tx verify-enabled off tx-enabled off + + log_test "Manual configuration without verification: $tx to $rx" +} + +manual_without_verification_h1_to_h2() +{ + manual_without_verification $h1 $h2 +} + +manual_without_verification_h2_to_h1() +{ + manual_without_verification $h2 $h1 +} + +manual_failed_verification() +{ + local tx=$1; shift + local rx=$1; shift + + RET=0 + + ethtool --set-mm $rx pmac-enabled off + ethtool --set-mm $tx verify-enabled on tx-enabled on + + # Wait for verification to time out + sleep 1 + + ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \ + grep -q 'SUCCEEDED' + check_fail "$?" "Verification succeeded when it shouldn't have" + + ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true' + check_fail "$?" "pMAC TX is active when it shouldn't have" + + traffic_test $tx "emac" + check_err "$?" "Traffic did not get sent through $tx's eMAC" + + ethtool --set-mm $tx verify-enabled off tx-enabled off + ethtool --set-mm $rx pmac-enabled on + + log_test "Manual configuration with failed verification: $tx to $rx" +} + +manual_failed_verification_h1_to_h2() +{ + manual_failed_verification $h1 $h2 +} + +manual_failed_verification_h2_to_h1() +{ + manual_failed_verification $h2 $h1 +} + +smallest_supported_add_frag_size() +{ + local iface=$1 + local rx_min_frag_size= + + rx_min_frag_size=$(ethtool --json --show-mm $iface | \ + jq '.[]."rx-min-frag-size"') + + if [ $rx_min_frag_size -le 60 ]; then + echo 0 + elif [ $rx_min_frag_size -le 124 ]; then + echo 1 + elif [ $rx_min_frag_size -le 188 ]; then + echo 2 + elif [ $rx_min_frag_size -le 252 ]; then + echo 3 + else + echo "$iface: RX min frag size $rx_min_frag_size cannot be advertised over LLDP" + exit 1 + fi +} + +expected_add_frag_size() +{ + local iface=$1 + local requested=$2 + local min=$(smallest_supported_add_frag_size $iface) + + [ $requested -le $min ] && echo $min || echo $requested +} + +lldp_change_add_frag_size() +{ + local add_frag_size=$1 + local pattern= + + lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null + # Wait for TLVs to be received + sleep 2 + pattern=$(printf "Additional fragment size: %d" \ + $(expected_add_frag_size $h1 $add_frag_size)) + lldptool -i $h2 -t -n -V addEthCaps | grep -q "$pattern" +} + +lldp() +{ + RET=0 + + systemctl start lldpad + + # Configure the interfaces to receive and transmit LLDPDUs + lldptool -L -i $h1 adminStatus=rxtx >/dev/null + lldptool -L -i $h2 adminStatus=rxtx >/dev/null + + # Enable the transmission of Additional Ethernet Capabilities TLV + lldptool -T -i $h1 -V addEthCaps enableTx=yes >/dev/null + lldptool -T -i $h2 -V addEthCaps enableTx=yes >/dev/null + + # Wait for TLVs to be received + sleep 2 + + lldptool -i $h1 -t -n -V addEthCaps | \ + grep -q "Preemption capability active" + check_err "$?" "$h1 pMAC TX is not active" + + lldptool -i $h2 -t -n -V addEthCaps | \ + grep -q "Preemption capability active" + check_err "$?" "$h2 pMAC TX is not active" + + lldp_change_add_frag_size 3 + check_err "$?" "addFragSize 3" + + lldp_change_add_frag_size 2 + check_err "$?" "addFragSize 2" + + lldp_change_add_frag_size 1 + check_err "$?" "addFragSize 1" + + lldp_change_add_frag_size 0 + check_err "$?" "addFragSize 0" + + traffic_test $h1 "pmac" + check_err "$?" "Traffic did not get sent through $h1's pMAC" + + traffic_test $h2 "pmac" + check_err "$?" "Traffic did not get sent through $h2's pMAC" + + systemctl stop lldpad + + log_test "LLDP" +} + +h1_create() +{ + ip link set dev $h1 up + + tc qdisc add dev $h1 root mqprio num_tc 4 map 0 1 2 3 \ + queues 1@0 1@1 1@2 1@3 \ + fp P E E E \ + hw 1 + + ethtool --set-mm $h1 pmac-enabled on tx-enabled off verify-enabled off +} + +h2_create() +{ + ip link set dev $h2 up + + ethtool --set-mm $h2 pmac-enabled on tx-enabled off verify-enabled off + + tc qdisc add dev $h2 root mqprio num_tc 4 map 0 1 2 3 \ + queues 1@0 1@1 1@2 1@3 \ + fp P E E E \ + hw 1 +} + +h1_destroy() +{ + ethtool --set-mm $h1 pmac-enabled off tx-enabled off verify-enabled off + + tc qdisc del dev $h1 root + + ip link set dev $h1 down +} + +h2_destroy() +{ + tc qdisc del dev $h2 root + + ethtool --set-mm $h2 pmac-enabled off tx-enabled off verify-enabled off + + ip link set dev $h2 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy +} + +check_ethtool_mm_support +check_tc_fp_support +require_command lldptool +bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually" + +for netif in ${NETIFS[@]}; do + ethtool --show-mm $netif 2>&1 &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: $netif does not support MAC Merge" + exit $ksft_skip + fi + + if check_ethtool_pmac_std_stats_support $netif eth-mac; then + has_pmac_stats[$netif]=true + else + has_pmac_stats[$netif]=false + echo "$netif does not report pMAC statistics, falling back to aggregate" + fi +done + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh new file mode 100755 index 000000000000..8f60c1685ad4 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + rmon_rx_histogram + rmon_tx_histogram +" + +NUM_NETIFS=2 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +ETH_FCS_LEN=4 +ETH_HLEN=$((6+6+2)) + +declare -A netif_mtu + +ensure_mtu() +{ + local iface=$1; shift + local len=$1; shift + local current=$(ip -j link show dev $iface | jq -r '.[0].mtu') + local required=$((len - ETH_HLEN - ETH_FCS_LEN)) + + if [ $current -lt $required ]; then + ip link set dev $iface mtu $required || return 1 + fi +} + +bucket_test() +{ + local iface=$1; shift + local neigh=$1; shift + local set=$1; shift + local bucket=$1; shift + local len=$1; shift + local num_rx=10000 + local num_tx=20000 + local expected= + local before= + local after= + local delta= + + # Mausezahn does not include FCS bytes in its length - but the + # histogram counters do + len=$((len - ETH_FCS_LEN)) + len=$((len > 0 ? len : 0)) + + before=$(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") + + # Send 10k one way and 20k in the other, to detect counters + # mapped to the wrong direction + $MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us + $MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us + + after=$(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") + + delta=$((after - before)) + + expected=$([ $set = rx ] && echo $num_rx || echo $num_tx) + + # Allow some extra tolerance for other packets sent by the stack + [ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ] +} + +rmon_histogram() +{ + local iface=$1; shift + local neigh=$1; shift + local set=$1; shift + local nbuckets=0 + local step= + + RET=0 + + while read -r -a bucket; do + step="$set-pkts${bucket[0]}to${bucket[1]} on $iface" + + for if in $iface $neigh; do + if ! ensure_mtu $if ${bucket[0]}; then + log_test_xfail "$if does not support the required MTU for $step" + return + fi + done + + if ! bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then + check_err 1 "$step failed" + return 1 + fi + log_test "$step" + nbuckets=$((nbuckets + 1)) + done < <(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null) + + if [ $nbuckets -eq 0 ]; then + log_test_xfail "$iface does not support $set histogram counters" + return + fi +} + +rmon_rx_histogram() +{ + rmon_histogram $h1 $h2 rx + rmon_histogram $h2 $h1 rx +} + +rmon_tx_histogram() +{ + rmon_histogram $h1 $h2 tx + rmon_histogram $h2 $h1 tx +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + for iface in $h1 $h2; do + netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu') + ip link set dev $iface up + done +} + +cleanup() +{ + pre_cleanup + + for iface in $h2 $h1; do + ip link set dev $iface \ + mtu ${netif_mtu[$iface]} \ + down + done +} + +check_ethtool_counter_group_support +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh new file mode 100755 index 000000000000..67fafefc80be --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh @@ -0,0 +1,334 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +----------------------+ +# | H1 | | H2 | +# | | | | +# | $h1.200 + | | + $h2.200 | +# | 192.0.2.1/28 | | | | 192.0.2.18/28 | +# | 2001:db8:1::1/64 | | | | 2001:db8:2::1/64 | +# | | | | | | +# | $h1 + | | + $h2 | +# | | | | | | +# +------------------|-+ +-|--------------------+ +# | | +# +------------------|-------------------------|--------------------+ +# | SW | | | +# | | | | +# | $rp1 + + $rp2 | +# | | | | +# | $rp1.200 + + $rp2.200 | +# | 192.0.2.2/28 192.0.2.17/28 | +# | 2001:db8:1::2/64 2001:db8:2::2/64 | +# | | +# +-----------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + test_stats_rx_ipv4 + test_stats_tx_ipv4 + test_stats_rx_ipv6 + test_stats_tx_ipv6 + respin_enablement + test_stats_rx_ipv4 + test_stats_tx_ipv4 + test_stats_rx_ipv6 + test_stats_tx_ipv6 + reapply_config + ping_ipv4 + ping_ipv6 + test_stats_rx_ipv4 + test_stats_tx_ipv4 + test_stats_rx_ipv6 + test_stats_tx_ipv6 + test_stats_report_rx + test_stats_report_tx + test_destroy_enabled + test_double_enable +" +NUM_NETIFS=4 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh +source "$lib_dir"/../../../net/forwarding/tc_common.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64 + ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 + ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + vlan_destroy $h1 200 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64 + ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2 + ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17 + vlan_destroy $h2 200 + simple_if_fini $h2 +} + +router_rp1_200_create() +{ + ip link add name $rp1.200 link $rp1 type vlan id 200 + ip link set dev $rp1.200 addrgenmode eui64 + ip link set dev $rp1.200 up + ip address add dev $rp1.200 192.0.2.2/28 + ip address add dev $rp1.200 2001:db8:1::2/64 + ip stats set dev $rp1.200 l3_stats on +} + +router_rp1_200_destroy() +{ + ip stats set dev $rp1.200 l3_stats off + ip address del dev $rp1.200 2001:db8:1::2/64 + ip address del dev $rp1.200 192.0.2.2/28 + ip link del dev $rp1.200 +} + +router_create() +{ + ip link set dev $rp1 up + router_rp1_200_create + + ip link set dev $rp2 up + vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64 +} + +router_destroy() +{ + vlan_destroy $rp2 200 + ip link set dev $rp2 down + + router_rp1_200_destroy + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1mac=$(mac_get $rp1) + rp2mac=$(mac_get $rp2) + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.200 192.0.2.18 " IPv4" +} + +ping_ipv6() +{ + ping_test $h1.200 2001:db8:2::1 " IPv6" +} + +send_packets_rx_ipv4() +{ + # Send 21 packets instead of 20, because the first one might trap and go + # through the SW datapath, which might not bump the HW counter. + $MZ $h1.200 -c 21 -d 20msec -p 100 \ + -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \ + -q -t udp sp=54321,dp=12345 +} + +send_packets_rx_ipv6() +{ + $MZ $h1.200 -6 -c 21 -d 20msec -p 100 \ + -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \ + -q -t udp sp=54321,dp=12345 +} + +send_packets_tx_ipv4() +{ + $MZ $h2.200 -c 21 -d 20msec -p 100 \ + -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \ + -q -t udp sp=54321,dp=12345 +} + +send_packets_tx_ipv6() +{ + $MZ $h2.200 -6 -c 21 -d 20msec -p 100 \ + -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \ + -q -t udp sp=54321,dp=12345 +} + +___test_stats() +{ + local dir=$1; shift + local prot=$1; shift + + local a + local b + + a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets) + send_packets_${dir}_${prot} + "$@" + b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ + hw_stats_get l3_stats $rp1.200 ${dir} packets) + check_err $? "Traffic not reflected in the counter: $a -> $b" +} + +__test_stats() +{ + local dir=$1; shift + local prot=$1; shift + + RET=0 + ___test_stats "$dir" "$prot" + log_test "Test $dir packets: $prot" +} + +test_stats_rx_ipv4() +{ + __test_stats rx ipv4 +} + +test_stats_tx_ipv4() +{ + __test_stats tx ipv4 +} + +test_stats_rx_ipv6() +{ + __test_stats rx ipv6 +} + +test_stats_tx_ipv6() +{ + __test_stats tx ipv6 +} + +# Make sure everything works well even after stats have been disabled and +# reenabled on the same device without touching the L3 configuration. +respin_enablement() +{ + log_info "Turning stats off and on again" + ip stats set dev $rp1.200 l3_stats off + ip stats set dev $rp1.200 l3_stats on +} + +# For the initial run, l3_stats is enabled on a completely set up netdevice. Now +# do it the other way around: enabling the L3 stats on an L2 netdevice, and only +# then apply the L3 configuration. +reapply_config() +{ + log_info "Reapplying configuration" + + router_rp1_200_destroy + + ip link add name $rp1.200 link $rp1 type vlan id 200 + ip link set dev $rp1.200 addrgenmode none + ip stats set dev $rp1.200 l3_stats on + ip link set dev $rp1.200 addrgenmode eui64 + ip link set dev $rp1.200 up + ip address add dev $rp1.200 192.0.2.2/28 + ip address add dev $rp1.200 2001:db8:1::2/64 +} + +__test_stats_report() +{ + local dir=$1; shift + local prot=$1; shift + + local a + local b + + RET=0 + + a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets) + send_packets_${dir}_${prot} + ip address flush dev $rp1.200 + b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ + hw_stats_get l3_stats $rp1.200 ${dir} packets) + check_err $? "Traffic not reflected in the counter: $a -> $b" + log_test "Test ${dir} packets: stats pushed on loss of L3" + + ip stats set dev $rp1.200 l3_stats off + ip link del dev $rp1.200 + router_rp1_200_create +} + +test_stats_report_rx() +{ + __test_stats_report rx ipv4 +} + +test_stats_report_tx() +{ + __test_stats_report tx ipv4 +} + +test_destroy_enabled() +{ + RET=0 + + ip link del dev $rp1.200 + router_rp1_200_create + + log_test "Destroy l3_stats-enabled netdev" +} + +test_double_enable() +{ + RET=0 + ___test_stats rx ipv4 \ + ip stats set dev $rp1.200 l3_stats on + log_test "Test stat retention across a spurious enablement" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info | + jq '.[].info.l3_stats.used') +[[ $used = true ]] +check_err $? "hw_stats_info.used=$used" +log_test "l3_stats offloaded" +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh new file mode 100755 index 000000000000..a94d92e1abce --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test L3 stats on IP-in-IP GRE tunnel without key. + +# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more +# details. + +ALL_TESTS=" + ping_ipv4 + test_stats_rx + test_stats_tx +" +NUM_NETIFS=6 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh +source "$lib_dir"/../../../net/forwarding/ipip_lib.sh +source "$lib_dir"/../../../net/forwarding/tc_common.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + ol1mac=$(mac_get $ol1) + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create gre $ol1 $ul1 + sw2_flat_create gre $ol2 $ul2 + ip stats set dev g1a l3_stats on + ip stats set dev g2a l3_stats on +} + +cleanup() +{ + pre_cleanup + + ip stats set dev g1a l3_stats off + ip stats set dev g2a l3_stats off + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + + vrf_cleanup + forwarding_restore +} + +ping_ipv4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre flat" +} + +send_packets_ipv4() +{ + # Send 21 packets instead of 20, because the first one might trap and go + # through the SW datapath, which might not bump the HW counter. + $MZ $h1 -c 21 -d 20msec -p 100 \ + -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \ + -q -t udp sp=54321,dp=12345 +} + +test_stats() +{ + local dev=$1; shift + local dir=$1; shift + + local a + local b + + RET=0 + + a=$(hw_stats_get l3_stats $dev $dir packets) + send_packets_ipv4 + b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ + hw_stats_get l3_stats $dev $dir packets) + check_err $? "Traffic not reflected in the counter: $a -> $b" + + log_test "Test $dir packets: $prot" +} + +test_stats_tx() +{ + test_stats g1a tx +} + +test_stats_rx() +{ + test_stats g2a rx +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c new file mode 100644 index 000000000000..62456df947bc --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c @@ -0,0 +1,464 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <arpa/inet.h> +#include <linux/errqueue.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/wait.h> + +#include <liburing.h> + +static long page_size; +#define AREA_SIZE (8192 * page_size) +#define SEND_SIZE (512 * 4096) +#define min(a, b) \ + ({ \ + typeof(a) _a = (a); \ + typeof(b) _b = (b); \ + _a < _b ? _a : _b; \ + }) +#define min_t(t, a, b) \ + ({ \ + t _ta = (a); \ + t _tb = (b); \ + min(_ta, _tb); \ + }) + +#define ALIGN_UP(v, align) (((v) + (align) - 1) & ~((align) - 1)) + +static int cfg_server; +static int cfg_client; +static int cfg_port = 8000; +static int cfg_payload_len; +static const char *cfg_ifname; +static int cfg_queue_id = -1; +static bool cfg_oneshot; +static int cfg_oneshot_recvs; +static int cfg_send_size = SEND_SIZE; +static struct sockaddr_in6 cfg_addr; + +static char *payload; +static void *area_ptr; +static void *ring_ptr; +static size_t ring_size; +static struct io_uring_zcrx_rq rq_ring; +static unsigned long area_token; +static int connfd; +static bool stop; +static size_t received; + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + +static inline size_t get_refill_ring_size(unsigned int rq_entries) +{ + size_t size; + + ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe); + /* add space for the header (head/tail/etc.) */ + ring_size += page_size; + return ALIGN_UP(ring_size, page_size); +} + +static void setup_zcrx(struct io_uring *ring) +{ + unsigned int ifindex; + unsigned int rq_entries = 4096; + int ret; + + ifindex = if_nametoindex(cfg_ifname); + if (!ifindex) + error(1, 0, "bad interface name: %s", cfg_ifname); + + area_ptr = mmap(NULL, + AREA_SIZE, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + 0, + 0); + if (area_ptr == MAP_FAILED) + error(1, 0, "mmap(): zero copy area"); + + ring_size = get_refill_ring_size(rq_entries); + ring_ptr = mmap(NULL, + ring_size, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + 0, + 0); + + struct io_uring_region_desc region_reg = { + .size = ring_size, + .user_addr = (__u64)(unsigned long)ring_ptr, + .flags = IORING_MEM_REGION_TYPE_USER, + }; + + struct io_uring_zcrx_area_reg area_reg = { + .addr = (__u64)(unsigned long)area_ptr, + .len = AREA_SIZE, + .flags = 0, + }; + + struct io_uring_zcrx_ifq_reg reg = { + .if_idx = ifindex, + .if_rxq = cfg_queue_id, + .rq_entries = rq_entries, + .area_ptr = (__u64)(unsigned long)&area_reg, + .region_ptr = (__u64)(unsigned long)®ion_reg, + }; + + ret = io_uring_register_ifq(ring, ®); + if (ret) + error(1, 0, "io_uring_register_ifq(): %d", ret); + + rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head); + rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail); + rq_ring.rqes = (struct io_uring_zcrx_rqe *)((char *)ring_ptr + reg.offsets.rqes); + rq_ring.rq_tail = 0; + rq_ring.ring_entries = reg.rq_entries; + + area_token = area_reg.rq_area_token; +} + +static void add_accept(struct io_uring *ring, int sockfd) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_accept(sqe, sockfd, NULL, NULL, 0); + sqe->user_data = 1; +} + +static void add_recvzc(struct io_uring *ring, int sockfd) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, 0, 0); + sqe->ioprio |= IORING_RECV_MULTISHOT; + sqe->user_data = 2; +} + +static void add_recvzc_oneshot(struct io_uring *ring, int sockfd, size_t len) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, len, 0); + sqe->ioprio |= IORING_RECV_MULTISHOT; + sqe->user_data = 2; +} + +static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe) +{ + if (cqe->res < 0) + error(1, 0, "accept()"); + if (connfd) + error(1, 0, "Unexpected second connection"); + + connfd = cqe->res; + if (cfg_oneshot) + add_recvzc_oneshot(ring, connfd, page_size); + else + add_recvzc(ring, connfd); +} + +static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe) +{ + unsigned rq_mask = rq_ring.ring_entries - 1; + struct io_uring_zcrx_cqe *rcqe; + struct io_uring_zcrx_rqe *rqe; + struct io_uring_sqe *sqe; + uint64_t mask; + char *data; + ssize_t n; + int i; + + if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs == 0) { + stop = true; + return; + } + + if (cqe->res < 0) + error(1, 0, "recvzc(): %d", cqe->res); + + if (cfg_oneshot) { + if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) { + add_recvzc_oneshot(ring, connfd, page_size); + cfg_oneshot_recvs--; + } + } else if (!(cqe->flags & IORING_CQE_F_MORE)) { + add_recvzc(ring, connfd); + } + + rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1); + + n = cqe->res; + mask = (1ULL << IORING_ZCRX_AREA_SHIFT) - 1; + data = (char *)area_ptr + (rcqe->off & mask); + + for (i = 0; i < n; i++) { + if (*(data + i) != payload[(received + i)]) + error(1, 0, "payload mismatch at %d", i); + } + received += n; + + rqe = &rq_ring.rqes[(rq_ring.rq_tail & rq_mask)]; + rqe->off = (rcqe->off & ~IORING_ZCRX_AREA_MASK) | area_token; + rqe->len = cqe->res; + io_uring_smp_store_release(rq_ring.ktail, ++rq_ring.rq_tail); +} + +static void server_loop(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + unsigned int count = 0; + unsigned int head; + int i, ret; + + io_uring_submit_and_wait(ring, 1); + + io_uring_for_each_cqe(ring, head, cqe) { + if (cqe->user_data == 1) + process_accept(ring, cqe); + else if (cqe->user_data == 2) + process_recvzc(ring, cqe); + else + error(1, 0, "unknown cqe"); + count++; + } + io_uring_cq_advance(ring, count); +} + +static void run_server(void) +{ + unsigned int flags = 0; + struct io_uring ring; + int fd, enable, ret; + uint64_t tstop; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, 0, "socket()"); + + enable = 1; + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int)); + if (ret < 0) + error(1, 0, "setsockopt(SO_REUSEADDR)"); + + ret = bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)); + if (ret < 0) + error(1, 0, "bind()"); + + if (listen(fd, 1024) < 0) + error(1, 0, "listen()"); + + flags |= IORING_SETUP_COOP_TASKRUN; + flags |= IORING_SETUP_SINGLE_ISSUER; + flags |= IORING_SETUP_DEFER_TASKRUN; + flags |= IORING_SETUP_SUBMIT_ALL; + flags |= IORING_SETUP_CQE32; + + io_uring_queue_init(512, &ring, flags); + + setup_zcrx(&ring); + + add_accept(&ring, fd); + + tstop = gettimeofday_ms() + 5000; + while (!stop && gettimeofday_ms() < tstop) + server_loop(&ring); + + if (!stop) + error(1, 0, "test failed\n"); +} + +static void run_client(void) +{ + ssize_t to_send = cfg_send_size; + ssize_t sent = 0; + ssize_t chunk, res; + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, 0, "socket()"); + + if (connect(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr))) + error(1, 0, "connect()"); + + while (to_send) { + void *src = &payload[sent]; + + chunk = min_t(ssize_t, cfg_payload_len, to_send); + res = send(fd, src, chunk, 0); + if (res < 0) + error(1, 0, "send(): %zd", sent); + sent += res; + to_send -= res; + } + + close(fd); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s (-4|-6) (-s|-c) -h<server_ip> -p<port> " + "-l<payload_size> -i<ifname> -q<rxq_id>", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + const int max_payload_len = SEND_SIZE - + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) - + 40 /* max tcp options */; + struct sockaddr_in6 *addr6 = (void *) &cfg_addr; + char *addr = NULL; + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + cfg_payload_len = max_payload_len; + + while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:")) != -1) { + switch (c) { + case 's': + if (cfg_client) + error(1, 0, "Pass one of -s or -c"); + cfg_server = 1; + break; + case 'c': + if (cfg_server) + error(1, 0, "Pass one of -s or -c"); + cfg_client = 1; + break; + case 'h': + addr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'l': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 'i': + cfg_ifname = optarg; + break; + case 'q': + cfg_queue_id = strtoul(optarg, NULL, 0); + break; + case 'o': { + cfg_oneshot = true; + cfg_oneshot_recvs = strtoul(optarg, NULL, 0); + break; + } + case 'z': + cfg_send_size = strtoul(optarg, NULL, 0); + break; + } + } + + if (cfg_server && addr) + error(1, 0, "Receiver cannot have -h specified"); + + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + addr6->sin6_addr = in6addr_any; + if (addr) { + ret = parse_address(addr, cfg_port, addr6); + if (ret) + error(1, 0, "receiver address parse error: %s", addr); + } + + if (cfg_payload_len > max_payload_len) + error(1, 0, "-l: payload exceeds max (%d)", max_payload_len); +} + +int main(int argc, char **argv) +{ + const char *cfg_test = argv[argc - 1]; + int i; + + page_size = sysconf(_SC_PAGESIZE); + if (page_size < 0) + return 1; + + if (posix_memalign((void **)&payload, page_size, SEND_SIZE)) + return 1; + + parse_opts(argc, argv); + + for (i = 0; i < SEND_SIZE; i++) + payload[i] = 'a' + (i % 26); + + if (cfg_server) + run_server(); + else if (cfg_client) + run_client(); + + return 0; +} diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py new file mode 100755 index 000000000000..712c806508b5 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import re +from os import path +from lib.py import ksft_run, ksft_exit, KsftSkipEx +from lib.py import NetDrvEpEnv +from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen + + +def _get_current_settings(cfg): + output = ethtool(f"-g {cfg.ifname}", json=True)[0] + return (output['rx'], output['hds-thresh']) + + +def _get_combined_channels(cfg): + output = ethtool(f"-l {cfg.ifname}").stdout + values = re.findall(r'Combined:\s+(\d+)', output) + return int(values[1]) + + +def _create_rss_ctx(cfg, chan): + output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout + values = re.search(r'New RSS context is (\d+)', output).group(1) + ctx_id = int(values) + return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}")) + + +def _set_flow_rule(cfg, port, chan): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout + values = re.search(r'ID (\d+)', output).group(1) + return int(values) + + +def _set_flow_rule_rss(cfg, port, ctx_id): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout + values = re.search(r'ID (\d+)', output).group(1) + return int(values) + + +def test_zcrx(cfg) -> None: + cfg.require_ipver('6') + + combined_chans = _get_combined_channels(cfg) + if combined_chans < 2: + raise KsftSkipEx('at least 2 combined channels required') + (rx_ring, hds_thresh) = _get_current_settings(cfg) + port = rand_port() + + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") + + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") + + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") + + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") + + flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") + + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) + + +def test_zcrx_oneshot(cfg) -> None: + cfg.require_ipver('6') + + combined_chans = _get_combined_channels(cfg) + if combined_chans < 2: + raise KsftSkipEx('at least 2 combined channels required') + (rx_ring, hds_thresh) = _get_current_settings(cfg) + port = rand_port() + + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") + + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") + + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") + + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") + + flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") + + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) + + +def test_zcrx_rss(cfg) -> None: + cfg.require_ipver('6') + + combined_chans = _get_combined_channels(cfg) + if combined_chans < 2: + raise KsftSkipEx('at least 2 combined channels required') + (rx_ring, hds_thresh) = _get_current_settings(cfg) + port = rand_port() + + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") + + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") + + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") + + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") + + (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1) + flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") + + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) + + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx") + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/irq.py b/tools/testing/selftests/drivers/net/hw/irq.py new file mode 100755 index 000000000000..0699d6a8b4e2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/irq.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_ge, ksft_eq +from lib.py import KsftSkipEx +from lib.py import ksft_disruptive +from lib.py import EthtoolFamily, NetdevFamily +from lib.py import NetDrvEnv +from lib.py import cmd, ip, defer + + +def read_affinity(irq) -> str: + with open(f'/proc/irq/{irq}/smp_affinity', 'r') as fp: + return fp.read().lstrip("0,").strip() + + +def write_affinity(irq, what) -> str: + if what != read_affinity(irq): + with open(f'/proc/irq/{irq}/smp_affinity', 'w') as fp: + fp.write(what) + + +def check_irqs_reported(cfg) -> None: + """ Check that device reports IRQs for NAPI instances """ + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) + irqs = sum(['irq' in x for x in napis]) + + ksft_ge(irqs, 1) + ksft_eq(irqs, len(napis)) + + +def _check_reconfig(cfg, reconfig_cb) -> None: + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) + for n in reversed(napis): + if 'irq' in n: + break + else: + raise KsftSkipEx(f"Device has no NAPI with IRQ attribute (#napis: {len(napis)}") + + old = read_affinity(n['irq']) + # pick an affinity that's not the current one + new = "3" if old != "3" else "5" + write_affinity(n['irq'], new) + defer(write_affinity, n['irq'], old) + + reconfig_cb(cfg) + + ksft_eq(read_affinity(n['irq']), new, comment="IRQ affinity changed after reconfig") + + +def check_reconfig_queues(cfg) -> None: + def reconfig(cfg) -> None: + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if channels['combined-count'] == 0: + rx_type = 'rx' + else: + rx_type = 'combined' + cur_queue_cnt = channels[f'{rx_type}-count'] + max_queue_cnt = channels[f'{rx_type}-max'] + + cmd(f"ethtool -L {cfg.ifname} {rx_type} 1") + cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}") + cmd(f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}") + + _check_reconfig(cfg, reconfig) + + +def check_reconfig_xdp(cfg) -> None: + def reconfig(cfg) -> None: + ip(f"link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o")) + ip(f"link set dev %s xdp off" % cfg.ifname) + + _check_reconfig(cfg, reconfig) + + +@ksft_disruptive +def check_down(cfg) -> None: + def reconfig(cfg) -> None: + ip("link set dev %s down" % cfg.ifname) + ip("link set dev %s up" % cfg.ifname) + + _check_reconfig(cfg, reconfig) + + +def main() -> None: + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + + ksft_run([check_irqs_reported, check_reconfig_queues, + check_reconfig_xdp, check_down], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py new file mode 100644 index 000000000000..766bfc4ad842 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: GPL-2.0 + +""" +Driver test environment (hardware-only tests). +NetDrvEnv and NetDrvEpEnv are the main environment classes. +Former is for local host only tests, latter creates / connects +to a remote endpoint. See NIPA wiki for more information about +running and writing driver tests. +""" + +import sys +from pathlib import Path + +KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve() + +try: + sys.path.append(KSFT_DIR.as_posix()) + + # Import one by one to avoid pylint false positives + from net.lib.py import NetNS, NetNSEnter, NetdevSimDev + from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ + NlError, RtnlFamily, DevlinkFamily, PSPFamily + from net.lib.py import CmdExitFailure + from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ + fd_read_timeout, ip, rand_port, wait_port_listen, wait_file + from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx + from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ + ksft_setup, ksft_variants, KsftNamedVariant + from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ + ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none + from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner + from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv + + __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", + "EthtoolFamily", "NetdevFamily", "NetshaperFamily", + "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", + "CmdExitFailure", + "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", + "fd_read_timeout", "ip", "rand_port", + "wait_port_listen", "wait_file", + "KsftSkipEx", "KsftFailEx", "KsftXfailEx", + "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", + "ksft_setup", "ksft_variants", "KsftNamedVariant", + "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", + "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", + "ksft_not_none", "ksft_not_none", + "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote", + "Iperf3Runner"] +except ModuleNotFoundError as e: + print("Failed importing `net` library from kernel sources") + print(str(e)) + sys.exit(4) diff --git a/tools/testing/selftests/drivers/net/hw/loopback.sh b/tools/testing/selftests/drivers/net/hw/loopback.sh new file mode 100755 index 000000000000..5acc3ff820aa --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/loopback.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +ALL_TESTS="loopback_test" +NUM_NETIFS=2 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/tc_common.sh +source "$lib_dir"/../../../net/forwarding/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 + tc qdisc add dev $h1 clsact +} + +h1_destroy() +{ + tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 +} + +h2_destroy() +{ + simple_if_fini $h2 +} + +loopback_test() +{ + RET=0 + + tc filter add dev $h1 ingress protocol arp pref 1 handle 101 flower \ + skip_hw arp_op reply arp_tip 192.0.2.1 action drop + + $MZ $h1 -c 1 -t arp -q + + tc_check_packets "dev $h1 ingress" 101 1 + check_fail $? "Matched on a filter without loopback setup" + + ethtool -K $h1 loopback on + check_err $? "Failed to enable loopback" + + setup_wait_dev $h1 + + $MZ $h1 -c 1 -t arp -q + + tc_check_packets "dev $h1 ingress" 101 1 + check_err $? "Did not match on filter with loopback" + + ethtool -K $h1 loopback off + check_err $? "Failed to disable loopback" + + $MZ $h1 -c 1 -t arp -q + + tc_check_packets "dev $h1 ingress" 101 2 + check_fail $? "Matched on a filter after loopback was removed" + + tc filter del dev $h1 ingress protocol arp pref 1 handle 101 flower + + log_test "loopback" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + vrf_prepare + + h1_create + h2_create + + if ethtool -k $h1 | grep loopback | grep -q fixed; then + log_test "SKIP: dev $h1 does not support loopback feature" + exit $ksft_skip + fi +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c new file mode 100644 index 000000000000..3288ed04ce08 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -0,0 +1,1524 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * tcpdevmem netcat. Works similarly to netcat but does device memory TCP + * instead of regular TCP. Uses udmabuf to mock a dmabuf provider. + * + * Usage: + * + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 + * + * On client: + * echo -n "hello\nworld" | \ + * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1 + * + * Note this is compatible with regular netcat. i.e. the sender or receiver can + * be replaced with regular netcat to test the RX or TX path in isolation. + * + * Test data validation (devmem TCP on RX only): + * + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7 + * + * On client: + * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ + * head -c 1G | \ + * nc <server IP> 5201 -p 5201 + * + * Test data validation (devmem TCP on RX and TX, validation happens on RX): + * + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1 + * + * On client: + * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \ + * head -c 1M | \ + * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1 + */ +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + +#include <linux/uio.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#define __iovec_defined +#include <fcntl.h> +#include <malloc.h> +#include <error.h> +#include <poll.h> + +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <sys/time.h> + +#include <linux/memfd.h> +#include <linux/dma-buf.h> +#include <linux/errqueue.h> +#include <linux/udmabuf.h> +#include <linux/types.h> +#include <linux/netlink.h> +#include <linux/genetlink.h> +#include <linux/netdev.h> +#include <linux/ethtool_netlink.h> +#include <time.h> +#include <net/if.h> + +#include "netdev-user.h" +#include "ethtool-user.h" +#include <ynl.h> + +#define PAGE_SHIFT 12 +#define TEST_PREFIX "ncdevmem" +#define NUM_PAGES 16000 + +#ifndef MSG_SOCK_DEVMEM +#define MSG_SOCK_DEVMEM 0x2000000 +#endif + +#define MAX_IOV 1024 + +static size_t max_chunk; +static char *server_ip; +static char *client_ip; +static char *port; +static size_t do_validation; +static int start_queue = -1; +static int num_queues = -1; +static char *ifname; +static unsigned int ifindex; +static unsigned int dmabuf_id; +static uint32_t tx_dmabuf_id; +static int waittime_ms = 500; + +/* System state loaded by current_config_load() */ +#define MAX_FLOWS 8 +static int ntuple_ids[MAX_FLOWS] = { -1, -1, -1, -1, -1, -1, -1, -1, }; + +struct memory_buffer { + int fd; + size_t size; + + int devfd; + int memfd; + char *buf_mem; +}; + +struct memory_provider { + struct memory_buffer *(*alloc)(size_t size); + void (*free)(struct memory_buffer *ctx); + void (*memcpy_to_device)(struct memory_buffer *dst, size_t off, + void *src, int n); + void (*memcpy_from_device)(void *dst, struct memory_buffer *src, + size_t off, int n); +}; + +static void pr_err(const char *fmt, ...) +{ + va_list args; + + fprintf(stderr, "%s: ", TEST_PREFIX); + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + if (errno != 0) + fprintf(stderr, ": %s", strerror(errno)); + fprintf(stderr, "\n"); +} + +static struct memory_buffer *udmabuf_alloc(size_t size) +{ + struct udmabuf_create create; + struct memory_buffer *ctx; + int ret; + + ctx = malloc(sizeof(*ctx)); + if (!ctx) + return NULL; + + ctx->size = size; + + ctx->devfd = open("/dev/udmabuf", O_RDWR); + if (ctx->devfd < 0) { + pr_err("[skip,no-udmabuf: Unable to access DMA buffer device file]"); + goto err_free_ctx; + } + + ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); + if (ctx->memfd < 0) { + pr_err("[skip,no-memfd]"); + goto err_close_dev; + } + + ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) { + pr_err("[skip,fcntl-add-seals]"); + goto err_close_memfd; + } + + ret = ftruncate(ctx->memfd, size); + if (ret == -1) { + pr_err("[FAIL,memfd-truncate]"); + goto err_close_memfd; + } + + memset(&create, 0, sizeof(create)); + + create.memfd = ctx->memfd; + create.offset = 0; + create.size = size; + ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create); + if (ctx->fd < 0) { + pr_err("[FAIL, create udmabuf]"); + goto err_close_fd; + } + + ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + ctx->fd, 0); + if (ctx->buf_mem == MAP_FAILED) { + pr_err("[FAIL, map udmabuf]"); + goto err_close_fd; + } + + return ctx; + +err_close_fd: + close(ctx->fd); +err_close_memfd: + close(ctx->memfd); +err_close_dev: + close(ctx->devfd); +err_free_ctx: + free(ctx); + return NULL; +} + +static void udmabuf_free(struct memory_buffer *ctx) +{ + munmap(ctx->buf_mem, ctx->size); + close(ctx->fd); + close(ctx->memfd); + close(ctx->devfd); + free(ctx); +} + +static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off, + void *src, int n) +{ + struct dma_buf_sync sync = {}; + + sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE; + ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync); + + memcpy(dst->buf_mem + off, src, n); + + sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE; + ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync); +} + +static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src, + size_t off, int n) +{ + struct dma_buf_sync sync = {}; + + sync.flags = DMA_BUF_SYNC_START; + ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); + + memcpy(dst, src->buf_mem + off, n); + + sync.flags = DMA_BUF_SYNC_END; + ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); +} + +static struct memory_provider udmabuf_memory_provider = { + .alloc = udmabuf_alloc, + .free = udmabuf_free, + .memcpy_to_device = udmabuf_memcpy_to_device, + .memcpy_from_device = udmabuf_memcpy_from_device, +}; + +static struct memory_provider *provider = &udmabuf_memory_provider; + +static void print_nonzero_bytes(void *ptr, size_t size) +{ + unsigned char *p = ptr; + unsigned int i; + + for (i = 0; i < size; i++) + putchar(p[i]); +} + +int validate_buffer(void *line, size_t size) +{ + static unsigned char seed = 1; + unsigned char *ptr = line; + unsigned char expected; + static int errors; + size_t i; + + for (i = 0; i < size; i++) { + expected = seed ? seed : '\n'; + if (ptr[i] != expected) { + fprintf(stderr, + "Failed validation: expected=%u, actual=%u, index=%lu\n", + expected, ptr[i], i); + errors++; + if (errors > 20) { + pr_err("validation failed"); + return -1; + } + } + seed++; + if (seed == do_validation) + seed = 0; + } + + fprintf(stdout, "Validated buffer\n"); + return 0; +} + +static int +__run_command(char *out, size_t outlen, const char *cmd, va_list args) +{ + char command[256]; + FILE *fp; + + vsnprintf(command, sizeof(command), cmd, args); + + fprintf(stderr, "Running: %s\n", command); + fp = popen(command, "r"); + if (!fp) + return -1; + if (out) { + size_t len; + + if (!fgets(out, outlen, fp)) + return -1; + + /* Remove trailing newline if present */ + len = strlen(out); + if (len && out[len - 1] == '\n') + out[len - 1] = '\0'; + } + return pclose(fp); +} + +static int run_command(const char *cmd, ...) +{ + va_list args; + int ret; + + va_start(args, cmd); + ret = __run_command(NULL, 0, cmd, args); + va_end(args); + + return ret; +} + +static int ethtool_add_flow(const char *format, ...) +{ + char local_output[256], cmd[256]; + const char *id_start; + int flow_idx, ret; + char *endptr; + long flow_id; + va_list args; + + for (flow_idx = 0; flow_idx < MAX_FLOWS; flow_idx++) + if (ntuple_ids[flow_idx] == -1) + break; + if (flow_idx == MAX_FLOWS) { + fprintf(stderr, "Error: too many flows\n"); + return -1; + } + + snprintf(cmd, sizeof(cmd), "ethtool -N %s %s", ifname, format); + + va_start(args, format); + ret = __run_command(local_output, sizeof(local_output), cmd, args); + va_end(args); + + if (ret != 0) + return ret; + + /* Extract the ID from the output */ + id_start = strstr(local_output, "Added rule with ID "); + if (!id_start) + return -1; + id_start += strlen("Added rule with ID "); + + flow_id = strtol(id_start, &endptr, 10); + if (endptr == id_start || flow_id < 0 || flow_id > INT_MAX) + return -1; + + fprintf(stderr, "Added flow rule with ID %ld\n", flow_id); + ntuple_ids[flow_idx] = flow_id; + return flow_id; +} + +static int rxq_num(int ifindex) +{ + struct ethtool_channels_get_req *req; + struct ethtool_channels_get_rsp *rsp; + struct ynl_error yerr; + struct ynl_sock *ys; + int num = -1; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = ethtool_channels_get_req_alloc(); + ethtool_channels_get_req_set_header_dev_index(req, ifindex); + rsp = ethtool_channels_get(ys, req); + if (rsp) + num = rsp->rx_count + rsp->combined_count; + ethtool_channels_get_req_free(req); + ethtool_channels_get_rsp_free(rsp); + + ynl_sock_destroy(ys); + + return num; +} + +static void reset_flow_steering(void) +{ + int i; + + for (i = 0; i < MAX_FLOWS; i++) { + if (ntuple_ids[i] == -1) + continue; + run_command("ethtool -N %s delete %d", + ifname, ntuple_ids[i]); + ntuple_ids[i] = -1; + } +} + +static const char *tcp_data_split_str(int val) +{ + switch (val) { + case 0: + return "off"; + case 1: + return "auto"; + case 2: + return "on"; + default: + return "?"; + } +} + +static struct ethtool_rings_get_rsp *get_ring_config(void) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ynl_error yerr; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return NULL; + } + + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + + ynl_sock_destroy(ys); + + return get_rsp; +} + +static void restore_ring_config(const struct ethtool_rings_get_rsp *config) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ethtool_rings_set_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + if (!config) + return; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return; + } + + req = ethtool_rings_set_req_alloc(); + ethtool_rings_set_req_set_header_dev_index(req, ifindex); + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); + if (config->_present.hds_thresh) + ethtool_rings_set_req_set_hds_thresh(req, config->hds_thresh); + + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL restoring HDS cfg: %s\n", ys->err.msg); + + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + + /* use explicit value if UKNOWN didn't give us the previous */ + if (get_rsp->tcp_data_split != config->tcp_data_split) { + ethtool_rings_set_req_set_tcp_data_split(req, + config->tcp_data_split); + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL restoring expl HDS cfg: %s\n", + ys->err.msg); + } + + ethtool_rings_get_rsp_free(get_rsp); + ethtool_rings_set_req_free(req); + + ynl_sock_destroy(ys); +} + +static int +configure_headersplit(const struct ethtool_rings_get_rsp *old, bool on) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ethtool_rings_set_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = ethtool_rings_set_req_alloc(); + ethtool_rings_set_req_set_header_dev_index(req, ifindex); + if (on) { + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_ENABLED); + if (old->_present.hds_thresh) + ethtool_rings_set_req_set_hds_thresh(req, 0); + } else { + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); + } + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL failed: %s\n", ys->err.msg); + ethtool_rings_set_req_free(req); + + if (ret == 0) { + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + if (get_rsp) + fprintf(stderr, "TCP header split: %s\n", + tcp_data_split_str(get_rsp->tcp_data_split)); + ethtool_rings_get_rsp_free(get_rsp); + } + + ynl_sock_destroy(ys); + + return ret; +} + +static int configure_rss(void) +{ + return run_command("ethtool -X %s equal %d >&2", ifname, start_queue); +} + +static void reset_rss(void) +{ + run_command("ethtool -X %s default >&2", ifname, start_queue); +} + +static int check_changing_channels(unsigned int rx, unsigned int tx) +{ + struct ethtool_channels_get_req *gchan; + struct ethtool_channels_set_req *schan; + struct ethtool_channels_get_rsp *chan; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx); + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + gchan = ethtool_channels_get_req_alloc(); + if (!gchan) { + ret = -1; + goto exit_close_sock; + } + + ethtool_channels_get_req_set_header_dev_index(gchan, ifindex); + chan = ethtool_channels_get(ys, gchan); + ethtool_channels_get_req_free(gchan); + if (!chan) { + fprintf(stderr, "YNL get channels: %s\n", ys->err.msg); + ret = -1; + goto exit_close_sock; + } + + schan = ethtool_channels_set_req_alloc(); + if (!schan) { + ret = -1; + goto exit_free_chan; + } + + ethtool_channels_set_req_set_header_dev_index(schan, ifindex); + + if (chan->_present.combined_count) { + if (chan->_present.rx_count || chan->_present.tx_count) { + ethtool_channels_set_req_set_rx_count(schan, 0); + ethtool_channels_set_req_set_tx_count(schan, 0); + } + + if (rx == tx) { + ethtool_channels_set_req_set_combined_count(schan, rx); + } else if (rx > tx) { + ethtool_channels_set_req_set_combined_count(schan, tx); + ethtool_channels_set_req_set_rx_count(schan, rx - tx); + } else { + ethtool_channels_set_req_set_combined_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx - rx); + } + + } else if (chan->_present.rx_count) { + ethtool_channels_set_req_set_rx_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx); + } else { + fprintf(stderr, "Error: device has neither combined nor rx channels\n"); + ret = -1; + goto exit_free_schan; + } + + ret = ethtool_channels_set(ys, schan); + if (ret) { + fprintf(stderr, "YNL set channels: %s\n", ys->err.msg); + } else { + /* We were expecting a failure, go back to previous settings */ + ethtool_channels_set_req_set_combined_count(schan, + chan->combined_count); + ethtool_channels_set_req_set_rx_count(schan, chan->rx_count); + ethtool_channels_set_req_set_tx_count(schan, chan->tx_count); + + ret = ethtool_channels_set(ys, schan); + if (ret) + fprintf(stderr, "YNL un-setting channels: %s\n", + ys->err.msg); + } + +exit_free_schan: + ethtool_channels_set_req_free(schan); +exit_free_chan: + ethtool_channels_get_rsp_free(chan); +exit_close_sock: + ynl_sock_destroy(ys); + + return ret; +} + +static int configure_flow_steering(struct sockaddr_in6 *server_sin) +{ + const char *type = "tcp6"; + const char *server_addr; + char buf[40]; + int flow_id; + + inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf)); + server_addr = buf; + + if (IN6_IS_ADDR_V4MAPPED(&server_sin->sin6_addr)) { + type = "tcp4"; + server_addr = strrchr(server_addr, ':') + 1; + } + + /* Try configure 5-tuple */ + flow_id = ethtool_add_flow("flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d", + type, + client_ip ? "src-ip" : "", + client_ip ?: "", + server_addr, + client_ip ? "src-port" : "", + client_ip ? port : "", + port, start_queue); + if (flow_id < 0) { + /* If that fails, try configure 3-tuple */ + flow_id = ethtool_add_flow("flow-type %s dst-ip %s dst-port %s queue %d", + type, server_addr, port, start_queue); + if (flow_id < 0) + /* If that fails, return error */ + return -1; + } + + return 0; +} + +static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, + struct netdev_queue_id *queues, + unsigned int n_queue_index, struct ynl_sock **ys) +{ + struct netdev_bind_rx_req *req = NULL; + struct netdev_bind_rx_rsp *rsp = NULL; + struct ynl_error yerr; + + *ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!*ys) { + netdev_queue_id_free(queues); + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = netdev_bind_rx_req_alloc(); + netdev_bind_rx_req_set_ifindex(req, ifindex); + netdev_bind_rx_req_set_fd(req, dmabuf_fd); + __netdev_bind_rx_req_set_queues(req, queues, n_queue_index); + + rsp = netdev_bind_rx(*ys, req); + if (!rsp) { + perror("netdev_bind_rx"); + goto err_close; + } + + if (!rsp->_present.id) { + perror("id not present"); + goto err_close; + } + + fprintf(stderr, "got dmabuf id=%d\n", rsp->id); + dmabuf_id = rsp->id; + + netdev_bind_rx_req_free(req); + netdev_bind_rx_rsp_free(rsp); + + return 0; + +err_close: + fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); + netdev_bind_rx_req_free(req); + ynl_sock_destroy(*ys); + return -1; +} + +static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd, + struct ynl_sock **ys) +{ + struct netdev_bind_tx_req *req = NULL; + struct netdev_bind_tx_rsp *rsp = NULL; + struct ynl_error yerr; + + *ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!*ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = netdev_bind_tx_req_alloc(); + netdev_bind_tx_req_set_ifindex(req, ifindex); + netdev_bind_tx_req_set_fd(req, dmabuf_fd); + + rsp = netdev_bind_tx(*ys, req); + if (!rsp) { + perror("netdev_bind_tx"); + goto err_close; + } + + if (!rsp->_present.id) { + perror("id not present"); + goto err_close; + } + + fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id); + tx_dmabuf_id = rsp->id; + + netdev_bind_tx_req_free(req); + netdev_bind_tx_rsp_free(rsp); + + return 0; + +err_close: + fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); + netdev_bind_tx_req_free(req); + ynl_sock_destroy(*ys); + return -1; +} + +static int enable_reuseaddr(int fd) +{ + int opt = 1; + int ret; + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); + if (ret) { + pr_err("SO_REUSEPORT failed"); + return -1; + } + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + if (ret) { + pr_err("SO_REUSEADDR failed"); + return -1; + } + + return 0; +} + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + +static struct netdev_queue_id *create_queues(void) +{ + struct netdev_queue_id *queues; + size_t i = 0; + + queues = netdev_queue_id_alloc(num_queues); + for (i = 0; i < num_queues; i++) { + netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX); + netdev_queue_id_set_id(&queues[i], start_queue + i); + } + + return queues; +} + +static int do_server(struct memory_buffer *mem) +{ + struct ethtool_rings_get_rsp *ring_config; + char ctrl_data[sizeof(int) * 20000]; + size_t non_page_aligned_frags = 0; + struct sockaddr_in6 client_addr; + struct sockaddr_in6 server_sin; + size_t page_aligned_frags = 0; + size_t total_received = 0; + socklen_t client_addr_len; + bool is_devmem = false; + char *tmp_mem = NULL; + struct ynl_sock *ys; + char iobuf[819200]; + int ret, err = -1; + char buffer[256]; + int socket_fd; + int client_fd; + + ret = parse_address(server_ip, atoi(port), &server_sin); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } + + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + return -1; + } + + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to enable TCP header split"); + goto err_free_ring_config; + } + + /* Configure RSS to divert all traffic from our devmem queues */ + if (configure_rss()) { + pr_err("Failed to configure rss"); + goto err_reset_headersplit; + } + + /* Flow steer our devmem flows to start_queue */ + if (configure_flow_steering(&server_sin)) { + pr_err("Failed to configure flow steering"); + goto err_reset_rss; + } + + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_flow_steering; + } + + tmp_mem = malloc(mem->size); + if (!tmp_mem) + goto err_unbind; + + socket_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (socket_fd < 0) { + pr_err("Failed to create socket"); + goto err_free_tmp; + } + + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; + + fprintf(stderr, "binding to address %s:%d\n", server_ip, + ntohs(server_sin.sin6_port)); + + ret = bind(socket_fd, &server_sin, sizeof(server_sin)); + if (ret) { + pr_err("Failed to bind"); + goto err_close_socket; + } + + ret = listen(socket_fd, 1); + if (ret) { + pr_err("Failed to listen"); + goto err_close_socket; + } + + client_addr_len = sizeof(client_addr); + + inet_ntop(AF_INET6, &server_sin.sin6_addr, buffer, + sizeof(buffer)); + fprintf(stderr, "Waiting or connection on %s:%d\n", buffer, + ntohs(server_sin.sin6_port)); + client_fd = accept(socket_fd, &client_addr, &client_addr_len); + if (client_fd < 0) { + pr_err("Failed to accept"); + goto err_close_socket; + } + + inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer, + sizeof(buffer)); + fprintf(stderr, "Got connection from %s:%d\n", buffer, + ntohs(client_addr.sin6_port)); + + while (1) { + struct iovec iov = { .iov_base = iobuf, + .iov_len = sizeof(iobuf) }; + struct dmabuf_cmsg *dmabuf_cmsg = NULL; + struct cmsghdr *cm = NULL; + struct msghdr msg = { 0 }; + struct dmabuf_token token; + ssize_t ret; + + is_devmem = false; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = ctrl_data; + msg.msg_controllen = sizeof(ctrl_data); + ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM); + fprintf(stderr, "recvmsg ret=%ld\n", ret); + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) + continue; + if (ret < 0) { + perror("recvmsg"); + if (errno == EFAULT) { + pr_err("received EFAULT, won't recover"); + goto err_close_client; + } + continue; + } + if (ret == 0) { + errno = 0; + pr_err("client exited"); + goto cleanup; + } + + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { + if (cm->cmsg_level != SOL_SOCKET || + (cm->cmsg_type != SCM_DEVMEM_DMABUF && + cm->cmsg_type != SCM_DEVMEM_LINEAR)) { + fprintf(stderr, "skipping non-devmem cmsg\n"); + continue; + } + + dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm); + is_devmem = true; + + if (cm->cmsg_type == SCM_DEVMEM_LINEAR) { + /* TODO: process data copied from skb's linear + * buffer. + */ + fprintf(stderr, + "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n", + dmabuf_cmsg->frag_size); + + continue; + } + + token.token_start = dmabuf_cmsg->frag_token; + token.token_count = 1; + + total_received += dmabuf_cmsg->frag_size; + fprintf(stderr, + "received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n", + dmabuf_cmsg->frag_offset >> PAGE_SHIFT, + dmabuf_cmsg->frag_offset % getpagesize(), + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token, + total_received, dmabuf_cmsg->dmabuf_id); + + if (dmabuf_cmsg->dmabuf_id != dmabuf_id) { + pr_err("received on wrong dmabuf_id: flow steering error"); + goto err_close_client; + } + + if (dmabuf_cmsg->frag_size % getpagesize()) + non_page_aligned_frags++; + else + page_aligned_frags++; + + provider->memcpy_from_device(tmp_mem, mem, + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size); + + if (do_validation) { + if (validate_buffer(tmp_mem, + dmabuf_cmsg->frag_size)) + goto err_close_client; + } else { + print_nonzero_bytes(tmp_mem, + dmabuf_cmsg->frag_size); + } + + ret = setsockopt(client_fd, SOL_SOCKET, + SO_DEVMEM_DONTNEED, &token, + sizeof(token)); + if (ret != 1) { + pr_err("SO_DEVMEM_DONTNEED not enough tokens"); + goto err_close_client; + } + } + if (!is_devmem) { + pr_err("flow steering error"); + goto err_close_client; + } + + fprintf(stderr, "total_received=%lu\n", total_received); + } + + fprintf(stderr, "%s: ok\n", TEST_PREFIX); + + fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + page_aligned_frags, non_page_aligned_frags); + +cleanup: + err = 0; + +err_close_client: + close(client_fd); +err_close_socket: + close(socket_fd); +err_free_tmp: + free(tmp_mem); +err_unbind: + ynl_sock_destroy(ys); +err_reset_flow_steering: + reset_flow_steering(); +err_reset_rss: + reset_rss(); +err_reset_headersplit: + restore_ring_config(ring_config); +err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); + return err; +} + +int run_devmem_tests(void) +{ + struct ethtool_rings_get_rsp *ring_config; + struct netdev_queue_id *queues; + struct memory_buffer *mem; + struct ynl_sock *ys; + int err = -1; + + mem = provider->alloc(getpagesize() * NUM_PAGES); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return -1; + } + + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + goto err_free_mem; + } + + /* Configure RSS to divert all traffic from our devmem queues */ + if (configure_rss()) { + pr_err("rss error"); + goto err_free_ring_config; + } + + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_rss; + } + + queues = netdev_queue_id_alloc(num_queues); + if (!queues) { + pr_err("Failed to allocate empty queues array"); + goto err_reset_headersplit; + } + + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Binding empty queues array should have failed"); + goto err_unbind; + } + + if (configure_headersplit(ring_config, 0)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; + } + + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; + } + + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Configure dmabuf with header split off should have failed"); + goto err_unbind; + } + + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; + } + + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; + } + + if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_headersplit; + } + + /* Deactivating a bound queue should not be legal */ + if (!check_changing_channels(num_queues, num_queues)) { + pr_err("Deactivating a bound queue should be illegal"); + goto err_unbind; + } + + err = 0; + goto err_unbind; + +err_unbind: + ynl_sock_destroy(ys); +err_reset_headersplit: + restore_ring_config(ring_config); +err_reset_rss: + reset_rss(); +err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); +err_free_mem: + provider->free(mem); + return err; +} + +static uint64_t gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL); +} + +static int do_poll(int fd) +{ + struct pollfd pfd; + int ret; + + pfd.revents = 0; + pfd.fd = fd; + + ret = poll(&pfd, 1, waittime_ms); + if (ret == -1) { + pr_err("poll"); + return -1; + } + + return ret && (pfd.revents & POLLERR); +} + +static int wait_compl(int fd) +{ + int64_t tstop = gettimeofday_ms() + waittime_ms; + char control[CMSG_SPACE(100)] = {}; + struct sock_extended_err *serr; + struct msghdr msg = {}; + struct cmsghdr *cm; + __u32 hi, lo; + int ret; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + while (gettimeofday_ms() < tstop) { + ret = do_poll(fd); + if (ret < 0) + return ret; + if (!ret) + continue; + + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret < 0) { + if (errno == EAGAIN) + continue; + pr_err("recvmsg(MSG_ERRQUEUE)"); + return -1; + } + if (msg.msg_flags & MSG_CTRUNC) { + pr_err("MSG_CTRUNC"); + return -1; + } + + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { + if (cm->cmsg_level != SOL_IP && + cm->cmsg_level != SOL_IPV6) + continue; + if (cm->cmsg_level == SOL_IP && + cm->cmsg_type != IP_RECVERR) + continue; + if (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type != IPV6_RECVERR) + continue; + + serr = (void *)CMSG_DATA(cm); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { + pr_err("wrong origin %u", serr->ee_origin); + return -1; + } + if (serr->ee_errno != 0) { + pr_err("wrong errno %d", serr->ee_errno); + return -1; + } + + hi = serr->ee_data; + lo = serr->ee_info; + + fprintf(stderr, "tx complete [%d,%d]\n", lo, hi); + return 0; + } + } + + pr_err("did not receive tx completion"); + return -1; +} + +static int do_client(struct memory_buffer *mem) +{ + char ctrl_data[CMSG_SPACE(sizeof(__u32))]; + struct sockaddr_in6 server_sin; + struct sockaddr_in6 client_sin; + struct ynl_sock *ys = NULL; + struct iovec iov[MAX_IOV]; + struct msghdr msg = {}; + ssize_t line_size = 0; + struct cmsghdr *cmsg; + char *line = NULL; + int ret, err = -1; + size_t len = 0; + int socket_fd; + __u32 ddmabuf; + int opt = 1; + + ret = parse_address(server_ip, atoi(port), &server_sin); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } + + if (client_ip) { + ret = parse_address(client_ip, atoi(port), &client_sin); + if (ret < 0) { + pr_err("parse client address"); + return ret; + } + } + + socket_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (socket_fd < 0) { + pr_err("create socket"); + return -1; + } + + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; + + ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, + strlen(ifname) + 1); + if (ret) { + pr_err("bindtodevice"); + goto err_close_socket; + } + + if (bind_tx_queue(ifindex, mem->fd, &ys)) { + pr_err("Failed to bind"); + goto err_close_socket; + } + + if (client_ip) { + ret = bind(socket_fd, &client_sin, sizeof(client_sin)); + if (ret) { + pr_err("bind"); + goto err_unbind; + } + } + + ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt)); + if (ret) { + pr_err("set sock opt"); + goto err_unbind; + } + + fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip, + ntohs(server_sin.sin6_port), ifname); + + ret = connect(socket_fd, &server_sin, sizeof(server_sin)); + if (ret) { + pr_err("connect"); + goto err_unbind; + } + + while (1) { + free(line); + line = NULL; + line_size = getline(&line, &len, stdin); + + if (line_size < 0) + break; + + if (max_chunk) { + msg.msg_iovlen = + (line_size + max_chunk - 1) / max_chunk; + if (msg.msg_iovlen > MAX_IOV) { + pr_err("can't partition %zd bytes into maximum of %d chunks", + line_size, MAX_IOV); + goto err_free_line; + } + + for (int i = 0; i < msg.msg_iovlen; i++) { + iov[i].iov_base = (void *)(i * max_chunk); + iov[i].iov_len = max_chunk; + } + + iov[msg.msg_iovlen - 1].iov_len = + line_size - (msg.msg_iovlen - 1) * max_chunk; + } else { + iov[0].iov_base = 0; + iov[0].iov_len = line_size; + msg.msg_iovlen = 1; + } + + msg.msg_iov = iov; + provider->memcpy_to_device(mem, 0, line, line_size); + + msg.msg_control = ctrl_data; + msg.msg_controllen = sizeof(ctrl_data); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_DEVMEM_DMABUF; + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); + + ddmabuf = tx_dmabuf_id; + + *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf; + + ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY); + if (ret < 0) { + pr_err("Failed sendmsg"); + goto err_free_line; + } + + fprintf(stderr, "sendmsg_ret=%d\n", ret); + + if (ret != line_size) { + pr_err("Did not send all bytes %d vs %zd", ret, line_size); + goto err_free_line; + } + + if (wait_compl(socket_fd)) + goto err_free_line; + } + + fprintf(stderr, "%s: tx ok\n", TEST_PREFIX); + + err = 0; + +err_free_line: + free(line); +err_unbind: + ynl_sock_destroy(ys); +err_close_socket: + close(socket_fd); + return err; +} + +int main(int argc, char *argv[]) +{ + struct memory_buffer *mem; + int is_server = 0, opt; + int ret, err = 1; + + while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) { + switch (opt) { + case 'l': + is_server = 1; + break; + case 's': + server_ip = optarg; + break; + case 'c': + client_ip = optarg; + break; + case 'p': + port = optarg; + break; + case 'v': + do_validation = atoll(optarg); + break; + case 'q': + num_queues = atoi(optarg); + break; + case 't': + start_queue = atoi(optarg); + break; + case 'f': + ifname = optarg; + break; + case 'z': + max_chunk = atoi(optarg); + break; + case '?': + fprintf(stderr, "unknown option: %c\n", optopt); + break; + } + } + + if (!ifname) { + pr_err("Missing -f argument"); + return 1; + } + + ifindex = if_nametoindex(ifname); + + fprintf(stderr, "using ifindex=%u\n", ifindex); + + if (!server_ip && !client_ip) { + if (start_queue < 0 && num_queues < 0) { + num_queues = rxq_num(ifindex); + if (num_queues < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } + /* make sure can bind to multiple queues */ + start_queue = num_queues / 2; + num_queues /= 2; + } + + if (start_queue < 0 || num_queues < 0) { + pr_err("Both -t and -q are required"); + return 1; + } + + return run_devmem_tests(); + } + + if (start_queue < 0 && num_queues < 0) { + num_queues = rxq_num(ifindex); + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } + + num_queues = 1; + start_queue = rxq_num(ifindex) - num_queues; + + if (start_queue < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } + + fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues); + } + + for (; optind < argc; optind++) + fprintf(stderr, "extra arguments: %s\n", argv[optind]); + + if (start_queue < 0) { + pr_err("Missing -t argument"); + return 1; + } + + if (num_queues < 0) { + pr_err("Missing -q argument"); + return 1; + } + + if (!server_ip) { + pr_err("Missing -s argument"); + return 1; + } + + if (!port) { + pr_err("Missing -p argument"); + return 1; + } + + mem = provider->alloc(getpagesize() * NUM_PAGES); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return 1; + } + + ret = is_server ? do_server(mem) : do_client(mem); + if (ret) + goto err_free_mem; + + err = 0; + +err_free_mem: + provider->free(mem); + return err; +} diff --git a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py new file mode 100755 index 000000000000..c1e943d53f19 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests related to configuration of HW timestamping +""" + +import errno +from lib.py import ksft_run, ksft_exit, ksft_ge, ksft_eq, KsftSkipEx +from lib.py import NetDrvEnv, EthtoolFamily, NlError + + +def __get_hwtimestamp_support(cfg): + """ Retrieve supported configuration information """ + + try: + tsinfo = cfg.ethnl.tsinfo_get({'header': {'dev-name': cfg.ifname}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported") from e + raise + + ctx = {} + tx = tsinfo.get('tx-types', {}) + rx = tsinfo.get('rx-filters', {}) + + bits = tx.get('bits', {}) + ctx['tx'] = bits.get('bit', []) + bits = rx.get('bits', {}) + ctx['rx'] = bits.get('bit', []) + return ctx + + +def __get_hwtimestamp_config(cfg): + """ Retrieve current TS configuration information """ + + try: + tscfg = cfg.ethnl.tsconfig_get({'header': {'dev-name': cfg.ifname}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via netlink") from e + raise + return tscfg + + +def __set_hwtimestamp_config(cfg, ts): + """ Setup new TS configuration information """ + + ts['header'] = {'dev-name': cfg.ifname} + try: + res = cfg.ethnl.tsconfig_set(ts) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via netlink") from e + raise + return res + + +def test_hwtstamp_tx(cfg): + """ + Test TX timestamp configuration. + The driver should apply provided config and report back proper state. + """ + + orig_tscfg = __get_hwtimestamp_config(cfg) + ts = __get_hwtimestamp_support(cfg) + tx = ts['tx'] + for t in tx: + tscfg = orig_tscfg + tscfg['tx-types']['bits']['bit'] = [t] + res = __set_hwtimestamp_config(cfg, tscfg) + if res is None: + res = __get_hwtimestamp_config(cfg) + ksft_eq(res['tx-types']['bits']['bit'], [t]) + __set_hwtimestamp_config(cfg, orig_tscfg) + + +def test_hwtstamp_rx(cfg): + """ + Test RX timestamp configuration. + The filter configuration is taken from the list of supported filters. + The driver should apply the config without error and report back proper state. + Some extension of the timestamping scope is allowed for PTP filters. + """ + + orig_tscfg = __get_hwtimestamp_config(cfg) + ts = __get_hwtimestamp_support(cfg) + rx = ts['rx'] + for r in rx: + tscfg = orig_tscfg + tscfg['rx-filters']['bits']['bit'] = [r] + res = __set_hwtimestamp_config(cfg, tscfg) + if res is None: + res = __get_hwtimestamp_config(cfg) + if r['index'] == 0 or r['index'] == 1: + ksft_eq(res['rx-filters']['bits']['bit'][0]['index'], r['index']) + else: + # the driver can fallback to some value which has higher coverage for timestamping + ksft_ge(res['rx-filters']['bits']['bit'][0]['index'], r['index']) + __set_hwtimestamp_config(cfg, orig_tscfg) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + ksft_run([test_hwtstamp_tx, test_hwtstamp_rx], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py new file mode 100755 index 000000000000..2a51b60df8a1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Test driver resilience vs page pool allocation failures. +""" + +import errno +import time +import math +import os +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import NetdevFamily, NlError +from lib.py import NetDrvEpEnv +from lib.py import cmd, tool, GenerateTraffic + + +def _write_fail_config(config): + for key, value in config.items(): + path = "/sys/kernel/debug/fail_function/" + with open(path + key, "w", encoding='ascii') as fp: + fp.write(str(value) + "\n") + + +def _enable_pp_allocation_fail(): + if not os.path.exists("/sys/kernel/debug/fail_function"): + raise KsftSkipEx("Kernel built without function error injection (or DebugFS)") + + if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): + _write_fail_config({"inject": "page_pool_alloc_netmems"}) + + _write_fail_config({ + "verbose": 0, + "interval": 511, + "probability": 100, + "times": -1, + }) + + +def _disable_pp_allocation_fail(): + if not os.path.exists("/sys/kernel/debug/fail_function"): + return + + if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): + _write_fail_config({"inject": ""}) + + _write_fail_config({ + "probability": 0, + "times": 0, + }) + + +def test_pp_alloc(cfg, netdevnl): + """ + Configure page pool allocation fail injection while traffic is running. + """ + + def get_stats(): + return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + def check_traffic_flowing(): + stat1 = get_stats() + time.sleep(1) + stat2 = get_stats() + if stat2['rx-packets'] - stat1['rx-packets'] < 4000: + raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets']) + + + try: + stats = get_stats() + except NlError as e: + if e.nl_msg.error == -errno.EOPNOTSUPP: + stats = {} + else: + raise + if 'rx-alloc-fail' not in stats: + raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats") + + set_g = False + traffic = None + try: + traffic = GenerateTraffic(cfg) + + check_traffic_flowing() + + _enable_pp_allocation_fail() + + s1 = get_stats() + time.sleep(3) + s2 = get_stats() + + seen_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail'] + if seen_fails < 1: + raise KsftSkipEx("Allocation failures not increasing") + pkts = s2['rx-packets'] - s1['rx-packets'] + # Expecting one failure per 512 buffers, 3.1x safety margin + want_fails = math.floor(pkts / 512 / 3.1) + if seen_fails < want_fails: + raise KsftSkipEx("Allocation increasing too slowly", seen_fails, + "packets:", pkts) + ksft_pr(f"Seen: pkts:{pkts} fails:{seen_fails} (pass thrs:{want_fails})") + + # Basic failures are fine, try to wobble some settings to catch extra failures + check_traffic_flowing() + g = tool("ethtool", "-g " + cfg.ifname, json=True)[0] + if 'rx' in g and g["rx"] * 2 <= g["rx-max"]: + new_g = g['rx'] * 2 + elif 'rx' in g: + new_g = g['rx'] // 2 + else: + new_g = None + + if new_g: + set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0 + if set_g: + ksft_pr("ethtool -G change retval: success") + else: + ksft_pr("ethtool -G change retval: did not succeed", new_g) + else: + ksft_pr("ethtool -G change retval: did not try") + + time.sleep(0.1) + check_traffic_flowing() + finally: + _disable_pp_allocation_fail() + if traffic: + traffic.stop() + time.sleep(0.1) + if set_g: + cmd(f"ethtool -G {cfg.ifname} rx {g['rx']}") + + +def main() -> None: + """ Ksft boiler plate main """ + netdevnl = NetdevFamily() + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + + ksft_run([test_pp_alloc], args=(cfg, netdevnl, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py new file mode 100755 index 000000000000..19847f3d4a00 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_api.py @@ -0,0 +1,476 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +API level tests for RSS (mostly Netlink vs IOCTL). +""" + +import errno +import glob +import random +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import defer, ethtool, CmdExitFailure +from lib.py import EthtoolFamily, NlError +from lib.py import NetDrvEnv + + +def _require_2qs(cfg): + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + return qcnt + + +def _ethtool_create(cfg, act, opts): + output = ethtool(f"{act} {cfg.ifname} {opts}").stdout + # Output will be something like: "New RSS context is 1" or + # "Added rule with ID 7", we want the integer from the end + return int(output.split()[-1]) + + +def _ethtool_get_cfg(cfg, fl_type, to_nl=False): + descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + if to_nl: + converter = { + "IP SA": "ip-src", + "IP DA": "ip-dst", + "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3", + } + + ret = set() + else: + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + } + + ret = "" + + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + if to_nl: + ret.add(converter[line]) + else: + ret += converter[line] + return ret + + +def test_rxfh_nl_set_fail(cfg): + """ + Test error path of Netlink SET. + """ + _require_2qs(cfg) + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + with ksft_raises(NlError): + ethnl.rss_set({"header": {"dev-name": "lo"}, + "indir": None}) + + with ksft_raises(NlError): + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [100000]}) + ntf = next(ethnl.poll_ntf(duration=0.2), None) + ksft_is(ntf, None) + + +def test_rxfh_nl_set_indir(cfg): + """ + Test setting indirection table via Netlink. + """ + qcnt = _require_2qs(cfg) + + # Test some SETs with a value + reset = defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, "indir": None}) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), {1}) + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [0, 1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + # Make sure we can't set the queue count below max queue used + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 0 rx 1") + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 1 rx 0") + + # Test reset back to default + reset.exec() + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), set(range(qcnt))) + + +def test_rxfh_nl_set_indir_ctx(cfg): + """ + Test setting indirection table for a custom context via Netlink. + """ + _require_2qs(cfg) + + # Get setting for ctx 0, we'll make sure they don't get clobbered + dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + + # Create context + ctx_id = _ethtool_create(cfg, "-X", "context new") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id, "indir": [1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id}) + ksft_eq(set(rss.get("indir", [-1])), {1}) + + ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(ctx0, dflt) + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id, "indir": [0, 1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id}) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(ctx0, dflt) + + # Make sure we can't set the queue count below max queue used + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 0 rx 1") + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 1 rx 0") + + +def test_rxfh_indir_ntf(cfg): + """ + Check that Netlink notifications are generated when RSS indirection + table was modified. + """ + _require_2qs(cfg) + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethtool(f"--disable-netlink -X {cfg.ifname} weight 0 1") + reset = defer(ethtool, f"-X {cfg.ifname} default") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(set(ntf["msg"]["indir"]), {1}) + + reset.exec() + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after reset") + ksft_eq(ntf["name"], "rss-ntf") + ksft_is(ntf["msg"].get("context"), None) + ksft_ne(set(ntf["msg"]["indir"]), {1}) + + +def test_rxfh_indir_ctx_ntf(cfg): + """ + Check that Netlink notifications are generated when RSS indirection + table was modified on an additional RSS context. + """ + _require_2qs(cfg) + + ctx_id = _ethtool_create(cfg, "-X", "context new") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} weight 0 1") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(ntf["msg"].get("context"), ctx_id) + ksft_eq(set(ntf["msg"]["indir"]), {1}) + + +def test_rxfh_nl_set_key(cfg): + """ + Test setting hashing key via Netlink. + """ + + dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, + "hkey": dflt["hkey"], "indir": None}) + + # Empty key should error out + with ksft_raises(NlError) as cm: + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hkey": None}) + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.hkey') + + # Set key to random + mod = random.randbytes(len(dflt["hkey"])) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hkey": mod}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(rss.get("hkey", [-1]), mod) + + # Set key to random and indir tbl to something at once + mod = random.randbytes(len(dflt["hkey"])) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [0, 1], "hkey": mod}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(rss.get("hkey", [-1]), mod) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + +def test_rxfh_fields(cfg): + """ + Test reading Rx Flow Hash over Netlink. + """ + + flow_types = ["tcp4", "tcp6", "udp4", "udp6"] + ethnl = EthtoolFamily() + + cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + for fl_type in flow_types: + one = _ethtool_get_cfg(cfg, fl_type, to_nl=True) + ksft_eq(one, cfg_nl["flow-hash"][fl_type], + comment="Config for " + fl_type) + + +def test_rxfh_fields_set(cfg): + """ Test configuring Rx Flow Hash over Netlink. """ + + flow_types = ["tcp4", "tcp6", "udp4", "udp6"] + + # Collect current settings + cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # symmetric hashing is config-order-sensitive make sure we leave + # symmetric mode, or make the flow-hash sym-compatible first + changes = [{"flow-hash": cfg_old["flow-hash"],}, + {"input-xfrm": cfg_old.get("input-xfrm", {}),}] + if cfg_old.get("input-xfrm"): + changes = list(reversed(changes)) + for old in changes: + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old) + + # symmetric hashing prevents some of the configs below + if cfg_old.get("input-xfrm"): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": {}}) + + for fl_type in flow_types: + cur = _ethtool_get_cfg(cfg, fl_type) + if cur == "sdfn": + change_nl = {"ip-src", "ip-dst"} + change_ic = "sd" + else: + change_nl = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"} + change_ic = "sdfn" + + cfg.ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {fl_type: change_nl} + }) + reset = defer(ethtool, f"--disable-netlink -N {cfg.ifname} " + f"rx-flow-hash {fl_type} {cur}") + + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(change_nl, cfg_nl["flow-hash"][fl_type], + comment=f"Config for {fl_type} over Netlink") + cfg_ic = _ethtool_get_cfg(cfg, fl_type) + ksft_eq(change_ic, cfg_ic, + comment=f"Config for {fl_type} over IOCTL") + + reset.exec() + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(cfg_old["flow-hash"][fl_type], cfg_nl["flow-hash"][fl_type], + comment=f"Un-config for {fl_type} over Netlink") + cfg_ic = _ethtool_get_cfg(cfg, fl_type) + ksft_eq(cur, cfg_ic, comment=f"Un-config for {fl_type} over IOCTL") + + # Try to set multiple at once, the defer was already installed at the start + change = {"ip-src"} + if change == cfg_old["flow-hash"]["tcp4"]: + change = {"ip-dst"} + cfg.ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {x: change for x in flow_types} + }) + + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + for fl_type in flow_types: + ksft_eq(change, cfg_nl["flow-hash"][fl_type], + comment=f"multi-config for {fl_type} over Netlink") + + +def test_rxfh_fields_set_xfrm(cfg): + """ Test changing Rx Flow Hash vs xfrm_input at once. """ + + def set_rss(cfg, xfrm, fh): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": xfrm, "flow-hash": fh}) + + # Install the reset handler + cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # symmetric hashing is config-order-sensitive make sure we leave + # symmetric mode, or make the flow-hash sym-compatible first + changes = [{"flow-hash": cfg_old["flow-hash"],}, + {"input-xfrm": cfg_old.get("input-xfrm", {}),}] + if cfg_old.get("input-xfrm"): + changes = list(reversed(changes)) + for old in changes: + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old) + + # Make sure we start with input-xfrm off, and tcp4 config non-sym + set_rss(cfg, {}, {}) + set_rss(cfg, {}, {"tcp4": {"ip-src"}}) + + # Setting sym and fixing tcp4 config not expected to pass right now + with ksft_raises(NlError): + set_rss(cfg, {"sym-xor"}, {"tcp4": {"ip-src", "ip-dst"}}) + # One at a time should work, hopefully + set_rss(cfg, 0, {"tcp4": {"ip-src", "ip-dst"}}) + no_support = False + try: + set_rss(cfg, {"sym-xor"}, {}) + except NlError: + try: + set_rss(cfg, {"sym-or-xor"}, {}) + except NlError: + no_support = True + if no_support: + raise KsftSkipEx("no input-xfrm supported") + # Disabling two at once should not work either without kernel changes + with ksft_raises(NlError): + set_rss(cfg, {}, {"tcp4": {"ip-src"}}) + + +def test_rxfh_fields_ntf(cfg): + """ Test Rx Flow Hash notifications. """ + + cur = _ethtool_get_cfg(cfg, "tcp4") + if cur == "sdfn": + change = {"ip-src", "ip-dst"} + else: + change = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"} + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {"tcp4": change} + }) + reset = defer(ethtool, + f"--disable-netlink -N {cfg.ifname} rx-flow-hash tcp4 {cur}") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after IOCTL change") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(ntf["msg"]["flow-hash"]["tcp4"], change) + ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None) + + reset.exec() + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after Netlink change") + ksft_eq(ntf["name"], "rss-ntf") + ksft_ne(ntf["msg"]["flow-hash"]["tcp4"], change) + ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None) + + +def test_rss_ctx_add(cfg): + """ Test creating an additional RSS context via Netlink """ + + _require_2qs(cfg) + + # Test basic creation + ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}}) + d = defer(ethtool, f"-X {cfg.ifname} context {ctx.get('context')} delete") + ksft_ne(ctx.get("context", 0), 0) + ksft_ne(set(ctx.get("indir", [0])), {0}, + comment="Driver should init the indirection table") + + # Try requesting the ID we just got allocated + with ksft_raises(NlError) as cm: + ctx = cfg.ethnl.rss_create_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx.get("context"), + }) + ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete") + d.exec() + ksft_eq(cm.exception.nl_msg.error, -errno.EBUSY) + + # Test creating with a specified RSS table, and context ID + ctx_id = ctx.get("context") + ctx = cfg.ethnl.rss_create_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx_id, + "indir": [1], + }) + ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete") + ksft_eq(ctx.get("context"), ctx_id) + ksft_eq(set(ctx.get("indir", [0])), {1}) + + +def test_rss_ctx_ntf(cfg): + """ Test notifications for creating additional RSS contexts """ + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + # Create / delete via Netlink + ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}}) + cfg.ethnl.rss_delete_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx["context"], + }) + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[NL] No notification after context creation") + ksft_eq(ntf["name"], "rss-create-ntf") + ksft_eq(ctx, ntf["msg"]) + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[NL] No notification after context deletion") + ksft_eq(ntf["name"], "rss-delete-ntf") + + # Create / deleve via IOCTL + ctx_id = _ethtool_create(cfg, "--disable-netlink -X", "context new") + ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} delete") + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[IOCTL] No notification after context creation") + ksft_eq(ntf["name"], "rss-create-ntf") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[IOCTL] No notification after context deletion") + ksft_eq(ntf["name"], "rss-delete-ntf") + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py new file mode 100755 index 000000000000..ed7e405682f0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -0,0 +1,832 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import datetime +import random +import re +from lib.py import ksft_run, ksft_pr, ksft_exit +from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises +from lib.py import NetDrvEpEnv +from lib.py import EthtoolFamily, NetdevFamily +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import rand_port +from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure + + +def _rss_key_str(key): + return ":".join(["{:02x}".format(x) for x in key]) + + +def _rss_key_rand(length): + return [random.randint(0, 255) for _ in range(length)] + + +def _rss_key_check(cfg, data=None, context=0): + if data is None: + data = get_rss(cfg, context=context) + if 'rss-hash-key' not in data: + return + non_zero = [x for x in data['rss-hash-key'] if x != 0] + ksft_eq(bool(non_zero), True, comment=f"RSS key is all zero {data['rss-hash-key']}") + + +def get_rss(cfg, context=0): + return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0] + + +def get_drop_err_sum(cfg): + stats = ip("-s -s link show dev " + cfg.ifname, json=True)[0] + cnt = 0 + for key in ['errors', 'dropped', 'over_errors', 'fifo_errors', + 'length_errors', 'crc_errors', 'missed_errors', + 'frame_errors']: + cnt += stats["stats64"]["rx"][key] + return cnt, stats["stats64"]["tx"]["carrier_changes"] + + +def ethtool_create(cfg, act, opts): + output = ethtool(f"{act} {cfg.ifname} {opts}").stdout + # Output will be something like: "New RSS context is 1" or + # "Added rule with ID 7", we want the integer from the end + return int(output.split()[-1]) + + +def require_ntuple(cfg): + features = ethtool(f"-k {cfg.ifname}", json=True)[0] + if not features["ntuple-filters"]["active"]: + # ntuple is more of a capability than a config knob, don't bother + # trying to enable it (until some driver actually needs it). + raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"])) + + +def require_context_cnt(cfg, need_cnt): + # There's no good API to get the context count, so the tests + # which try to add a lot opportunisitically set the count they + # discovered. Careful with test ordering! + if need_cnt and cfg.context_cnt and cfg.context_cnt < need_cnt: + raise KsftSkipEx(f"Test requires at least {need_cnt} contexts, but device only has {cfg.context_cnt}") + + +# Get Rx packet counts for all queues, as a simple list of integers +# if @prev is specified the prev counts will be subtracted +def _get_rx_cnts(cfg, prev=None): + cfg.wait_hw_stats_settle() + data = cfg.netdevnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, dump=True) + data = [x for x in data if x['queue-type'] == "rx"] + max_q = max([x["queue-id"] for x in data]) + queue_stats = [0] * (max_q + 1) + for q in data: + queue_stats[q["queue-id"]] = q["rx-packets"] + if prev and q["queue-id"] < len(prev): + queue_stats[q["queue-id"]] -= prev[q["queue-id"]] + return queue_stats + + +def _send_traffic_check(cfg, port, name, params): + # params is a dict with 3 possible keys: + # - "target": required, which queues we expect to get iperf traffic + # - "empty": optional, which queues should see no traffic at all + # - "noise": optional, which queues we expect to see low traffic; + # used for queues of the main context, since some background + # OS activity may use those queues while we're testing + # the value for each is a list, or some other iterable containing queue ids. + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + directed = sum(cnts[i] for i in params['target']) + + ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts)) + if params.get('noise'): + ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2, + f"traffic on other queues ({name})':" + str(cnts)) + if params.get('empty'): + ksft_eq(sum(cnts[i] for i in params['empty']), 0, + f"traffic on inactive queues ({name}): " + str(cnts)) + + +def _ntuple_rule_check(cfg, rule_id, ctx_id): + """Check that ntuple rule references RSS context ID""" + text = ethtool(f"-n {cfg.ifname} rule {rule_id}").stdout + pattern = f"RSS Context (ID: )?{ctx_id}" + ksft_true(re.search(pattern, text), "RSS context not referenced in ntuple rule") + + +def test_rss_key_indir(cfg): + """Test basics like updating the main RSS key and indirection table.""" + + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 3: + raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") + + data = get_rss(cfg) + want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] + for k in want_keys: + if k not in data: + raise KsftFailEx("ethtool results missing key: " + k) + if not data[k]: + raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}") + + _rss_key_check(cfg, data=data) + key_len = len(data['rss-hash-key']) + + # Set the key + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key)) + + data = get_rss(cfg) + ksft_eq(key, data['rss-hash-key']) + + # Set the indirection table and the key together + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} equal 3 hkey " + _rss_key_str(key)) + reset_indir = defer(ethtool, f"-X {cfg.ifname} default") + + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(2, max(data['rss-indirection-table'])) + + # Reset indirection table and set the key + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} default hkey " + _rss_key_str(key)) + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(qcnt - 1, max(data['rss-indirection-table'])) + + # Set the indirection table + ethtool(f"-X {cfg.ifname} equal 2") + data = get_rss(cfg) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(1, max(data['rss-indirection-table'])) + + # Check we only get traffic on the first 2 queues + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + # 2 queues, 20k packets, must be at least 5k per queue + ksft_ge(cnts[0], 5000, "traffic on main context (1/2): " + str(cnts)) + ksft_ge(cnts[1], 5000, "traffic on main context (2/2): " + str(cnts)) + # The other queues should be unused + ksft_eq(sum(cnts[2:]), 0, "traffic on unused queues: " + str(cnts)) + + # Restore, and check traffic gets spread again + reset_indir.exec() + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + if qcnt > 4: + # First two queues get less traffic than all the rest + ksft_lt(sum(cnts[:2]), sum(cnts[2:]), + "traffic distributed: " + str(cnts)) + else: + # When queue count is low make sure third queue got significant pkts + ksft_ge(cnts[2], 3500, "traffic distributed: " + str(cnts)) + + +def test_rss_queue_reconfigure(cfg, main_ctx=True): + """Make sure queue changes can't override requested RSS config. + + By default main RSS table should change to include all queues. + When user sets a specific RSS config the driver should preserve it, + even when queue count changes. Driver should refuse to deactivate + queues used in the user-set RSS config. + """ + + if not main_ctx: + require_ntuple(cfg) + + # Start with 4 queues, an arbitrary known number. + try: + qcnt = len(_get_rx_cnts(cfg)) + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except: + raise KsftSkipEx("Not enough queues for the test or qstat not supported") + + if main_ctx: + ctx_id = 0 + ctx_ref = "" + else: + ctx_id = ethtool_create(cfg, "-X", "context new") + ctx_ref = f"context {ctx_id}" + defer(ethtool, f"-X {cfg.ifname} {ctx_ref} delete") + + # Indirection table should be distributing to all queues. + data = get_rss(cfg, context=ctx_id) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(3, max(data['rss-indirection-table'])) + + # Increase queues, indirection table should be distributing to all queues. + # It's unclear whether tables of additional contexts should be reset, too. + if main_ctx: + ethtool(f"-L {cfg.ifname} combined 5") + data = get_rss(cfg) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(4, max(data['rss-indirection-table'])) + ethtool(f"-L {cfg.ifname} combined 4") + + # Configure the table explicitly + port = rand_port() + ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 0 1") + if main_ctx: + other_key = 'empty' + defer(ethtool, f"-X {cfg.ifname} default") + else: + other_key = 'noise' + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}" + ntuple = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + + _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3), + other_key: (1, 2) }) + + # We should be able to increase queues, but table should be left untouched + ethtool(f"-L {cfg.ifname} combined 5") + data = get_rss(cfg, context=ctx_id) + ksft_eq({0, 3}, set(data['rss-indirection-table'])) + + _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3), + other_key: (1, 2, 4) }) + + # Setting queue count to 3 should fail, queue 3 is used + try: + ethtool(f"-L {cfg.ifname} combined 3") + except CmdExitFailure: + pass + else: + raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})") + + if not main_ctx: + ethtool(f"-L {cfg.ifname} combined 4") + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 1" + try: + # this targets queue 4, which doesn't exist + ntuple2 = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") + except CmdExitFailure: + pass + else: + raise Exception(f"Driver didn't prevent us from targeting a nonexistent queue (context {ctx_id})") + # change the table to target queues 0 and 2 + ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0") + # ntuple rule therefore targets queues 1 and 3 + try: + ntuple2 = ethtool_create(cfg, "-N", flow) + except CmdExitFailure: + ksft_pr("Driver does not support rss + queue offset") + return + + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") + # should replace existing filter + ksft_eq(ntuple, ntuple2) + _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3), + 'noise' : (0, 2) }) + # Setting queue count to 3 should fail, queue 3 is used + try: + ethtool(f"-L {cfg.ifname} combined 3") + except CmdExitFailure: + pass + else: + raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})") + + +def test_rss_resize(cfg): + """Test resizing of the RSS table. + + Some devices dynamically increase and decrease the size of the RSS + indirection table based on the number of enabled queues. + When that happens driver must maintain the balance of entries + (preferably duplicating the smaller table). + """ + + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels['combined-max'] + qcnt = channels['combined-count'] + + if ch_max < 2: + raise KsftSkipEx(f"Not enough queues for the test: {ch_max}") + + ethtool(f"-L {cfg.ifname} combined 2") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + ethtool(f"-X {cfg.ifname} weight 1 7") + defer(ethtool, f"-X {cfg.ifname} default") + + ethtool(f"-L {cfg.ifname} combined {ch_max}") + data = get_rss(cfg) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(1, max(data['rss-indirection-table'])) + + ksft_eq(7, + data['rss-indirection-table'].count(1) / + data['rss-indirection-table'].count(0), + f"Table imbalance after resize: {data['rss-indirection-table']}") + + +def test_hitless_key_update(cfg): + """Test that flows may be rehashed without impacting traffic. + + Some workloads may want to rehash the flows in response to an imbalance. + Most effective way to do that is changing the RSS key. Check that changing + the key does not cause link flaps or traffic disruption. + + Disrupting traffic for key update is not a bug, but makes the key + update unusable for rehashing under load. + """ + data = get_rss(cfg) + key_len = len(data['rss-hash-key']) + + ethnl = EthtoolFamily() + key = random.randbytes(key_len) + + tgen = GenerateTraffic(cfg) + try: + errors0, carrier0 = get_drop_err_sum(cfg) + t0 = datetime.datetime.now() + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, "hkey": key}) + t1 = datetime.datetime.now() + errors1, carrier1 = get_drop_err_sum(cfg) + finally: + tgen.wait_pkts_and_stop(5000) + + ksft_lt((t1 - t0).total_seconds(), 0.15) + ksft_eq(errors1 - errors1, 0) + ksft_eq(carrier1 - carrier0, 0) + + +def test_rss_context_dump(cfg): + """ + Test dumping RSS contexts. This tests mostly exercises the kernel APIs. + """ + + # Get a random key of the right size + data = get_rss(cfg) + if 'rss-hash-key' in data: + key_data = _rss_key_rand(len(data['rss-hash-key'])) + key = _rss_key_str(key_data) + else: + key_data = [] + key = "ba:ad" + + ids = [] + try: + ids.append(ethtool_create(cfg, "-X", f"context new")) + defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete") + + ids.append(ethtool_create(cfg, "-X", f"context new weight 1 1")) + defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete") + + ids.append(ethtool_create(cfg, "-X", f"context new hkey {key}")) + defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete") + except CmdExitFailure: + if not ids: + raise KsftSkipEx("Unable to add any contexts") + ksft_pr(f"Added only {len(ids)} out of 3 contexts") + + expect_tuples = set([(cfg.ifname, -1)] + [(cfg.ifname, ctx_id) for ctx_id in ids]) + + # Dump all + ctxs = cfg.ethnl.rss_get({}, dump=True) + tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs] + ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump") + ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname]) + ksft_eq(expect_tuples, ctx_tuples) + + # Sanity-check the results + for data in ctxs: + ksft_ne(set(data.get('indir', [1])), {0}, "indir table is all zero") + ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero") + + # More specific checks + if len(ids) > 1 and data.get('context') == ids[1]: + ksft_eq(set(data['indir']), {0, 1}, + "ctx1 - indir table mismatch") + if len(ids) > 2 and data.get('context') == ids[2]: + ksft_eq(data['hkey'], bytes(key_data), "ctx2 - key mismatch") + + # Ifindex filter + ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}, dump=True) + tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs] + ctx_tuples = set(tuples) + ksft_eq(len(tuples), len(ctx_tuples), "duplicates in context dump") + ksft_eq(expect_tuples, ctx_tuples) + + # Skip ctx 0 + expect_tuples.remove((cfg.ifname, -1)) + + ctxs = cfg.ethnl.rss_get({'start-context': 1}, dump=True) + tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs] + ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump") + ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname]) + ksft_eq(expect_tuples, ctx_tuples) + + # And finally both with ifindex and skip main + ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}, 'start-context': 1}, dump=True) + ctx_tuples = set([(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]) + ksft_eq(expect_tuples, ctx_tuples) + + +def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): + """ + Test separating traffic into RSS contexts. + The queues will be allocated 2 for each context: + ctx0 ctx1 ctx2 ctx3 + [0 1] [2 3] [4 5] [6 7] ... + """ + + require_ntuple(cfg) + + requested_ctx_cnt = ctx_cnt + + # Try to allocate more queues when necessary + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 2 + 2 * ctx_cnt: + try: + ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}") + ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + ports = [] + + # Use queues 0 and 1 for normal traffic + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + for i in range(ctx_cnt): + want_cfg = f"start {2 + i * 2} equal 2" + create_cfg = want_cfg if create_with_cfg else "" + + try: + ctx_id = ethtool_create(cfg, "-X", f"context new {create_cfg}") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + except CmdExitFailure: + # try to carry on and skip at the end + if i == 0: + raise + ksft_pr(f"Failed to create context {i + 1}, trying to test what we got") + ctx_cnt = i + if cfg.context_cnt is None: + cfg.context_cnt = ctx_cnt + break + + _rss_key_check(cfg, context=ctx_id) + + if not create_with_cfg: + ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}") + _rss_key_check(cfg, context=ctx_id) + + # Sanity check the context we just created + data = get_rss(cfg, ctx_id) + ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data)) + ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data)) + + ports.append(rand_port()) + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}" + ntuple = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + + _ntuple_rule_check(cfg, ntuple, ctx_id) + + for i in range(ctx_cnt): + _send_traffic_check(cfg, ports[i], f"context {i}", + { 'target': (2+i*2, 3+i*2), + 'noise': (0, 1), + 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) }) + + if requested_ctx_cnt != ctx_cnt: + raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") + + +def test_rss_context4(cfg): + test_rss_context(cfg, 4) + + +def test_rss_context32(cfg): + test_rss_context(cfg, 32) + + +def test_rss_context4_create_with_cfg(cfg): + test_rss_context(cfg, 4, create_with_cfg=True) + + +def test_rss_context_queue_reconfigure(cfg): + test_rss_queue_reconfigure(cfg, main_ctx=False) + + +def test_rss_context_out_of_order(cfg, ctx_cnt=4): + """ + Test separating traffic into RSS contexts. + Contexts are removed in semi-random order, and steering re-tested + to make sure removal doesn't break steering to surviving contexts. + Test requires 3 contexts to work. + """ + + require_ntuple(cfg) + require_context_cnt(cfg, 4) + + # Try to allocate more queues when necessary + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 2 + 2 * ctx_cnt: + try: + ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}") + ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + ntuple = [] + ctx = [] + ports = [] + + def remove_ctx(idx): + ntuple[idx].exec() + ntuple[idx] = None + ctx[idx].exec() + ctx[idx] = None + + def check_traffic(): + for i in range(ctx_cnt): + if ctx[i]: + expected = { + 'target': (2+i*2, 3+i*2), + 'noise': (0, 1), + 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) + } + else: + expected = { + 'target': (0, 1), + 'empty': range(2, 2+2*ctx_cnt) + } + + _send_traffic_check(cfg, ports[i], f"context {i}", expected) + + # Use queues 0 and 1 for normal traffic + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + for i in range(ctx_cnt): + ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2") + ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")) + + ports.append(rand_port()) + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")) + + check_traffic() + + # Remove middle context + remove_ctx(ctx_cnt // 2) + check_traffic() + + # Remove first context + remove_ctx(0) + check_traffic() + + # Remove last context + remove_ctx(-1) + check_traffic() + + +def test_rss_context_overlap(cfg, other_ctx=0): + """ + Test contexts overlapping with each other. + Use 4 queues for the main context, but only queues 2 and 3 for context 1. + """ + + require_ntuple(cfg) + if other_ctx: + require_context_cnt(cfg, 2) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + if other_ctx == 0: + ethtool(f"-X {cfg.ifname} equal 4") + defer(ethtool, f"-X {cfg.ifname} default") + else: + other_ctx = ethtool_create(cfg, "-X", "context new") + ethtool(f"-X {cfg.ifname} context {other_ctx} equal 4") + defer(ethtool, f"-X {cfg.ifname} context {other_ctx} delete") + + ctx_id = ethtool_create(cfg, "-X", "context new") + ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + port = rand_port() + if other_ctx: + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {other_ctx}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + # Test the main context + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts)) + ksft_ge(sum(cnts[ :2]), 7000, "traffic on main context (1/2): " + str(cnts)) + ksft_ge(sum(cnts[2:4]), 7000, "traffic on main context (2/2): " + str(cnts)) + if other_ctx == 0: + ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) + + # Now create a rule for context 1 and make sure traffic goes to a subset + if other_ctx: + ntuple.exec() + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + directed = sum(cnts[2:4]) + ksft_lt(sum(cnts[ :2]), directed / 2, "traffic on main context: " + str(cnts)) + ksft_ge(directed, 20000, "traffic on extra context: " + str(cnts)) + if other_ctx == 0: + ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) + + +def test_rss_context_overlap2(cfg): + test_rss_context_overlap(cfg, True) + + +def test_flow_add_context_missing(cfg): + """ + Test that we are not allowed to add a rule pointing to an RSS context + which was never created. + """ + + require_ntuple(cfg) + + # Find a context which doesn't exist + for ctx_id in range(1, 100): + try: + get_rss(cfg, context=ctx_id) + except CmdExitFailure: + break + + with ksft_raises(CmdExitFailure) as cm: + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port 1234 context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ethtool(f"-N {cfg.ifname} delete {ntuple_id}") + if cm.exception: + ksft_in('Invalid argument', cm.exception.cmd.stderr) + + +def test_delete_rss_context_busy(cfg): + """ + Test that deletion returns -EBUSY when an rss context is being used + by an ntuple filter. + """ + + require_ntuple(cfg) + + # create additional rss context + ctx_id = ethtool_create(cfg, "-X", "context new") + ctx_deleter = defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # utilize context from ntuple filter + port = rand_port() + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + # attempt to delete in-use context + try: + ctx_deleter.exec_only() + ctx_deleter.cancel() + raise KsftFailEx(f"deleted context {ctx_id} used by rule {ntuple_id}") + except CmdExitFailure: + pass + + +def test_rss_ntuple_addition(cfg): + """ + Test that the queue offset (ring_cookie) of an ntuple rule is added + to the queue number read from the indirection table. + """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + # Use queue 0 for normal traffic + ethtool(f"-X {cfg.ifname} equal 1") + defer(ethtool, f"-X {cfg.ifname} default") + + # create additional rss context + ctx_id = ethtool_create(cfg, "-X", "context new equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # utilize context from ntuple filter + port = rand_port() + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 2" + try: + ntuple_id = ethtool_create(cfg, "-N", flow) + except CmdExitFailure: + raise KsftSkipEx("Ntuple filter with RSS and nonzero action not supported") + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + _send_traffic_check(cfg, port, f"context {ctx_id}", { 'target': (2, 3), + 'empty' : (1,), + 'noise' : (0,) }) + + +def test_rss_default_context_rule(cfg): + """ + Allocate a port, direct this port to context 0, then create a new RSS + context and steer all TCP traffic to it (context 1). Verify that: + * Traffic to the specific port continues to use queues of the main + context (0/1). + * Traffic to any other TCP port is redirected to the new context + (queues 2/3). + """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except Exception as exc: + raise KsftSkipEx("Not enough queues for the test") from exc + + # Use queues 0 and 1 for the main context + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + # Create a new RSS context that uses queues 2 and 3 + ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # Generic low-priority rule: redirect all TCP traffic to the new context. + # Give it an explicit higher location number (lower priority). + flow_generic = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} context {ctx_id} loc 1" + ethtool(f"-N {cfg.ifname} {flow_generic}") + defer(ethtool, f"-N {cfg.ifname} delete 1") + + # Specific high-priority rule for a random port that should stay on context 0. + # Assign loc 0 so it is evaluated before the generic rule. + port_main = rand_port() + flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0" + ethtool(f"-N {cfg.ifname} {flow_main}") + defer(ethtool, f"-N {cfg.ifname} delete 0") + + _ntuple_rule_check(cfg, 1, ctx_id) + + # Verify that traffic matching the specific rule still goes to queues 0/1 + _send_traffic_check(cfg, port_main, "context 0", + { 'target': (0, 1), + 'empty' : (2, 3) }) + + # And that traffic for any other port is steered to the new context + port_other = rand_port() + _send_traffic_check(cfg, port_other, f"context {ctx_id}", + { 'target': (2, 3), + 'noise' : (0, 1) }) + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.context_cnt = None + cfg.ethnl = EthtoolFamily() + cfg.netdevnl = NetdevFamily() + + ksft_run([test_rss_key_indir, test_rss_queue_reconfigure, + test_rss_resize, test_hitless_key_update, + test_rss_context, test_rss_context4, test_rss_context32, + test_rss_context_dump, test_rss_context_queue_reconfigure, + test_rss_context_overlap, test_rss_context_overlap2, + test_rss_context_out_of_order, test_rss_context4_create_with_cfg, + test_flow_add_context_missing, + test_delete_rss_context_busy, test_rss_ntuple_addition, + test_rss_default_context_rule], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py new file mode 100755 index 000000000000..6fa95fe27c47 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests for RSS hashing on IPv6 Flow Label. +""" + +import glob +import os +import socket +from lib.py import CmdExitFailure +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \ + ksft_not_in, ksft_raises, KsftSkipEx +from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port +from lib.py import NetDrvEpEnv + + +def _check_system(cfg): + if not hasattr(socket, "SO_INCOMING_CPU"): + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + + for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt", + f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]: + try: + with open(f, 'r') as fp: + setting = fp.read().strip() + # CPU mask will be zeros and commas + if setting.replace("0", "").replace(",", ""): + raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}") + except FileNotFoundError: + pass + + # 1 is the default, if someone changed it we probably shouldn"t mess with it + af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout + if af.strip() != "1": + raise KsftSkipEx("Remote does not have auto_flowlabels enabled") + + +def _ethtool_get_cfg(cfg, fl_type): + descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + "IPv6 Flow Label": "l", + } + + ret = "" + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + ret += converter[line] + return ret + + +def _traffic(cfg, one_sock, one_cpu): + local_port = rand_port(socket.SOCK_DGRAM) + remote_port = rand_port(socket.SOCK_DGRAM) + + sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + sock.bind(("", local_port)) + sock.connect((cfg.remote_addr_v["6"], 0)) + if one_sock: + send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \ + "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-" + else: + send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \ + f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done" + + cpus = set() + with bkg(send, shell=True, host=cfg.remote, exit_wait=True): + for _ in range(20): + fd_read_timeout(sock.fileno(), 1) + cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) + cpus.add(cpu) + + if one_cpu: + ksft_eq(len(cpus), 1, + f"{one_sock=} - expected one CPU, got traffic on: {cpus=}") + else: + ksft_ge(len(cpus), 2, + f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}") + + +def test_rss_flow_label(cfg): + """ + Test hashing on IPv6 flow label. Send traffic over a single socket + and over multiple sockets. Depend on the remote having auto-label + enabled so that it randomizes the label per socket. + """ + + cfg.require_ipver("6") + cfg.require_cmd("socat", remote=True) + _check_system(cfg) + + # Enable flow label hashing for UDP6 + initial = _ethtool_get_cfg(cfg, "udp6") + no_lbl = initial.replace("l", "") + if "l" not in initial: + try: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}") + except CmdExitFailure as exc: + raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc + + defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _traffic(cfg, one_sock=True, one_cpu=True) + _traffic(cfg, one_sock=False, one_cpu=False) + + # Disable it, we should see no hashing (reset was already defer()ed) + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}") + + _traffic(cfg, one_sock=False, one_cpu=True) + + +def _check_v4_flow_types(cfg): + for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]: + try: + cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + ksft_not_in("Flow Label", cur, + comment=f"{fl_type=} has Flow Label:" + cur) + except CmdExitFailure: + # Probably does not support this flow type + pass + + +def test_rss_flow_label_6only(cfg): + """ + Test interactions with IPv4 flow types. It should not be possible to set + IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also + not appear in the IPv4 "current config". + """ + + with ksft_raises(CmdExitFailure) as cm: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl") + ksft_in("Invalid argument", cm.exception.cmd.stderr) + + _check_v4_flow_types(cfg) + + # Try to enable Flow Labels and check again, in case it leaks thru + initial = _ethtool_get_cfg(cfg, "udp6") + changed = initial.replace("l", "") if "l" in initial else initial + "l" + + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}") + restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _check_v4_flow_types(cfg) + restore.exec() + _check_v4_flow_types(cfg) + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + ksft_run([test_rss_flow_label, + test_rss_flow_label_6only], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py new file mode 100755 index 000000000000..72880e388478 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import multiprocessing +import socket +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout +from lib.py import NetDrvEpEnv +from lib.py import EthtoolFamily, NetdevFamily +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import rand_port + + +def traffic(cfg, local_port, remote_port, ipver): + af_inet = socket.AF_INET if ipver == "4" else socket.AF_INET6 + sock = socket.socket(af_inet, socket.SOCK_DGRAM) + sock.bind(("", local_port)) + sock.connect((cfg.remote_addr_v[ipver], remote_port)) + tgt = f"{ipver}:[{cfg.addr_v[ipver]}]:{local_port},sourceport={remote_port}" + cmd("echo a | socat - UDP" + tgt, host=cfg.remote) + fd_read_timeout(sock.fileno(), 5) + return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) + + +def test_rss_input_xfrm(cfg, ipver): + """ + Test symmetric input_xfrm. + If symmetric RSS hash is configured, send traffic twice, swapping the + src/dst UDP ports, and verify that the same queue is receiving the traffic + in both cases (IPs are constant). + """ + + if multiprocessing.cpu_count() < 2: + raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash") + + cfg.require_cmd("socat", local=False, remote=True) + + if not hasattr(socket, "SO_INCOMING_CPU"): + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") + + rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}) + input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {}))) + + # Check for symmetric xor/or-xor + if not input_xfrm: + raise KsftSkipEx("Symmetric RSS hash not requested") + + cpus = set() + successful = 0 + for _ in range(100): + try: + port1 = rand_port(socket.SOCK_DGRAM) + port2 = rand_port(socket.SOCK_DGRAM) + cpu1 = traffic(cfg, port1, port2, ipver) + cpu2 = traffic(cfg, port2, port1, ipver) + cpus.update([cpu1, cpu2]) + ksft_eq( + cpu1, cpu2, comment=f"Received traffic on different cpus with ports ({port1 = }, {port2 = }) while symmetric hash is configured") + + successful += 1 + if successful == 10: + break + except: + continue + else: + raise KsftFailEx("Failed to run traffic") + + ksft_ge(len(cpus), 2, + comment=f"Received traffic on less than two cpus {cpus = }") + + +def test_rss_input_xfrm_ipv4(cfg): + cfg.require_ipver("4") + test_rss_input_xfrm(cfg, "4") + + +def test_rss_input_xfrm_ipv6(cfg): + cfg.require_ipver("6") + test_rss_input_xfrm(cfg, "6") + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netdevnl = NetdevFamily() + + ksft_run([test_rss_input_xfrm_ipv4, test_rss_input_xfrm_ipv6], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/settings b/tools/testing/selftests/drivers/net/hw/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c new file mode 100644 index 000000000000..d23b3b0c20a3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c @@ -0,0 +1,655 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Toeplitz test + * + * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3 + * 2. Compute the rx_hash in software based on the packet contents + * 3. Compare the two + * + * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given. + * + * If '-C $rx_irq_cpu_list' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the rxqueue that RSS would select based on this rx_hash + * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq + * 7. Compare the cpus from 4 and 6 + * + * Else if '-r $rps_bitmap' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap + * 6. Compare the cpus from 4 and 5 + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <net/if.h> +#include <netdb.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/sysinfo.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#include <ynl.h> +#include "ethtool-user.h" + +#include "kselftest.h" +#include "../../../net/lib/ksft.h" + +#define TOEPLITZ_KEY_MIN_LEN 40 +#define TOEPLITZ_KEY_MAX_LEN 60 + +#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */ +#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN) +#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN) + +#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2)) + +#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */ +#define RSS_MAX_INDIR (1 << 16) + +#define RPS_MAX_CPUS 16UL /* must be a power of 2 */ + +/* configuration options (cmdline arguments) */ +static uint16_t cfg_dport = 8000; +static int cfg_family = AF_INET6; +static char *cfg_ifname = "eth0"; +static int cfg_num_queues; +static int cfg_num_rps_cpus; +static bool cfg_sink; +static int cfg_type = SOCK_STREAM; +static int cfg_timeout_msec = 1000; +static bool cfg_verbose; + +/* global vars */ +static int num_cpus; +static int ring_block_nr; +static int ring_block_sz; + +/* stats */ +static int frames_received; +static int frames_nohash; +static int frames_error; + +#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0) + +/* tpacket ring */ +struct ring_state { + int fd; + char *mmap; + int idx; + int cpu; +}; + +static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */ +static int rps_silo_to_cpu[RPS_MAX_CPUS]; +static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN]; +static unsigned int rss_indir_tbl[RSS_MAX_INDIR]; +static unsigned int rss_indir_tbl_size; +static struct ring_state rings[RSS_MAX_CPUS]; + +static inline uint32_t toeplitz(const unsigned char *four_tuple, + const unsigned char *key) +{ + int i, bit, ret = 0; + uint32_t key32; + + key32 = ntohl(*((uint32_t *)key)); + key += 4; + + for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) { + for (bit = 7; bit >= 0; bit--) { + if (four_tuple[i] & (1 << bit)) + ret ^= key32; + + key32 <<= 1; + key32 |= !!(key[0] & (1 << bit)); + } + key++; + } + + return ret; +} + +/* Compare computed cpu with arrival cpu from packet_fanout_cpu */ +static void verify_rss(uint32_t rx_hash, int cpu) +{ + int queue; + + if (rss_indir_tbl_size) + queue = rss_indir_tbl[rx_hash % rss_indir_tbl_size]; + else + queue = rx_hash % cfg_num_queues; + + log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]); + if (rx_irq_cpus[queue] != cpu) { + log_verbose(". error: rss cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void verify_rps(uint64_t rx_hash, int cpu) +{ + int silo = (rx_hash * cfg_num_rps_cpus) >> 32; + + log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]); + if (rps_silo_to_cpu[silo] != cpu) { + log_verbose(". error: rps cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void log_rxhash(int cpu, uint32_t rx_hash, + const char *addrs, int addr_len) +{ + char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN]; + uint16_t *ports; + + if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) || + !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr))) + error(1, 0, "address parse error"); + + ports = (void *)addrs + (addr_len * 2); + log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]", + cpu, rx_hash, saddr, daddr, + ntohs(ports[0]), ntohs(ports[1])); +} + +/* Compare computed rxhash with rxhash received from tpacket_v3 */ +static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu) +{ + unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0}; + uint32_t rx_hash_sw; + const char *addrs; + int addr_len; + + if (cfg_family == AF_INET) { + addr_len = sizeof(struct in_addr); + addrs = pkt + offsetof(struct iphdr, saddr); + } else { + addr_len = sizeof(struct in6_addr); + addrs = pkt + offsetof(struct ip6_hdr, ip6_src); + } + + memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2)); + rx_hash_sw = toeplitz(four_tuple, toeplitz_key); + + if (cfg_verbose) + log_rxhash(cpu, rx_hash, addrs, addr_len); + + if (rx_hash != rx_hash_sw) { + log_verbose(" != expected 0x%x\n", rx_hash_sw); + frames_error++; + return; + } + + log_verbose(" OK"); + if (cfg_num_queues) + verify_rss(rx_hash, cpu); + else if (cfg_num_rps_cpus) + verify_rps(rx_hash, cpu); + log_verbose("\n"); +} + +static char *recv_frame(const struct ring_state *ring, char *frame) +{ + struct tpacket3_hdr *hdr = (void *)frame; + + if (hdr->hv1.tp_rxhash) + verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash, + ring->cpu); + else + frames_nohash++; + + return frame + hdr->tp_next_offset; +} + +/* A single TPACKET_V3 block can hold multiple frames */ +static bool recv_block(struct ring_state *ring) +{ + struct tpacket_block_desc *block; + char *frame; + int i; + + block = (void *)(ring->mmap + ring->idx * ring_block_sz); + if (!(block->hdr.bh1.block_status & TP_STATUS_USER)) + return false; + + frame = (char *)block; + frame += block->hdr.bh1.offset_to_first_pkt; + + for (i = 0; i < block->hdr.bh1.num_pkts; i++) { + frame = recv_frame(ring, frame); + frames_received++; + } + + block->hdr.bh1.block_status = TP_STATUS_KERNEL; + ring->idx = (ring->idx + 1) % ring_block_nr; + + return true; +} + +/* simple test: sleep once unconditionally and then process all rings */ +static void process_rings(void) +{ + int i; + + usleep(1000 * cfg_timeout_msec); + + for (i = 0; i < num_cpus; i++) + do {} while (recv_block(&rings[i])); + + fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", + frames_received - frames_nohash - frames_error, + frames_nohash, frames_error); +} + +static char *setup_ring(int fd) +{ + struct tpacket_req3 req3 = {0}; + void *ring; + + req3.tp_retire_blk_tov = cfg_timeout_msec / 8; + req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; + + req3.tp_frame_size = 2048; + req3.tp_frame_nr = 1 << 10; + req3.tp_block_nr = 16; + + req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr; + req3.tp_block_size /= req3.tp_block_nr; + + if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3))) + error(1, errno, "setsockopt PACKET_RX_RING"); + + ring_block_sz = req3.tp_block_size; + ring_block_nr = req3.tp_block_nr; + + ring = mmap(0, req3.tp_block_size * req3.tp_block_nr, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0); + if (ring == MAP_FAILED) + error(1, 0, "mmap failed"); + + return ring; +} + +static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = ARRAY_SIZE(filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +/* filter on transport protocol and destination port */ +static void set_filter(int fd) +{ + const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ + uint8_t proto; + + proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP; + if (cfg_family == AF_INET) + __set_filter(fd, offsetof(struct iphdr, protocol), proto, + sizeof(struct iphdr) + off_dport); + else + __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto, + sizeof(struct ip6_hdr) + off_dport); +} + +/* drop everything: used temporarily during setup */ +static void set_filter_null(int fd) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET + BPF_K, 0), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = ARRAY_SIZE(filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +static int create_ring(char **ring) +{ + struct fanout_args args = { + .id = 1, + .type_flags = PACKET_FANOUT_CPU, + .max_num_members = RSS_MAX_CPUS + }; + struct sockaddr_ll ll = { 0 }; + int fd, val; + + fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket creation failed"); + + val = TPACKET_V3; + if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val))) + error(1, errno, "setsockopt PACKET_VERSION"); + *ring = setup_ring(fd); + + /* block packets until all rings are added to the fanout group: + * else packets can arrive during setup and get misclassified + */ + set_filter_null(fd); + + ll.sll_family = AF_PACKET; + ll.sll_ifindex = if_nametoindex(cfg_ifname); + ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) : + htons(ETH_P_IPV6); + if (bind(fd, (void *)&ll, sizeof(ll))) + error(1, errno, "bind"); + + /* must come after bind: verifies all programs in group match */ + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) { + /* on failure, retry using old API if that is sufficient: + * it has a hard limit of 256 sockets, so only try if + * (a) only testing rxhash, not RSS or (b) <= 256 cpus. + * in this API, the third argument is left implicit. + */ + if (cfg_num_queues || num_cpus > 256 || + setsockopt(fd, SOL_PACKET, PACKET_FANOUT, + &args, sizeof(uint32_t))) + error(1, errno, "setsockopt PACKET_FANOUT cpu"); + } + + return fd; +} + +/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */ +static int setup_sink(void) +{ + int fd, val; + + fd = socket(cfg_family, cfg_type, 0); + if (fd == -1) + error(1, errno, "socket %d.%d", cfg_family, cfg_type); + + val = 1 << 20; + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val))) + error(1, errno, "setsockopt rcvbuf"); + + return fd; +} + +static void setup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + rings[i].cpu = i; + rings[i].fd = create_ring(&rings[i].mmap); + } + + /* accept packets once all rings in the fanout group are up */ + for (i = 0; i < num_cpus; i++) + set_filter(rings[i].fd); +} + +static void cleanup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz)) + error(1, errno, "munmap"); + if (close(rings[i].fd)) + error(1, errno, "close"); + } +} + +static void parse_cpulist(const char *arg) +{ + do { + rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10); + + arg = strchr(arg, ','); + if (!arg) + break; + arg++; // skip ',' + } while (1); +} + +static void show_cpulist(void) +{ + int i; + + for (i = 0; i < cfg_num_queues; i++) + fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]); +} + +static void show_silos(void) +{ + int i; + + for (i = 0; i < cfg_num_rps_cpus; i++) + fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]); +} + +static void parse_toeplitz_key(const char *str, int slen, unsigned char *key) +{ + int i, ret, off; + + if (slen < TOEPLITZ_STR_MIN_LEN || + slen > TOEPLITZ_STR_MAX_LEN + 1) + error(1, 0, "invalid toeplitz key"); + + for (i = 0, off = 0; off < slen; i++, off += 3) { + ret = sscanf(str + off, "%hhx", &key[i]); + if (ret != 1) + error(1, 0, "key parse error at %d off %d len %d", + i, off, slen); + } +} + +static void parse_rps_bitmap(const char *arg) +{ + unsigned long bitmap; + int i; + + bitmap = strtoul(arg, NULL, 0); + + if (bitmap & ~(RPS_MAX_CPUS - 1)) + error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu", + bitmap, RPS_MAX_CPUS - 1); + + for (i = 0; i < RPS_MAX_CPUS; i++) + if (bitmap & 1UL << i) + rps_silo_to_cpu[cfg_num_rps_cpus++] = i; +} + +static void read_rss_dev_info_ynl(void) +{ + struct ethtool_rss_get_req *req; + struct ethtool_rss_get_rsp *rsp; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_ethtool_family, NULL); + if (!ys) + error(1, errno, "ynl_sock_create failed"); + + req = ethtool_rss_get_req_alloc(); + if (!req) + error(1, errno, "ethtool_rss_get_req_alloc failed"); + + ethtool_rss_get_req_set_header_dev_name(req, cfg_ifname); + + rsp = ethtool_rss_get(ys, req); + if (!rsp) + error(1, ys->err.code, "YNL: %s", ys->err.msg); + + if (!rsp->_len.hkey) + error(1, 0, "RSS key not available for %s", cfg_ifname); + + if (rsp->_len.hkey < TOEPLITZ_KEY_MIN_LEN || + rsp->_len.hkey > TOEPLITZ_KEY_MAX_LEN) + error(1, 0, "RSS key length %u out of bounds [%u, %u]", + rsp->_len.hkey, TOEPLITZ_KEY_MIN_LEN, + TOEPLITZ_KEY_MAX_LEN); + + memcpy(toeplitz_key, rsp->hkey, rsp->_len.hkey); + + if (rsp->_count.indir > RSS_MAX_INDIR) + error(1, 0, "RSS indirection table too large (%u > %u)", + rsp->_count.indir, RSS_MAX_INDIR); + + /* If indir table not available we'll fallback to simple modulo math */ + if (rsp->_count.indir) { + memcpy(rss_indir_tbl, rsp->indir, + rsp->_count.indir * sizeof(rss_indir_tbl[0])); + rss_indir_tbl_size = rsp->_count.indir; + + log_verbose("RSS indirection table size: %u\n", + rss_indir_tbl_size); + } + + ethtool_rss_get_rsp_free(rsp); + ethtool_rss_get_req_free(req); + ynl_sock_destroy(ys); +} + +static void parse_opts(int argc, char **argv) +{ + static struct option long_options[] = { + {"dport", required_argument, 0, 'd'}, + {"cpus", required_argument, 0, 'C'}, + {"key", required_argument, 0, 'k'}, + {"iface", required_argument, 0, 'i'}, + {"ipv4", no_argument, 0, '4'}, + {"ipv6", no_argument, 0, '6'}, + {"sink", no_argument, 0, 's'}, + {"tcp", no_argument, 0, 't'}, + {"timeout", required_argument, 0, 'T'}, + {"udp", no_argument, 0, 'u'}, + {"verbose", no_argument, 0, 'v'}, + {"rps", required_argument, 0, 'r'}, + {0, 0, 0, 0} + }; + bool have_toeplitz = false; + int index, c; + + while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) { + switch (c) { + case '4': + cfg_family = AF_INET; + break; + case '6': + cfg_family = AF_INET6; + break; + case 'C': + parse_cpulist(optarg); + break; + case 'd': + cfg_dport = strtol(optarg, NULL, 0); + break; + case 'i': + cfg_ifname = optarg; + break; + case 'k': + parse_toeplitz_key(optarg, strlen(optarg), + toeplitz_key); + have_toeplitz = true; + break; + case 'r': + parse_rps_bitmap(optarg); + break; + case 's': + cfg_sink = true; + break; + case 't': + cfg_type = SOCK_STREAM; + break; + case 'T': + cfg_timeout_msec = strtol(optarg, NULL, 0); + break; + case 'u': + cfg_type = SOCK_DGRAM; + break; + case 'v': + cfg_verbose = true; + break; + + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!have_toeplitz) + read_rss_dev_info_ynl(); + + num_cpus = get_nprocs(); + if (num_cpus > RSS_MAX_CPUS) + error(1, 0, "increase RSS_MAX_CPUS"); + + if (cfg_num_queues && cfg_num_rps_cpus) + error(1, 0, + "Can't supply both RSS cpus ('-C') and RPS map ('-r')"); + if (cfg_verbose) { + show_cpulist(); + show_silos(); + } +} + +int main(int argc, char **argv) +{ + const int min_tests = 10; + int fd_sink = -1; + + parse_opts(argc, argv); + + if (cfg_sink) + fd_sink = setup_sink(); + + setup_rings(); + + /* Signal to test framework that we're ready to receive */ + ksft_ready(); + + process_rings(); + cleanup_rings(); + + if (cfg_sink && close(fd_sink)) + error(1, errno, "close sink"); + + if (frames_received - frames_nohash < min_tests) + error(1, 0, "too few frames for verification"); + + return frames_error; +} diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.py b/tools/testing/selftests/drivers/net/hw/toeplitz.py new file mode 100755 index 000000000000..d2db5ee9e358 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/toeplitz.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Toeplitz Rx hashing test: + - rxhash (the hash value calculation itself); + - RSS mapping from rxhash to rx queue; + - RPS mapping from rxhash to cpu. +""" + +import glob +import os +import socket +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import NetDrvEpEnv, EthtoolFamily, NetdevFamily +from lib.py import cmd, bkg, rand_port, defer +from lib.py import ksft_in +from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, KsftFailEx + +# "define" for the ID of the Toeplitz hash function +ETH_RSS_HASH_TOP = 1 + + +def _check_rps_and_rfs_not_configured(cfg): + """Verify that RPS is not already configured.""" + + for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"): + with open(rps_file, "r", encoding="utf-8") as fp: + val = fp.read().strip() + if set(val) - {"0", ","}: + raise KsftSkipEx(f"RPS already configured on {rps_file}: {val}") + + rfs_file = "/proc/sys/net/core/rps_sock_flow_entries" + with open(rfs_file, "r", encoding="utf-8") as fp: + val = fp.read().strip() + if val != "0": + raise KsftSkipEx(f"RFS already configured {rfs_file}: {val}") + + +def _get_cpu_for_irq(irq): + with open(f"/proc/irq/{irq}/smp_affinity_list", "r", + encoding="utf-8") as fp: + data = fp.read().strip() + if "," in data or "-" in data: + raise KsftFailEx(f"IRQ{irq} is not mapped to a single core: {data}") + return int(data) + + +def _get_irq_cpus(cfg): + """ + Read the list of IRQs for the device Rx queues. + """ + queues = cfg.netnl.queue_get({"ifindex": cfg.ifindex}, dump=True) + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) + + # Remap into ID-based dicts + napis = {n["id"]: n for n in napis} + queues = {f"{q['type']}{q['id']}": q for q in queues} + + cpus = [] + for rx in range(9999): + name = f"rx{rx}" + if name not in queues: + break + cpus.append(_get_cpu_for_irq(napis[queues[name]["napi-id"]]["irq"])) + + return cpus + + +def _get_unused_cpus(cfg, count=2): + """ + Get CPUs that are not used by Rx queues. + Returns a list of at least 'count' CPU numbers. + """ + + # Get CPUs used by Rx queues + rx_cpus = set(_get_irq_cpus(cfg)) + + # Get total number of CPUs + num_cpus = os.cpu_count() + + # Find unused CPUs + unused_cpus = [cpu for cpu in range(num_cpus) if cpu not in rx_cpus] + + if len(unused_cpus) < count: + raise KsftSkipEx(f"Need at {count} CPUs not used by Rx queues, found {len(unused_cpus)}") + + return unused_cpus[:count] + + +def _configure_rps(cfg, rps_cpus): + """Configure RPS for all Rx queues.""" + + mask = 0 + for cpu in rps_cpus: + mask |= (1 << cpu) + mask = hex(mask)[2:] + + # Set RPS bitmap for all rx queues + for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"): + with open(rps_file, "w", encoding="utf-8") as fp: + fp.write(mask) + + return mask + + +def _send_traffic(cfg, proto_flag, ipver, port): + """Send 20 packets of requested type.""" + + # Determine protocol and IP version for socat + if proto_flag == "-u": + proto = "UDP" + else: + proto = "TCP" + + baddr = f"[{cfg.addr_v['6']}]" if ipver == "6" else cfg.addr_v["4"] + + # Run socat in a loop to send traffic periodically + # Use sh -c with a loop similar to toeplitz_client.sh + socat_cmd = f""" + for i in `seq 20`; do + echo "msg $i" | socat -{ipver} -t 0.1 - {proto}:{baddr}:{port}; + sleep 0.001; + done + """ + + cmd(socat_cmd, shell=True, host=cfg.remote) + + +def _test_variants(): + for grp in ["", "rss", "rps"]: + for l4 in ["tcp", "udp"]: + for l3 in ["4", "6"]: + name = f"{l4}_ipv{l3}" + if grp: + name = f"{grp}_{name}" + yield KsftNamedVariant(name, "-" + l4[0], l3, grp) + + +@ksft_variants(_test_variants()) +def test(cfg, proto_flag, ipver, grp): + """Run a single toeplitz test.""" + + cfg.require_ipver(ipver) + + # Check that rxhash is enabled + ksft_in("receive-hashing: on", cmd(f"ethtool -k {cfg.ifname}").stdout) + + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # Make sure NIC is configured to use Toeplitz hash, and no key xfrm. + if rss.get('hfunc') != ETH_RSS_HASH_TOP or rss.get('input-xfrm'): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hfunc": ETH_RSS_HASH_TOP, + "input-xfrm": {}}) + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex}, + "hfunc": rss.get('hfunc'), + "input-xfrm": rss.get('input-xfrm', {}) + }) + + port = rand_port(socket.SOCK_DGRAM) + + toeplitz_path = cfg.test_dir / "toeplitz" + rx_cmd = [ + str(toeplitz_path), + "-" + ipver, + proto_flag, + "-d", str(port), + "-i", cfg.ifname, + "-T", "4000", + "-s", + "-v" + ] + + if grp: + _check_rps_and_rfs_not_configured(cfg) + if grp == "rss": + irq_cpus = ",".join([str(x) for x in _get_irq_cpus(cfg)]) + rx_cmd += ["-C", irq_cpus] + ksft_pr(f"RSS using CPUs: {irq_cpus}") + elif grp == "rps": + # Get CPUs not used by Rx queues and configure them for RPS + rps_cpus = _get_unused_cpus(cfg, count=2) + rps_mask = _configure_rps(cfg, rps_cpus) + defer(_configure_rps, cfg, []) + rx_cmd += ["-r", rps_mask] + ksft_pr(f"RPS using CPUs: {rps_cpus}, mask: {rps_mask}") + + # Run rx in background, it will exit once it has seen enough packets + with bkg(" ".join(rx_cmd), ksft_ready=True, exit_wait=True) as rx_proc: + while rx_proc.proc.poll() is None: + _send_traffic(cfg, proto_flag, ipver, port) + + # Check rx result + ksft_pr("Receiver output:") + ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# ')) + if rx_proc.stderr: + ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# ')) + + +def main() -> None: + """Ksft boilerplate main.""" + + with NetDrvEpEnv(__file__) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + ksft_run(cases=[test], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py new file mode 100755 index 000000000000..0998e68ebaf0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Run the tools/testing/selftests/net/csum testsuite.""" + +import fcntl +import socket +import struct +import termios +import time + +from lib.py import ksft_pr, ksft_run, ksft_exit, KsftSkipEx, KsftXfailEx +from lib.py import ksft_eq, ksft_ge, ksft_lt +from lib.py import EthtoolFamily, NetdevFamily, NetDrvEpEnv +from lib.py import bkg, cmd, defer, ethtool, ip, rand_port, wait_port_listen + + +def sock_wait_drain(sock, max_wait=1000): + """Wait for all pending write data on the socket to get ACKed.""" + for _ in range(max_wait): + one = b'\0' * 4 + outq = fcntl.ioctl(sock.fileno(), termios.TIOCOUTQ, one) + outq = struct.unpack("I", outq)[0] + if outq == 0: + break + time.sleep(0.01) + ksft_eq(outq, 0) + + +def tcp_sock_get_retrans(sock): + """Get the number of retransmissions for the TCP socket.""" + info = sock.getsockopt(socket.SOL_TCP, socket.TCP_INFO, 512) + return struct.unpack("I", info[100:104])[0] + + +def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): + cfg.require_cmd("socat", local=False, remote=True) + + port = rand_port() + listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof" + + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc: + wait_port_listen(port, host=cfg.remote) + + if ipver == "4": + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((remote_v4, port)) + else: + sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + sock.connect((remote_v6, port)) + + # Small send to make sure the connection is working. + sock.send("ping".encode()) + sock_wait_drain(sock) + + # Send 4MB of data, record the LSO packet count. + qstat_old = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + buf = b"0" * 1024 * 1024 * 4 + sock.send(buf) + sock_wait_drain(sock) + qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + # Check that at least 90% of the data was sent as LSO packets. + # System noise may cause false negatives. Also header overheads + # will add up to 5% of extra packes... The check is best effort. + total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"] + total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"] + + # Make sure we have order of magnitude more LSO packets than + # retransmits, in case TCP retransmitted all the LSO packets. + ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4) + sock.close() + + if should_lso: + if cfg.have_stat_super_count: + ksft_ge(qstat_new['tx-hw-gso-packets'] - + qstat_old['tx-hw-gso-packets'], + total_lso_super, + comment="Number of LSO super-packets with LSO enabled") + if cfg.have_stat_wire_count: + ksft_ge(qstat_new['tx-hw-gso-wire-packets'] - + qstat_old['tx-hw-gso-wire-packets'], + total_lso_wire, + comment="Number of LSO wire-packets with LSO enabled") + else: + if cfg.have_stat_super_count: + ksft_lt(qstat_new['tx-hw-gso-packets'] - + qstat_old['tx-hw-gso-packets'], + 15, comment="Number of LSO super-packets with LSO disabled") + if cfg.have_stat_wire_count: + ksft_lt(qstat_new['tx-hw-gso-wire-packets'] - + qstat_old['tx-hw-gso-wire-packets'], + 500, comment="Number of LSO wire-packets with LSO disabled") + + +def build_tunnel(cfg, outer_ipver, tun_info): + local_v4 = NetDrvEpEnv.nsim_v4_pfx + "1" + local_v6 = NetDrvEpEnv.nsim_v6_pfx + "1" + remote_v4 = NetDrvEpEnv.nsim_v4_pfx + "2" + remote_v6 = NetDrvEpEnv.nsim_v6_pfx + "2" + + local_addr = cfg.addr_v[outer_ipver] + remote_addr = cfg.remote_addr_v[outer_ipver] + + tun_type = tun_info[0] + tun_arg = tun_info[1] + ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}") + defer(ip, f"link del {tun_type}-ksft") + ip(f"link set dev {tun_type}-ksft up") + ip(f"addr add {local_v4}/24 dev {tun_type}-ksft") + ip(f"addr add {local_v6}/64 dev {tun_type}-ksft") + + ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {remote_addr} remote {local_addr} dev {cfg.remote_ifname}", + host=cfg.remote) + defer(ip, f"link del {tun_type}-ksft", host=cfg.remote) + ip(f"link set dev {tun_type}-ksft up", host=cfg.remote) + ip(f"addr add {remote_v4}/24 dev {tun_type}-ksft", host=cfg.remote) + ip(f"addr add {remote_v6}/64 dev {tun_type}-ksft", host=cfg.remote) + + return remote_v4, remote_v6 + + +def restore_wanted_features(cfg): + features_cmd = "" + for feature in cfg.hw_features: + setting = "on" if feature in cfg.wanted_features else "off" + features_cmd += f" {feature} {setting}" + try: + ethtool(f"-K {cfg.ifname} {features_cmd}") + except Exception as e: + ksft_pr(f"WARNING: failure restoring wanted features: {e}") + + +def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): + """Construct specific tests from the common template.""" + def f(cfg): + cfg.require_ipver(outer_ipver) + defer(restore_wanted_features, cfg) + + if not cfg.have_stat_super_count and \ + not cfg.have_stat_wire_count: + raise KsftSkipEx(f"Device does not support LSO queue stats") + + if feature not in cfg.hw_features: + raise KsftSkipEx(f"Device does not support {feature}") + + ipver = outer_ipver + if tun: + remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun) + ipver = inner_ipver + else: + remote_v4 = cfg.remote_addr_v["4"] + remote_v6 = cfg.remote_addr_v["6"] + + # First test without the feature enabled. + ethtool(f"-K {cfg.ifname} {feature} off") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False) + + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") + if feature in cfg.partial_features: + ethtool(f"-K {cfg.ifname} tx-gso-partial on") + if ipver == "4": + ksft_pr("Testing with mangleid enabled") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") + + # Full feature enabled. + ethtool(f"-K {cfg.ifname} {feature} on") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) + + f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver + return f + + +def query_nic_features(cfg) -> None: + """Query and cache the NIC features.""" + cfg.have_stat_super_count = False + cfg.have_stat_wire_count = False + + features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + + cfg.wanted_features = set() + for f in features["wanted"]["bits"]["bit"]: + cfg.wanted_features.add(f["name"]) + + cfg.hw_features = set() + hw_all_features_cmd = "" + for f in features["hw"]["bits"]["bit"]: + if f.get("value", False): + feature = f["name"] + cfg.hw_features.add(feature) + hw_all_features_cmd += f" {feature} on" + try: + ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}") + except Exception as e: + ksft_pr(f"WARNING: failure enabling all hw features: {e}") + ksft_pr("partial gso feature detection may be impacted") + + # Check which features are supported via GSO partial + cfg.partial_features = set() + if 'tx-gso-partial' in cfg.hw_features: + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + + no_partial = set() + features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + for f in features["active"]["bits"]["bit"]: + no_partial.add(f["name"]) + cfg.partial_features = cfg.hw_features - no_partial + ethtool(f"-K {cfg.ifname} tx-gso-partial on") + + restore_wanted_features(cfg) + + stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True) + if stats: + if 'tx-hw-gso-packets' in stats[0]: + ksft_pr("Detected qstat for LSO super-packets") + cfg.have_stat_super_count = True + if 'tx-hw-gso-wire-packets' in stats[0]: + ksft_pr("Detected qstat for LSO wire-packets") + cfg.have_stat_wire_count = True + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + + query_nic_features(cfg) + + test_info = ( + # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions) + ("", "4", "tx-tcp-segmentation", None), + ("", "6", "tx-tcp6-segmentation", None), + ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))), + ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))), + ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))), + ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))), + ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))), + ) + + cases = [] + for outer_ipver in ["4", "6"]: + for info in test_info: + # Skip if test which only works for a specific IP version + if info[1] and outer_ipver != info[1]: + continue + + if info[3]: + cases += [ + test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver) + for inner_ipver in info[3][2] + ] + else: + cases.append(test_builder(info[0], cfg, outer_ipver, info[2], None, outer_ipver)) + + ksft_run(cases=cases, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py new file mode 100755 index 000000000000..d19d1d518208 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +# This is intended to be run on a virtio-net guest interface. +# The test binds the XDP socket to the interface without setting +# the fill ring to trigger delayed refill_work. This helps to +# make it easier to reproduce the deadlock when XDP program, +# XDP socket bind/unbind, rx ring resize race with refill_work on +# the buggy kernel. +# +# The Qemu command to setup virtio-net +# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no +# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on + +from lib.py import ksft_exit, ksft_run +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import NetDrvEnv +from lib.py import bkg, ip, cmd, ethtool +import time + +def _get_rx_ring_entries(cfg): + output = ethtool(f"-g {cfg.ifname}", json=True) + return output[0]["rx"] + +def setup_xsk(cfg, xdp_queue_id = 0) -> bkg: + # Probe for support + xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False) + if xdp.ret == 255: + raise KsftSkipEx('AF_XDP unsupported') + elif xdp.ret > 0: + raise KsftFailEx('unable to create AF_XDP socket') + + try: + return bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} ' \ + '{xdp_queue_id} -z', ksft_wait=3) + except: + raise KsftSkipEx('Failed to bind XDP socket in zerocopy.\n' \ + 'Please consider adding iommu_platform=on ' \ + 'when setting up virtio-net-pci') + +def check_xdp_bind(cfg): + with setup_xsk(cfg): + ip(f"link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o")) + ip(f"link set dev %s xdp off" % cfg.ifname) + +def check_rx_resize(cfg): + with setup_xsk(cfg): + rx_ring = _get_rx_ring_entries(cfg) + ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring // 2)) + ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring)) + +def main(): + with NetDrvEnv(__file__, nsim_test=False) as cfg: + ksft_run([check_xdp_bind, check_rx_resize], + args=(cfg, )) + ksft_exit() + +if __name__ == "__main__": + main() |
