diff options
Diffstat (limited to 'tools/testing/selftests/drivers/net/netpoll_basic.py')
-rwxr-xr-x | tools/testing/selftests/drivers/net/netpoll_basic.py | 396 |
1 files changed, 396 insertions, 0 deletions
diff --git a/tools/testing/selftests/drivers/net/netpoll_basic.py b/tools/testing/selftests/drivers/net/netpoll_basic.py new file mode 100755 index 000000000000..408bd54d6779 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netpoll_basic.py @@ -0,0 +1,396 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Author: Breno Leitao <leitao@debian.org> +""" + This test aims to evaluate the netpoll polling mechanism (as in + netpoll_poll_dev()). It presents a complex scenario where the network + attempts to send a packet but fails, prompting it to poll the NIC from within + the netpoll TX side. + + This has been a crucial path in netpoll that was previously untested. Jakub + suggested using a single RX/TX queue, pushing traffic to the NIC, and then + sending netpoll messages (via netconsole) to trigger the poll. + + In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If + so, the test passes, otherwise it will be skipped. This test is very dependent on + the driver and environment, given we are trying to trigger a tricky scenario. +""" + +import errno +import logging +import os +import random +import string +import threading +import time +from typing import Optional + +from lib.py import ( + bpftrace, + CmdExitFailure, + defer, + ethtool, + GenerateTraffic, + ksft_exit, + ksft_pr, + ksft_run, + KsftFailEx, + KsftSkipEx, + NetDrvEpEnv, + KsftXfailEx, +) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole" +NETCONS_REMOTE_PORT: int = 6666 +NETCONS_LOCAL_PORT: int = 1514 + +# Max number of netcons messages to send. Each iteration will setup +# netconsole and send MAX_WRITES messages +ITERATIONS: int = 20 +# Number of writes to /dev/kmsg per iteration +MAX_WRITES: int = 40 +# MAPS contains the information coming from bpftrace it will have only one +# key: "hits", which tells the number of times netpoll_poll_dev() was called +MAPS: dict[str, int] = {} +# Thread to run bpftrace in parallel +BPF_THREAD: Optional[threading.Thread] = None +# Time bpftrace will be running in parallel. +BPFTRACE_TIMEOUT: int = 10 + + +def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]: + """ + Read the ringsize using ethtool. This will be used to restore it after the test + """ + try: + ethtool_result = ethtool(f"-g {interface_name}", json=True)[0] + rxs = ethtool_result["rx"] + txs = ethtool_result["tx"] + except (KeyError, IndexError) as exception: + raise KsftSkipEx( + f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them." + ) from exception + + return rxs, txs + + +def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool: + """Try to the number of RX and TX ringsize.""" + rxs = ring_size[0] + txs = ring_size[1] + + logging.debug("Setting ring size to %d/%d", rxs, txs) + try: + ethtool(f"-G {interface_name} rx {rxs} tx {txs}") + except CmdExitFailure: + # This might fail on real device, retry with a higher value, + # worst case, keep it as it is. + return False + + return True + + +def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]: + """Read the number of RX, TX and combined queues using ethtool""" + + try: + ethtool_result = ethtool(f"-l {interface_name}", json=True)[0] + rxq = ethtool_result.get("rx", -1) + txq = ethtool_result.get("tx", -1) + combined = ethtool_result.get("combined", -1) + + except IndexError as exception: + raise KsftSkipEx( + f"Failed to read queues numbers: {exception}. Not going to mess with them." + ) from exception + + return rxq, txq, combined + + +def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None: + """Set the number of RX, TX and combined queues using ethtool""" + rxq, txq, combined = queues + + cmdline = f"-L {interface_name}" + + if rxq != -1: + cmdline += f" rx {rxq}" + if txq != -1: + cmdline += f" tx {txq}" + if combined != -1: + cmdline += f" combined {combined}" + + logging.debug("calling: ethtool %s", cmdline) + + try: + ethtool(cmdline) + except CmdExitFailure as exception: + raise KsftSkipEx( + f"Failed to configure RX/TX queues: {exception}. Ethtool not available?" + ) from exception + + +def netcons_generate_random_target_name() -> str: + """Generate a random target name starting with 'netcons'""" + random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8)) + return f"netcons_{random_suffix}" + + +def netcons_create_target( + config_data: dict[str, str], + target_name: str, +) -> None: + """Create a netconsole dynamic target against the interfaces""" + logging.debug("Using netconsole name: %s", target_name) + try: + os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True) + logging.debug( + "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name + ) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise KsftFailEx( + f"Failed to create netconsole target directory: {exception}" + ) from exception + + try: + for key, value in config_data.items(): + path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}" + logging.debug("Writing %s to %s", key, path) + with open(path, "w", encoding="utf-8") as file: + # Always convert to string to write to file + file.write(str(value)) + + # Read all configuration values for debugging purposes + for debug_key in config_data.keys(): + with open( + f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}", + "r", + encoding="utf-8", + ) as file: + content = file.read() + logging.debug( + "%s/%s/%s : %s", + NETCONSOLE_CONFIGFS_PATH, + target_name, + debug_key, + content.strip(), + ) + + except Exception as exception: + raise KsftFailEx( + f"Failed to configure netconsole target: {exception}" + ) from exception + + +def netcons_configure_target( + cfg: NetDrvEpEnv, interface_name: str, target_name: str +) -> None: + """Configure netconsole on the interface with the given target name""" + config_data = { + "extended": "1", + "dev_name": interface_name, + "local_port": NETCONS_LOCAL_PORT, + "remote_port": NETCONS_REMOTE_PORT, + "local_ip": cfg.addr, + "remote_ip": cfg.remote_addr, + "remote_mac": "00:00:00:00:00:00", # Not important for this test + "enabled": "1", + } + + netcons_create_target(config_data, target_name) + logging.debug( + "Created netconsole target: %s on interface %s", target_name, interface_name + ) + + +def netcons_delete_target(name: str) -> None: + """Delete a netconsole dynamic target""" + target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}" + try: + if os.path.exists(target_path): + os.rmdir(target_path) + except OSError as exception: + raise KsftFailEx( + f"Failed to delete netconsole target: {exception}" + ) from exception + + +def netcons_load_module() -> None: + """Try to load the netconsole module""" + os.system("modprobe netconsole") + + +def bpftrace_call() -> None: + """Call bpftrace to find how many times netpoll_poll_dev() is called. + Output is saved in the global variable `maps`""" + + # This is going to update the global variable, that will be seen by the + # main function + global MAPS # pylint: disable=W0603 + + # This will be passed to bpftrace as in bpftrace -e "expr" + expr = "kprobe:netpoll_poll_dev { @hits = count(); }" + + MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True) + logging.debug("BPFtrace output: %s", MAPS) + + +def bpftrace_start(): + """Start a thread to call `call_bpf` in a parallel thread""" + global BPF_THREAD # pylint: disable=W0603 + + BPF_THREAD = threading.Thread(target=bpftrace_call) + BPF_THREAD.start() + if not BPF_THREAD.is_alive(): + raise KsftSkipEx("BPFtrace thread is not alive. Skipping test") + + +def bpftrace_stop() -> None: + """Stop the bpftrace thread""" + if BPF_THREAD: + BPF_THREAD.join() + + +def bpftrace_any_hit(join: bool) -> bool: + """Check if netpoll_poll_dev() was called by checking the global variable `maps`""" + if not BPF_THREAD: + raise KsftFailEx("BPFtrace didn't start") + + if BPF_THREAD.is_alive(): + if join: + # Wait for bpftrace to finish + BPF_THREAD.join() + else: + # bpftrace is still running, so, we will not check the result yet + return False + + logging.debug("MAPS coming from bpftrace = %s", MAPS) + if "hits" not in MAPS.keys(): + raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}") + + logging.debug("Got a total of %d hits", MAPS["hits"]) + return MAPS["hits"] > 0 + + +def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: + """Print messages to the console, trying to trigger a netpoll poll""" + # Start bpftrace in parallel, so, it is watching + # netpoll_poll_dev() while we are sending netconsole messages + bpftrace_start() + defer(bpftrace_stop) + + do_netpoll_flush(cfg, ifname, target_name) + + if bpftrace_any_hit(join=True): + ksft_pr("netpoll_poll_dev() was called. Success") + return + + raise KsftXfailEx("netpoll_poll_dev() was not called during the test...") + + +def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: + """Print messages to the console, trying to trigger a netpoll poll""" + netcons_configure_target(cfg, ifname, target_name) + retry = 0 + + for i in range(int(ITERATIONS)): + if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False): + # bpftrace is done, stop sending messages + break + + msg = f"netcons test #{i}" + with open("/dev/kmsg", "w", encoding="utf-8") as kmsg: + for j in range(MAX_WRITES): + try: + kmsg.write(f"{msg}-{j}\n") + except OSError as exception: + # in some cases, kmsg can be busy, so, we will retry + time.sleep(1) + retry += 1 + if retry < 5: + logging.info("Failed to write to kmsg. Retrying") + # Just retry a few times + continue + raise KsftFailEx( + f"Failed to write to kmsg: {exception}" + ) from exception + + netcons_delete_target(target_name) + netcons_configure_target(cfg, ifname, target_name) + # If we sleep here, we will have a better chance of triggering + # This number is based on a few tests I ran while developing this test + time.sleep(0.4) + + +def configure_network(ifname: str) -> None: + """Configure ring size and queue numbers""" + + # Set defined queues to 1 to force congestion + prev_queues = ethtool_get_queues_cnt(ifname) + logging.debug("RX/TX/combined queues: %s", prev_queues) + # Only set the queues to 1 if they exists in the device. I.e, they are > 0 + ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues)) + defer(ethtool_set_queues_cnt, ifname, prev_queues) + + # Try to set the ring size to some low value. + # Do not fail if the hardware do not accepted desired values + prev_ring_size = ethtool_get_ringsize(ifname) + for size in [(1, 1), (128, 128), (256, 256)]: + if ethtool_set_ringsize(ifname, size): + # hardware accepted the desired ringsize + logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size) + break + defer(ethtool_set_ringsize, ifname, prev_ring_size) + + +def test_netpoll(cfg: NetDrvEpEnv) -> None: + """ + Test netpoll by sending traffic to the interface and then sending + netconsole messages to trigger a poll + """ + + ifname = cfg.ifname + configure_network(ifname) + target_name = netcons_generate_random_target_name() + traffic = None + + try: + traffic = GenerateTraffic(cfg) + do_netpoll_flush_monitored(cfg, ifname, target_name) + finally: + if traffic: + traffic.stop() + + # Revert RX/TX queues + netcons_delete_target(target_name) + + +def test_check_dependencies() -> None: + """Check if the dependencies are met""" + if not os.path.exists(NETCONSOLE_CONFIGFS_PATH): + raise KsftSkipEx( + f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set." # pylint: disable=C0301 + ) + + +def main() -> None: + """Main function to run the test""" + netcons_load_module() + test_check_dependencies() + with NetDrvEpEnv(__file__) as cfg: + ksft_run( + [test_netpoll], + args=(cfg,), + ) + ksft_exit() + + +if __name__ == "__main__": + main() |