diff options
Diffstat (limited to 'tools/testing/selftests/drivers/net/stats.py')
-rwxr-xr-x | tools/testing/selftests/drivers/net/stats.py | 144 |
1 files changed, 137 insertions, 7 deletions
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 7a7b16b180e2..efcc1e10575b 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -1,10 +1,16 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +import errno +import subprocess +import time from lib.py import ksft_run, ksft_exit, ksft_pr -from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx +from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises +from lib.py import KsftSkipEx, KsftXfailEx +from lib.py import ksft_disruptive from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv +from lib.py import cmd, ip, defer ethnl = EthtoolFamily() netfam = NetdevFamily() @@ -17,7 +23,7 @@ def check_pause(cfg) -> None: try: ethnl.pause_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: - if e.error == 95: + if e.error == errno.EOPNOTSUPP: raise KsftXfailEx("pause not supported by the device") raise @@ -32,7 +38,7 @@ def check_fec(cfg) -> None: try: ethnl.fec_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: - if e.error == 95: + if e.error == errno.EOPNOTSUPP: raise KsftXfailEx("FEC not supported by the device") raise @@ -69,7 +75,7 @@ def pkt_byte_sum(cfg) -> None: return 0 for _ in range(10): - rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats'] + rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] if stat_cmp(rtstat, qstat) < 0: raise Exception("RTNL stats are lower, fetched later") qstat = get_qstat(cfg) @@ -107,6 +113,23 @@ def qstat_by_ifindex(cfg) -> None: ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key) ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key) + # Sanity check the dumps + queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True) + # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}} + parsed = {} + for entry in queues: + ifindex = entry["ifindex"] + if ifindex not in parsed: + parsed[ifindex] = {"rx":[], "tx": []} + parsed[ifindex][entry["queue-type"]].append(entry['queue-id']) + # Now, validate + for ifindex, queues in parsed.items(): + for qtype in ['rx', 'tx']: + ksft_eq(len(queues[qtype]), len(set(queues[qtype])), + comment="repeated queue keys") + ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1, + comment="missing queue keys") + # Test invalid dumps # 0 is invalid with ksft_raises(NlError) as cm: @@ -117,7 +140,7 @@ def qstat_by_ifindex(cfg) -> None: # loopback has no stats with ksft_raises(NlError) as cm: netfam.qstats_get({"ifindex": 1}, dump=True) - ksft_eq(cm.exception.nl_msg.error, -95) + ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP) ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') # Try to get stats for lowest unused ifindex but not 0 @@ -133,9 +156,116 @@ def qstat_by_ifindex(cfg) -> None: ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') +@ksft_disruptive +def check_down(cfg) -> None: + try: + qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("qstats not supported by the device") + raise + + ip(f"link set dev {cfg.dev['ifname']} down") + defer(ip, f"link set dev {cfg.dev['ifname']} up") + + qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + for k, v in qstat.items(): + ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down") + + # exercise per-queue API to make sure that "device down" state + # is handled correctly and doesn't crash + netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True) + + +def __run_inf_loop(body): + body = body.strip() + if body[-1] != ';': + body += ';' + + return subprocess.Popen(f"while true; do {body} done", shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + +def __stats_increase_sanely(old, new) -> None: + for k in old.keys(): + ksft_ge(new[k], old[k]) + ksft_lt(new[k] - old[k], 1 << 31, comment="likely wrapping error") + + +def procfs_hammer(cfg) -> None: + """ + Reading stats via procfs only holds the RCU lock, which is not an exclusive + lock, make sure drivers can handle parallel reads of stats. + """ + one = __run_inf_loop("cat /proc/net/dev") + defer(one.kill) + two = __run_inf_loop("cat /proc/net/dev") + defer(two.kill) + + time.sleep(1) + # Make sure the processes are running + ksft_is(one.poll(), None) + ksft_is(two.poll(), None) + + rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + time.sleep(2) + rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + __stats_increase_sanely(rtstat1, rtstat2) + # defers will kill the loops + + +@ksft_disruptive +def procfs_downup_hammer(cfg) -> None: + """ + Reading stats via procfs only holds the RCU lock, drivers often try + to sleep when reading the stats, or don't protect against races. + """ + # Max out the queues, we'll flip between max and 1 + channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if channels['combined-count'] == 0: + rx_type = 'rx' + else: + rx_type = 'combined' + cur_queue_cnt = channels[f'{rx_type}-count'] + max_queue_cnt = channels[f'{rx_type}-max'] + + cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}") + defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}") + + # Real test stats + stats = __run_inf_loop("cat /proc/net/dev") + defer(stats.kill) + + ipset = f"ip link set dev {cfg.ifname}" + defer(ip, f"link set dev {cfg.ifname} up") + # The "echo -n 1" lets us count iterations below + updown = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \ + f"ethtool -L {cfg.ifname} {rx_type} 1; " + \ + f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \ + "echo -n 1" + updown = __run_inf_loop(updown) + kill_updown = defer(updown.kill) + + time.sleep(1) + # Make sure the processes are running + ksft_is(stats.poll(), None) + ksft_is(updown.poll(), None) + + rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + # We're looking for crashes, give it extra time + time.sleep(9) + rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + __stats_increase_sanely(rtstat1, rtstat2) + + kill_updown.exec() + stdout, _ = updown.communicate(timeout=5) + ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8'))) + + def main() -> None: - with NetDrvEnv(__file__) as cfg: - ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex], + with NetDrvEnv(__file__, queue_count=100) as cfg: + ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, + check_down, procfs_hammer, procfs_downup_hammer], args=(cfg, )) ksft_exit() |