summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/net
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/net')
-rw-r--r--tools/testing/selftests/net/.gitignore15
-rw-r--r--tools/testing/selftests/net/Makefile284
-rw-r--r--tools/testing/selftests/net/af_unix/.gitignore8
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile14
-rw-r--r--tools/testing/selftests/net/af_unix/config2
-rw-r--r--tools/testing/selftests/net/af_unix/diag_uid.c2
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c169
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c123
-rw-r--r--tools/testing/selftests/net/af_unix/scm_pidfd.c217
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c110
-rw-r--r--tools/testing/selftests/net/af_unix/so_peek_off.c162
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connect.c2
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connreset.c180
-rwxr-xr-xtools/testing/selftests/net/amt.sh20
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh2
-rwxr-xr-xtools/testing/selftests/net/bareudp.sh51
-rw-r--r--tools/testing/selftests/net/bench/Makefile7
-rw-r--r--tools/testing/selftests/net/bench/page_pool/Makefile17
-rw-r--r--tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c267
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.c394
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.h238
-rwxr-xr-xtools/testing/selftests/net/bench/test_bench_page_pool.sh32
-rw-r--r--tools/testing/selftests/net/bind_bhash.c4
-rw-r--r--tools/testing/selftests/net/bind_timewait.c2
-rw-r--r--tools/testing/selftests/net/bind_wildcard.c2
-rwxr-xr-xtools/testing/selftests/net/bpf_offload.py28
-rwxr-xr-xtools/testing/selftests/net/broadcast_ether_dst.sh83
-rwxr-xr-xtools/testing/selftests/net/broadcast_pmtu.sh47
-rwxr-xr-xtools/testing/selftests/net/busy_poll_test.sh187
-rw-r--r--tools/testing/selftests/net/busy_poller.c368
-rw-r--r--tools/testing/selftests/net/can/.gitignore2
-rw-r--r--tools/testing/selftests/net/can/Makefile11
-rw-r--r--tools/testing/selftests/net/can/config3
-rw-r--r--tools/testing/selftests/net/can/test_raw_filter.c405
-rwxr-xr-xtools/testing/selftests/net/can/test_raw_filter.sh45
-rwxr-xr-xtools/testing/selftests/net/cmsg_ip.sh187
-rwxr-xr-xtools/testing/selftests/net/cmsg_ipv6.sh154
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c135
-rwxr-xr-xtools/testing/selftests/net/cmsg_so_priority.sh151
-rwxr-xr-xtools/testing/selftests/net/cmsg_time.sh35
-rw-r--r--tools/testing/selftests/net/config131
-rwxr-xr-xtools/testing/selftests/net/drop_monitor_tests.sh2
-rw-r--r--tools/testing/selftests/net/epoll_busy_poll.c2
-rwxr-xr-xtools/testing/selftests/net/fcnal-ipv4.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-ipv6.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-other.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh448
-rwxr-xr-xtools/testing/selftests/net/fdb_flush.sh2
-rwxr-xr-xtools/testing/selftests/net/fdb_notify.sh96
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh116
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh442
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh219
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile60
-rw-r--r--tools/testing/selftests/net/forwarding/README17
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_activity_notify.sh170
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh18
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh387
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_igmp.sh80
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh102
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mld.sh81
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh148
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh25
-rw-r--r--tools/testing/selftests/net/forwarding/config29
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh8
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_flat.sh14
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_flat_key.sh14
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh14
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_hier.sh14
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_hier_key.sh14
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh14
-rw-r--r--tools/testing/selftests/net/forwarding/ip6gre_lib.sh80
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh372
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib_sh_test.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh438
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh3
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/no_forwarding.sh5
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh29
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_lag.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh40
-rw-r--r--tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh13
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh58
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multicast.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_ets.sh8
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_core.sh84
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh22
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh107
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_core.sh93
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_tbf_root.sh3
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh46
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh52
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_port_range.sh46
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_police.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_taprio.sh421
-rw-r--r--tools/testing/selftests/net/forwarding/tsn_lib.sh26
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh15
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh766
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_reserved.sh347
-rwxr-xr-xtools/testing/selftests/net/gre_ipv6_lladdr.sh184
-rw-r--r--tools/testing/selftests/net/gro.c1328
-rwxr-xr-xtools/testing/selftests/net/gro.sh104
-rw-r--r--tools/testing/selftests/net/hsr/Makefile6
-rw-r--r--tools/testing/selftests/net/hsr/config5
-rw-r--r--tools/testing/selftests/net/hsr/hsr_common.sh4
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_ping.sh98
-rw-r--r--tools/testing/selftests/net/hsr/settings1
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh2
-rw-r--r--tools/testing/selftests/net/io_uring_zerocopy_tx.c24
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh1832
-rw-r--r--tools/testing/selftests/net/ioam6_parser.c1087
-rw-r--r--tools/testing/selftests/net/ip_local_port_range.c2
-rwxr-xr-xtools/testing/selftests/net/ip_local_port_range.sh4
-rw-r--r--tools/testing/selftests/net/ipsec.c5
-rwxr-xr-xtools/testing/selftests/net/ipv6_force_forwarding.sh105
-rw-r--r--tools/testing/selftests/net/ipv6_fragmentation.c114
-rwxr-xr-xtools/testing/selftests/net/ipv6_route_update_soft_lockup.sh261
-rw-r--r--tools/testing/selftests/net/lib.sh454
-rw-r--r--tools/testing/selftests/net/lib/.gitignore1
-rw-r--r--tools/testing/selftests/net/lib/Makefile19
-rw-r--r--tools/testing/selftests/net/lib/csum.c16
-rw-r--r--tools/testing/selftests/net/lib/ksft.h56
-rwxr-xr-xtools/testing/selftests/net/lib/ksft_setup_loopback.sh111
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py35
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py210
-rw-r--r--tools/testing/selftests/net/lib/py/netns.py18
-rw-r--r--tools/testing/selftests/net/lib/py/nsim.py3
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py173
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py37
-rw-r--r--tools/testing/selftests/net/lib/sh/defer.sh131
-rw-r--r--tools/testing/selftests/net/lib/xdp_dummy.bpf.c (renamed from tools/testing/selftests/net/xdp_dummy.bpf.c)6
-rw-r--r--tools/testing/selftests/net/lib/xdp_helper.c131
-rw-r--r--tools/testing/selftests/net/lib/xdp_native.bpf.c680
-rwxr-xr-xtools/testing/selftests/net/link_netns.py141
-rwxr-xr-xtools/testing/selftests/net/lwt_dst_cache_ref_loop.sh246
-rw-r--r--tools/testing/selftests/net/mptcp/.gitignore1
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile31
-rw-r--r--tools/testing/selftests/net/mptcp/config44
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh62
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c119
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh189
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh5
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_diag.c435
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_inq.c28
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh1411
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh123
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c44
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh43
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh8
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c35
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh50
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh50
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c24
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh84
-rwxr-xr-xtools/testing/selftests/net/nat6to4.sh15
-rw-r--r--tools/testing/selftests/net/net_helper.sh25
-rwxr-xr-xtools/testing/selftests/net/netdev-l2addr.sh59
-rwxr-xr-xtools/testing/selftests/net/netdevice.sh60
-rw-r--r--tools/testing/selftests/net/netfilter/.gitignore2
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile81
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter.sh10
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter_queue.sh85
-rwxr-xr-xtools/testing/selftests/net/netfilter/bridge_brouter.sh2
-rw-r--r--tools/testing/selftests/net/netfilter/config50
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_clash.sh174
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_dump_flush.c15
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_dump_flush.sh3
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_resize.sh515
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c125
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh51
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_vrf.sh4
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh10
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_nat_edemux.sh58
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_audit.sh57
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh427
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh638
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh248
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_interface_stress.sh157
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat.sh85
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat_zones.sh2
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh250
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh358
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_tproxy_udp.sh262
-rwxr-xr-xtools/testing/selftests/net/netfilter/rpath.sh36
-rw-r--r--tools/testing/selftests/net/netfilter/sctp_collision.c3
-rw-r--r--tools/testing/selftests/net/netfilter/udpclash.c158
-rwxr-xr-xtools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh121
-rw-r--r--tools/testing/selftests/net/netlink-dumps.c263
-rwxr-xr-xtools/testing/selftests/net/netns-name.sh23
-rw-r--r--tools/testing/selftests/net/nettest.c12
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py160
-rw-r--r--tools/testing/selftests/net/openvswitch/Makefile2
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh105
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py2
-rw-r--r--tools/testing/selftests/net/ovpn/.gitignore2
-rw-r--r--tools/testing/selftests/net/ovpn/Makefile34
-rw-r--r--tools/testing/selftests/net/ovpn/common.sh108
-rw-r--r--tools/testing/selftests/net/ovpn/config10
-rw-r--r--tools/testing/selftests/net/ovpn/data64.key5
-rw-r--r--tools/testing/selftests/net/ovpn/ovpn-cli.c2387
-rw-r--r--tools/testing/selftests/net/ovpn/tcp_peers.txt5
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-chachapoly.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket.sh45
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-float.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-large-mtu.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test.sh117
-rw-r--r--tools/testing/selftests/net/ovpn/udp_peers.txt6
-rw-r--r--tools/testing/selftests/net/packetdrill/Makefile12
-rw-r--r--tools/testing/selftests/net/packetdrill/config11
-rwxr-xr-xtools/testing/selftests/net/packetdrill/defaults.sh64
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh62
-rwxr-xr-xtools/testing/selftests/net/packetdrill/set_sysctls.py38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt31
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt36
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt45
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt72
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt36
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt72
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt43
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt41
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt46
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt49
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt74
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt53
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt40
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt43
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt53
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt44
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt33
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt64
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt62
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt56
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt33
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt34
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt42
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt39
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt36
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt63
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt42
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt34
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt92
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt91
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt145
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt39
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt57
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt43
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt46
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt64
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt69
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt58
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt46
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt120
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt59
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh134
-rw-r--r--tools/testing/selftests/net/proc_net_pktgen.c690
-rw-r--r--tools/testing/selftests/net/psock_fanout.c86
-rw-r--r--tools/testing/selftests/net/psock_lib.h4
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c8
-rw-r--r--tools/testing/selftests/net/rds/.gitignore1
-rw-r--r--tools/testing/selftests/net/rds/Makefile18
-rw-r--r--tools/testing/selftests/net/rds/README.txt41
-rwxr-xr-xtools/testing/selftests/net/rds/config.sh53
-rwxr-xr-xtools/testing/selftests/net/rds/run.sh224
-rwxr-xr-xtools/testing/selftests/net/rds/test.py265
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c4
-rw-r--r--tools/testing/selftests/net/reuseport_addr_any.c36
-rw-r--r--tools/testing/selftests/net/reuseport_bpf.c2
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_numa.c2
-rwxr-xr-xtools/testing/selftests/net/route_hint.sh79
-rwxr-xr-xtools/testing/selftests/net/rps_default_mask.sh12
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.py30
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh306
-rwxr-xr-xtools/testing/selftests/net/rtnetlink_notification.sh112
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c20
-rw-r--r--tools/testing/selftests/net/sctp_hello.c17
-rwxr-xr-xtools/testing/selftests/net/sctp_vrf.sh73
-rw-r--r--tools/testing/selftests/net/setup_loopback.sh120
-rw-r--r--tools/testing/selftests/net/setup_veth.sh44
-rw-r--r--tools/testing/selftests/net/sk_so_peek_off.c202
-rw-r--r--tools/testing/selftests/net/skf_net_off.c244
-rwxr-xr-xtools/testing/selftests/net/skf_net_off.sh30
-rw-r--r--tools/testing/selftests/net/so_incoming_cpu.c2
-rw-r--r--tools/testing/selftests/net/so_rcv_listener.c168
-rw-r--r--tools/testing/selftests/net/so_txtime.c2
-rw-r--r--tools/testing/selftests/net/socket.c13
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh5
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh5
-rwxr-xr-xtools/testing/selftests/net/srv6_end_flavors_test.sh4
-rwxr-xr-xtools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh79
-rwxr-xr-xtools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh133
-rwxr-xr-xtools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh76
-rwxr-xr-xtools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh85
-rw-r--r--tools/testing/selftests/net/tap.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/Makefile5
-rw-r--r--tools/testing/selftests/net/tcp_ao/bench-lookups.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/config3
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect-deny.c83
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect.c28
-rw-r--r--tools/testing/selftests/net/tcp_ao/icmps-discard.c19
-rw-r--r--tools/testing/selftests/net/tcp_ao/key-management.c94
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/aolib.h295
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c556
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/ftrace.c543
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/kconfig.c31
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/setup.c19
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/sock.c316
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/utils.c26
-rw-r--r--tools/testing/selftests/net/tcp_ao/restore.c101
-rw-r--r--tools/testing/selftests/net/tcp_ao/rst.c49
-rw-r--r--tools/testing/selftests/net/tcp_ao/self-connect.c40
-rw-r--r--tools/testing/selftests/net/tcp_ao/seq-ext.c62
-rw-r--r--tools/testing/selftests/net/tcp_ao/setsockopt-closed.c190
-rw-r--r--tools/testing/selftests/net/tcp_ao/unsigned-md5.c153
-rw-r--r--tools/testing/selftests/net/tcp_fastopen_backup_key.c2
-rw-r--r--tools/testing/selftests/net/tcp_port_share.c258
-rwxr-xr-xtools/testing/selftests/net/test_blackhole_dev.sh11
-rwxr-xr-xtools/testing/selftests/net/test_bridge_backup_port.sh31
-rwxr-xr-xtools/testing/selftests/net/test_bridge_neigh_suppress.sh125
-rwxr-xr-xtools/testing/selftests/net/test_neigh.sh366
-rwxr-xr-xtools/testing/selftests/net/test_so_rcv.sh73
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_fdb_changelink.sh111
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_nh.sh223
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifiltering.sh9
-rw-r--r--tools/testing/selftests/net/tfo.c171
-rwxr-xr-xtools/testing/selftests/net/tfo_passive.sh112
-rw-r--r--tools/testing/selftests/net/tls.c1121
-rw-r--r--tools/testing/selftests/net/toeplitz.c589
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh199
-rwxr-xr-xtools/testing/selftests/net/toeplitz_client.sh28
-rwxr-xr-xtools/testing/selftests/net/traceroute.sh561
-rw-r--r--tools/testing/selftests/net/tun.c2
-rw-r--r--tools/testing/selftests/net/txtimestamp.c52
-rwxr-xr-xtools/testing/selftests/net/txtimestamp.sh12
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh57
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh4
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh4
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh4
-rw-r--r--tools/testing/selftests/net/udpgso.c51
-rwxr-xr-xtools/testing/selftests/net/udpgso_bench.sh3
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c2
-rwxr-xr-xtools/testing/selftests/net/unicast_extensions.sh9
-rwxr-xr-xtools/testing/selftests/net/veth.sh4
-rwxr-xr-xtools/testing/selftests/net/vlan_bridge_binding.sh258
-rwxr-xr-xtools/testing/selftests/net/vlan_hw_filter.sh98
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh98
-rwxr-xr-xtools/testing/selftests/net/xfrm_policy_add_speed.sh83
-rw-r--r--tools/testing/selftests/net/ynl.mk27
409 files changed, 36227 insertions, 6673 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 666ab7d9390b..6930fe926c58 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -2,11 +2,10 @@
bind_bhash
bind_timewait
bind_wildcard
+busy_poller
cmsg_sender
-diag_uid
epoll_busy_poll
fin_ack_lat
-gro
hwtstamp_config
io_uring_zerocopy_tx
ioam6_parser
@@ -15,9 +14,12 @@ ip_local_port_range
ipsec
ipv6_flowlabel
ipv6_flowlabel_mgr
+ipv6_fragmentation
log.txt
msg_zerocopy
+netlink-dumps
nettest
+proc_net_pktgen
psock_fanout
psock_snd
psock_tpacket
@@ -30,22 +32,24 @@ reuseport_bpf_numa
reuseport_dualstack
rxtimestamp
sctp_hello
-scm_pidfd
-scm_rights
sk_bind_sendto_listen
sk_connect_zero_addr
+sk_so_peek_off
+skf_net_off
socket
so_incoming_cpu
so_netns_cookie
so_txtime
+so_rcv_listener
stress_reuseport_listen
tap
tcp_fastopen_backup_key
tcp_inq
tcp_mmap
+tcp_port_share
+tfo
timestamping
tls
-toeplitz
tools
tun
txring_overwrite
@@ -53,4 +57,3 @@ txtimestamp
udpgso
udpgso_bench_rx
udpgso_bench_tx
-unix_connect
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index bc3925200637..b66ba04f19d9 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -1,102 +1,200 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for net selftests
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES)
# Additional include paths needed by kselftest.h
CFLAGS += -I../
-TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
- rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
-TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
-TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
-TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
-TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
-TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh
-TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh
-TEST_PROGS += route_localnet.sh
-TEST_PROGS += reuseaddr_ports_exhausted.sh
-TEST_PROGS += txtimestamp.sh
-TEST_PROGS += vrf-xfrm-tests.sh
-TEST_PROGS += rxtimestamp.sh
-TEST_PROGS += drop_monitor_tests.sh
-TEST_PROGS += vrf_route_leaking.sh
-TEST_PROGS += bareudp.sh
-TEST_PROGS += amt.sh
-TEST_PROGS += unicast_extensions.sh
-TEST_PROGS += udpgro_fwd.sh
-TEST_PROGS += udpgro_frglist.sh
-TEST_PROGS += veth.sh
-TEST_PROGS += ioam6.sh
-TEST_PROGS += gro.sh
-TEST_PROGS += gre_gso.sh
-TEST_PROGS += cmsg_so_mark.sh
-TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
-TEST_PROGS += netns-name.sh
-TEST_PROGS += nl_netdev.py
-TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
-TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
-TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
-TEST_PROGS += srv6_hencap_red_l3vpn_test.sh
-TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh
-TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh
-TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh
-TEST_PROGS += srv6_end_flavors_test.sh
-TEST_PROGS += srv6_end_dx4_netfilter_test.sh
-TEST_PROGS += srv6_end_dx6_netfilter_test.sh
-TEST_PROGS += vrf_strict_mode_test.sh
-TEST_PROGS += arp_ndisc_evict_nocarrier.sh
-TEST_PROGS += ndisc_unsolicited_na_test.sh
-TEST_PROGS += arp_ndisc_untracked_subnets.sh
-TEST_PROGS += stress_reuseport_listen.sh
-TEST_PROGS += l2_tos_ttl_inherit.sh
-TEST_PROGS += bind_bhash.sh
-TEST_PROGS += ip_local_port_range.sh
-TEST_PROGS += rps_default_mask.sh
-TEST_PROGS += big_tcp.sh
-TEST_PROGS += netns-sysctl.sh
-TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh
-TEST_GEN_FILES = socket nettest
-TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
-TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
-TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
-TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie
-TEST_GEN_FILES += tcp_fastopen_backup_key
-TEST_GEN_FILES += fin_ack_lat
-TEST_GEN_FILES += reuseaddr_ports_exhausted
-TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
-TEST_GEN_FILES += ipsec
-TEST_GEN_FILES += ioam6_parser
-TEST_GEN_FILES += gro
-TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll
-TEST_GEN_FILES += toeplitz
-TEST_GEN_FILES += cmsg_sender
-TEST_GEN_FILES += stress_reuseport_listen
-TEST_PROGS += test_vxlan_vnifiltering.sh
-TEST_GEN_FILES += io_uring_zerocopy_tx
-TEST_PROGS += io_uring_zerocopy_tx.sh
-TEST_GEN_FILES += bind_bhash
-TEST_GEN_PROGS += sk_bind_sendto_listen
-TEST_GEN_PROGS += sk_connect_zero_addr
-TEST_PROGS += test_ingress_egress_chaining.sh
-TEST_GEN_PROGS += so_incoming_cpu
-TEST_PROGS += sctp_vrf.sh
-TEST_GEN_FILES += sctp_hello
-TEST_GEN_FILES += ip_local_port_range
-TEST_GEN_FILES += bind_wildcard
-TEST_PROGS += test_vxlan_mdb.sh
-TEST_PROGS += test_bridge_neigh_suppress.sh
-TEST_PROGS += test_vxlan_nolocalbypass.sh
-TEST_PROGS += test_bridge_backup_port.sh
-TEST_PROGS += fdb_flush.sh
-TEST_PROGS += fq_band_pktlimit.sh
-TEST_PROGS += vlan_hw_filter.sh
-TEST_PROGS += bpf_offload.py
+TEST_PROGS := \
+ altnames.sh \
+ amt.sh \
+ arp_ndisc_evict_nocarrier.sh \
+ arp_ndisc_untracked_subnets.sh \
+ bareudp.sh \
+ big_tcp.sh \
+ bind_bhash.sh \
+ bpf_offload.py \
+ broadcast_ether_dst.sh \
+ broadcast_pmtu.sh \
+ busy_poll_test.sh \
+ cmsg_ip.sh \
+ cmsg_so_mark.sh \
+ cmsg_so_priority.sh \
+ cmsg_time.sh \
+ drop_monitor_tests.sh \
+ fcnal-ipv4.sh \
+ fcnal-ipv6.sh \
+ fcnal-other.sh \
+ fdb_flush.sh \
+ fdb_notify.sh \
+ fib-onlink-tests.sh \
+ fib_nexthop_multiprefix.sh \
+ fib_nexthop_nongw.sh \
+ fib_nexthops.sh \
+ fib_rule_tests.sh \
+ fib_tests.sh \
+ fin_ack_lat.sh \
+ fq_band_pktlimit.sh \
+ gre_gso.sh \
+ gre_ipv6_lladdr.sh \
+ icmp.sh \
+ icmp_redirect.sh \
+ io_uring_zerocopy_tx.sh \
+ ioam6.sh \
+ ip6_gre_headroom.sh \
+ ip_defrag.sh \
+ ip_local_port_range.sh \
+ ipv6_flowlabel.sh \
+ ipv6_force_forwarding.sh \
+ ipv6_route_update_soft_lockup.sh \
+ l2_tos_ttl_inherit.sh \
+ l2tp.sh \
+ link_netns.py \
+ lwt_dst_cache_ref_loop.sh \
+ msg_zerocopy.sh \
+ nat6to4.sh \
+ ndisc_unsolicited_na_test.sh \
+ netdev-l2addr.sh \
+ netdevice.sh \
+ netns-name.sh \
+ netns-sysctl.sh \
+ nl_netdev.py \
+ pmtu.sh \
+ psock_snd.sh \
+ reuseaddr_ports_exhausted.sh \
+ reuseport_addr_any.sh \
+ route_hint.sh \
+ route_localnet.sh \
+ rps_default_mask.sh \
+ rtnetlink.py \
+ rtnetlink.sh \
+ rtnetlink_notification.sh \
+ run_afpackettests \
+ run_netsocktests \
+ rxtimestamp.sh \
+ sctp_vrf.sh \
+ skf_net_off.sh \
+ so_txtime.sh \
+ srv6_end_dt46_l3vpn_test.sh \
+ srv6_end_dt4_l3vpn_test.sh \
+ srv6_end_dt6_l3vpn_test.sh \
+ srv6_end_dx4_netfilter_test.sh \
+ srv6_end_dx6_netfilter_test.sh \
+ srv6_end_flavors_test.sh \
+ srv6_end_next_csid_l3vpn_test.sh \
+ srv6_end_x_next_csid_l3vpn_test.sh \
+ srv6_hencap_red_l3vpn_test.sh \
+ srv6_hl2encap_red_l2vpn_test.sh \
+ stress_reuseport_listen.sh \
+ tcp_fastopen_backup_key.sh \
+ test_bpf.sh \
+ test_bridge_backup_port.sh \
+ test_bridge_neigh_suppress.sh \
+ test_ingress_egress_chaining.sh \
+ test_neigh.sh \
+ test_so_rcv.sh \
+ test_vxlan_fdb_changelink.sh \
+ test_vxlan_mdb.sh \
+ test_vxlan_nh.sh \
+ test_vxlan_nolocalbypass.sh \
+ test_vxlan_under_vrf.sh \
+ test_vxlan_vnifiltering.sh \
+ tfo_passive.sh \
+ traceroute.sh \
+ txtimestamp.sh \
+ udpgro.sh \
+ udpgro_bench.sh \
+ udpgro_frglist.sh \
+ udpgro_fwd.sh \
+ udpgso.sh \
+ udpgso_bench.sh \
+ unicast_extensions.sh \
+ veth.sh \
+ vlan_bridge_binding.sh \
+ vlan_hw_filter.sh \
+ vrf-xfrm-tests.sh \
+ vrf_route_leaking.sh \
+ vrf_strict_mode_test.sh \
+ xfrm_policy.sh \
+# end of TEST_PROGS
-TEST_FILES := settings
-TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+TEST_PROGS_EXTENDED := \
+ xfrm_policy_add_speed.sh \
+# end of TEST_PROGS_EXTENDED
+
+TEST_GEN_FILES := \
+ bind_bhash \
+ cmsg_sender \
+ fin_ack_lat \
+ hwtstamp_config \
+ io_uring_zerocopy_tx \
+ ioam6_parser \
+ ip_defrag \
+ ip_local_port_range \
+ ipsec \
+ ipv6_flowlabel \
+ ipv6_flowlabel_mgr \
+ msg_zerocopy \
+ nettest \
+ psock_fanout \
+ psock_snd \
+ psock_tpacket \
+ reuseaddr_ports_exhausted \
+ reuseport_addr_any \
+ rxtimestamp \
+ sctp_hello \
+ skf_net_off \
+ so_netns_cookie \
+ so_rcv_listener \
+ so_txtime \
+ socket \
+ stress_reuseport_listen \
+ tcp_fastopen_backup_key \
+ tcp_inq \
+ tcp_mmap \
+ tfo \
+ timestamping \
+ txring_overwrite \
+ txtimestamp \
+ udpgso \
+ udpgso_bench_rx \
+ udpgso_bench_tx \
+# end of TEST_GEN_FILES
+
+TEST_GEN_PROGS := \
+ bind_timewait \
+ bind_wildcard \
+ epoll_busy_poll \
+ ipv6_fragmentation \
+ proc_net_pktgen \
+ reuseaddr_conflict \
+ reuseport_bpf \
+ reuseport_bpf_cpu \
+ reuseport_bpf_numa \
+ reuseport_dualstack \
+ sk_bind_sendto_listen \
+ sk_connect_zero_addr \
+ sk_so_peek_off \
+ so_incoming_cpu \
+ tap \
+ tcp_port_share \
+ tls \
+ tun \
+# end of TEST_GEN_PROGS
+
+TEST_FILES := \
+ fcnal-test.sh \
+ in_netns.sh \
+ lib.sh \
+ settings \
+# end of TEST_FILES
+
+# YNL files, must be before "include ..lib.mk"
+YNL_GEN_FILES := busy_poller
+YNL_GEN_PROGS := netlink-dumps
+TEST_GEN_FILES += $(YNL_GEN_FILES)
+TEST_GEN_PROGS += $(YNL_GEN_PROGS)
TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
@@ -104,6 +202,10 @@ TEST_INCLUDES := forwarding/lib.sh
include ../lib.mk
+# YNL build
+YNL_GENS := netdev
+include ynl.mk
+
$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto
diff --git a/tools/testing/selftests/net/af_unix/.gitignore b/tools/testing/selftests/net/af_unix/.gitignore
new file mode 100644
index 000000000000..240b26740c9e
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/.gitignore
@@ -0,0 +1,8 @@
+diag_uid
+msg_oob
+scm_inq
+scm_pidfd
+scm_rights
+so_peek_off
+unix_connect
+unix_connreset
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 50584479540b..3cd677b72072 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,14 @@
-CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect
+CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end
+
+TEST_GEN_PROGS := \
+ diag_uid \
+ msg_oob \
+ scm_inq \
+ scm_pidfd \
+ scm_rights \
+ so_peek_off \
+ unix_connect \
+ unix_connreset \
+# end of TEST_GEN_PROGS
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config
index 37368567768c..b5429c15a53c 100644
--- a/tools/testing/selftests/net/af_unix/config
+++ b/tools/testing/selftests/net/af_unix/config
@@ -1,3 +1,3 @@
-CONFIG_UNIX=y
CONFIG_AF_UNIX_OOB=y
+CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c
index 79a3dd75590e..da7d50cedee6 100644
--- a/tools/testing/selftests/net/af_unix/diag_uid.c
+++ b/tools/testing/selftests/net/af_unix/diag_uid.c
@@ -14,7 +14,7 @@
#include <sys/types.h>
#include <sys/un.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(diag_uid)
{
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
index 16d0c172eaeb..1b499d56656c 100644
--- a/tools/testing/selftests/net/af_unix/msg_oob.c
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -11,7 +11,7 @@
#include <sys/signalfd.h>
#include <sys/socket.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define BUF_SZ 32
@@ -209,8 +209,8 @@ static void __sendpair(struct __test_metadata *_metadata,
static void __recvpair(struct __test_metadata *_metadata,
FIXTURE_DATA(msg_oob) *self,
- const void *expected_buf, int expected_len,
- int buf_len, int flags)
+ const char *expected_buf, int expected_len,
+ int buf_len, int flags, bool is_sender)
{
int i, ret[2], recv_errno[2], expected_errno = 0;
char recv_buf[2][BUF_SZ] = {};
@@ -221,7 +221,9 @@ static void __recvpair(struct __test_metadata *_metadata,
errno = 0;
for (i = 0; i < 2; i++) {
- ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags);
+ int index = is_sender ? i * 2 : i * 2 + 1;
+
+ ret[i] = recv(self->fd[index], recv_buf[i], buf_len, flags);
recv_errno[i] = errno;
}
@@ -308,6 +310,20 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
ASSERT_EQ(answ[0], answ[1]);
}
+static void __resetpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const FIXTURE_VARIANT(msg_oob) *variant,
+ bool reset)
+{
+ int i;
+
+ for (i = 0; i < 2; i++)
+ close(self->fd[i * 2 + 1]);
+
+ __recvpair(_metadata, self, "", reset ? -ECONNRESET : 0, 1,
+ variant->peek ? MSG_PEEK : 0, true);
+}
+
#define sendpair(buf, len, flags) \
__sendpair(_metadata, self, buf, len, flags)
@@ -316,9 +332,10 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
if (variant->peek) \
__recvpair(_metadata, self, \
expected_buf, expected_len, \
- buf_len, (flags) | MSG_PEEK); \
+ buf_len, (flags) | MSG_PEEK, false); \
__recvpair(_metadata, self, \
- expected_buf, expected_len, buf_len, flags); \
+ expected_buf, expected_len, \
+ buf_len, flags, false); \
} while (0)
#define epollpair(oob_remaining) \
@@ -330,6 +347,9 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
#define setinlinepair() \
__setinlinepair(_metadata, self)
+#define resetpair(reset) \
+ __resetpair(_metadata, self, variant, reset)
+
#define tcp_incompliant \
for (self->tcp_compliant = false; \
self->tcp_compliant == false; \
@@ -344,6 +364,21 @@ TEST_F(msg_oob, non_oob)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(true);
+}
+
+TEST_F(msg_oob, non_oob_no_reset)
+{
+ sendpair("x", 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob)
@@ -355,6 +390,19 @@ TEST_F(msg_oob, oob)
recvpair("x", 1, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
+}
+
+TEST_F(msg_oob, oob_reset)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ resetpair(true);
}
TEST_F(msg_oob, oob_drop)
@@ -370,6 +418,8 @@ TEST_F(msg_oob, oob_drop)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob_ahead)
@@ -385,6 +435,10 @@ TEST_F(msg_oob, oob_ahead)
recvpair("hell", 4, 4, 0);
epollpair(false);
siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
}
TEST_F(msg_oob, oob_break)
@@ -403,6 +457,8 @@ TEST_F(msg_oob, oob_break)
recvpair("", -EAGAIN, 1, 0);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob_ahead_break)
@@ -426,6 +482,8 @@ TEST_F(msg_oob, oob_ahead_break)
recvpair("world", 5, 5, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob_break_drop)
@@ -449,6 +507,8 @@ TEST_F(msg_oob, oob_break_drop)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_break)
@@ -476,6 +536,8 @@ TEST_F(msg_oob, ex_oob_break)
recvpair("ld", 2, 2, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_drop)
@@ -498,6 +560,8 @@ TEST_F(msg_oob, ex_oob_drop)
epollpair(false);
siocatmarkpair(true);
}
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_drop_2)
@@ -523,6 +587,79 @@ TEST_F(msg_oob, ex_oob_drop_2)
epollpair(false);
siocatmarkpair(true);
}
+
+ resetpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("", -EAGAIN, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ resetpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_ex_oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
+}
+
+TEST_F(msg_oob, ex_oob_ex_oob_oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("z", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
}
TEST_F(msg_oob, ex_oob_ahead_break)
@@ -553,6 +690,10 @@ TEST_F(msg_oob, ex_oob_ahead_break)
recvpair("d", 1, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
}
TEST_F(msg_oob, ex_oob_siocatmark)
@@ -572,6 +713,8 @@ TEST_F(msg_oob, ex_oob_siocatmark)
recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
epollpair(true);
siocatmarkpair(false);
+
+ resetpair(true);
}
TEST_F(msg_oob, inline_oob)
@@ -589,6 +732,8 @@ TEST_F(msg_oob, inline_oob)
recvpair("x", 1, 1, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_oob_break)
@@ -610,6 +755,8 @@ TEST_F(msg_oob, inline_oob_break)
recvpair("o", 1, 1, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_oob_ahead_break)
@@ -638,6 +785,8 @@ TEST_F(msg_oob, inline_oob_ahead_break)
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_break)
@@ -663,6 +812,8 @@ TEST_F(msg_oob, inline_ex_oob_break)
recvpair("rld", 3, 3, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_no_drop)
@@ -684,6 +835,8 @@ TEST_F(msg_oob, inline_ex_oob_no_drop)
recvpair("y", 1, 1, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_drop)
@@ -708,6 +861,8 @@ TEST_F(msg_oob, inline_ex_oob_drop)
epollpair(false);
siocatmarkpair(false);
}
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_siocatmark)
@@ -729,6 +884,8 @@ TEST_F(msg_oob, inline_ex_oob_siocatmark)
recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
epollpair(true);
siocatmarkpair(false);
+
+ resetpair(true);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c
new file mode 100644
index 000000000000..3a86be9bda17
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_inq.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "kselftest_harness.h"
+
+#define NR_CHUNKS 100
+#define MSG_LEN 256
+
+FIXTURE(scm_inq)
+{
+ int fd[2];
+};
+
+FIXTURE_VARIANT(scm_inq)
+{
+ int type;
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, stream)
+{
+ .type = SOCK_STREAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, dgram)
+{
+ .type = SOCK_DGRAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, seqpacket)
+{
+ .type = SOCK_SEQPACKET,
+};
+
+FIXTURE_SETUP(scm_inq)
+{
+ int err;
+
+ err = socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd);
+ ASSERT_EQ(0, err);
+}
+
+FIXTURE_TEARDOWN(scm_inq)
+{
+ close(self->fd[0]);
+ close(self->fd[1]);
+}
+
+static void send_chunks(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_inq) *self)
+{
+ char buf[MSG_LEN] = {};
+ int i, ret;
+
+ for (i = 0; i < NR_CHUNKS; i++) {
+ ret = send(self->fd[0], buf, sizeof(buf), 0);
+ ASSERT_EQ(sizeof(buf), ret);
+ }
+}
+
+static void recv_chunks(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_inq) *self)
+{
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+ struct msghdr msg = {};
+ struct iovec iov = {};
+ struct cmsghdr *cmsg;
+ char buf[MSG_LEN];
+ int i, ret;
+ int inq;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+
+ for (i = 0; i < NR_CHUNKS; i++) {
+ memset(buf, 0, sizeof(buf));
+ memset(cmsg_buf, 0, sizeof(cmsg_buf));
+
+ ret = recvmsg(self->fd[1], &msg, 0);
+ ASSERT_EQ(MSG_LEN, ret);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(NULL, cmsg);
+ ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len);
+ ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level);
+ ASSERT_EQ(SCM_INQ, cmsg->cmsg_type);
+
+ ret = ioctl(self->fd[1], SIOCINQ, &inq);
+ ASSERT_EQ(0, ret);
+ ASSERT_EQ(*(int *)CMSG_DATA(cmsg), inq);
+ }
+}
+
+TEST_F(scm_inq, basic)
+{
+ int err, inq;
+
+ err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int));
+ if (variant->type != SOCK_STREAM) {
+ ASSERT_EQ(-ENOPROTOOPT, -errno);
+ return;
+ }
+
+ ASSERT_EQ(0, err);
+
+ err = ioctl(self->fd[1], SIOCINQ, &inq);
+ ASSERT_EQ(0, err);
+ ASSERT_EQ(0, inq);
+
+ send_chunks(_metadata, self);
+ recv_chunks(_metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c
index 7e534594167e..2c18b92a2603 100644
--- a/tools/testing/selftests/net/af_unix/scm_pidfd.c
+++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c
@@ -15,7 +15,8 @@
#include <sys/types.h>
#include <sys/wait.h>
-#include "../../kselftest_harness.h"
+#include "../../pidfd/pidfd.h"
+#include "kselftest_harness.h"
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
#define log_err(MSG, ...) \
@@ -26,6 +27,8 @@
#define SCM_PIDFD 0x04
#endif
+#define CHILD_EXIT_CODE_OK 123
+
static void child_die()
{
exit(1);
@@ -126,16 +129,64 @@ out:
return result;
}
+struct cmsg_data {
+ struct ucred *ucred;
+ int *pidfd;
+};
+
+static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res)
+{
+ struct cmsghdr *cmsg;
+
+ if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+ log_err("recvmsg: truncated");
+ return 1;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_PIDFD) {
+ if (cmsg->cmsg_len < sizeof(*res->pidfd)) {
+ log_err("CMSG parse: SCM_PIDFD wrong len");
+ return 1;
+ }
+
+ res->pidfd = (void *)CMSG_DATA(cmsg);
+ }
+
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS) {
+ if (cmsg->cmsg_len < sizeof(*res->ucred)) {
+ log_err("CMSG parse: SCM_CREDENTIALS wrong len");
+ return 1;
+ }
+
+ res->ucred = (void *)CMSG_DATA(cmsg);
+ }
+ }
+
+ if (!res->pidfd) {
+ log_err("CMSG parse: SCM_PIDFD not found");
+ return 1;
+ }
+
+ if (!res->ucred) {
+ log_err("CMSG parse: SCM_CREDENTIALS not found");
+ return 1;
+ }
+
+ return 0;
+}
+
static int cmsg_check(int fd)
{
struct msghdr msg = { 0 };
- struct cmsghdr *cmsg;
+ struct cmsg_data res;
struct iovec iov;
- struct ucred *ucred = NULL;
int data = 0;
char control[CMSG_SPACE(sizeof(struct ucred)) +
CMSG_SPACE(sizeof(int))] = { 0 };
- int *pidfd = NULL;
pid_t parent_pid;
int err;
@@ -158,53 +209,98 @@ static int cmsg_check(int fd)
return 1;
}
- for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
- cmsg = CMSG_NXTHDR(&msg, cmsg)) {
- if (cmsg->cmsg_level == SOL_SOCKET &&
- cmsg->cmsg_type == SCM_PIDFD) {
- if (cmsg->cmsg_len < sizeof(*pidfd)) {
- log_err("CMSG parse: SCM_PIDFD wrong len");
- return 1;
- }
+ /* send(pfd, "x", sizeof(char), 0) */
+ if (data != 'x') {
+ log_err("recvmsg: data corruption");
+ return 1;
+ }
- pidfd = (void *)CMSG_DATA(cmsg);
- }
+ if (parse_cmsg(&msg, &res)) {
+ log_err("CMSG parse: parse_cmsg() failed");
+ return 1;
+ }
- if (cmsg->cmsg_level == SOL_SOCKET &&
- cmsg->cmsg_type == SCM_CREDENTIALS) {
- if (cmsg->cmsg_len < sizeof(*ucred)) {
- log_err("CMSG parse: SCM_CREDENTIALS wrong len");
- return 1;
- }
+ /* pidfd from SCM_PIDFD should point to the parent process PID */
+ parent_pid =
+ get_pid_from_fdinfo_file(*res.pidfd, "Pid:", sizeof("Pid:") - 1);
+ if (parent_pid != getppid()) {
+ log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+ close(*res.pidfd);
+ return 1;
+ }
- ucred = (void *)CMSG_DATA(cmsg);
- }
+ close(*res.pidfd);
+ return 0;
+}
+
+static int cmsg_check_dead(int fd, int expected_pid)
+{
+ int err;
+ struct msghdr msg = { 0 };
+ struct cmsg_data res;
+ struct iovec iov;
+ int data = 0;
+ char control[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int))] = { 0 };
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_EXIT,
+ };
+
+ iov.iov_base = &data;
+ iov.iov_len = sizeof(data);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ err = recvmsg(fd, &msg, 0);
+ if (err < 0) {
+ log_err("recvmsg");
+ return 1;
}
- /* send(pfd, "x", sizeof(char), 0) */
- if (data != 'x') {
+ if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+ log_err("recvmsg: truncated");
+ return 1;
+ }
+
+ /* send(cfd, "y", sizeof(char), 0) */
+ if (data != 'y') {
log_err("recvmsg: data corruption");
return 1;
}
- if (!pidfd) {
- log_err("CMSG parse: SCM_PIDFD not found");
+ if (parse_cmsg(&msg, &res)) {
+ log_err("CMSG parse: parse_cmsg() failed");
return 1;
}
- if (!ucred) {
- log_err("CMSG parse: SCM_CREDENTIALS not found");
+ /*
+ * pidfd from SCM_PIDFD should point to the client_pid.
+ * Let's read exit information and check if it's what
+ * we expect to see.
+ */
+ if (ioctl(*res.pidfd, PIDFD_GET_INFO, &info)) {
+ log_err("%s: ioctl(PIDFD_GET_INFO) failed", __func__);
+ close(*res.pidfd);
return 1;
}
- /* pidfd from SCM_PIDFD should point to the parent process PID */
- parent_pid =
- get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1);
- if (parent_pid != getppid()) {
- log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+ if (!(info.mask & PIDFD_INFO_EXIT)) {
+ log_err("%s: No exit information from ioctl(PIDFD_GET_INFO)", __func__);
+ close(*res.pidfd);
+ return 1;
+ }
+
+ err = WIFEXITED(info.exit_code) ? WEXITSTATUS(info.exit_code) : 1;
+ if (err != CHILD_EXIT_CODE_OK) {
+ log_err("%s: wrong exit_code %d != %d", __func__, err, CHILD_EXIT_CODE_OK);
+ close(*res.pidfd);
return 1;
}
+ close(*res.pidfd);
return 0;
}
@@ -291,6 +387,24 @@ static void fill_sockaddr(struct sock_addr *addr, bool abstract)
memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name));
}
+static int sk_enable_cred_pass(int sk)
+{
+ int on = 0;
+
+ on = 1;
+ if (setsockopt(sk, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSCRED");
+ return 1;
+ }
+
+ if (setsockopt(sk, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSPIDFD");
+ return 1;
+ }
+
+ return 0;
+}
+
static void client(FIXTURE_DATA(scm_pidfd) *self,
const FIXTURE_VARIANT(scm_pidfd) *variant)
{
@@ -299,7 +413,6 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
struct ucred peer_cred;
int peer_pidfd;
pid_t peer_pid;
- int on = 0;
cfd = socket(AF_UNIX, variant->type, 0);
if (cfd < 0) {
@@ -322,14 +435,8 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
child_die();
}
- on = 1;
- if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
- log_err("Failed to set SO_PASSCRED");
- child_die();
- }
-
- if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
- log_err("Failed to set SO_PASSPIDFD");
+ if (sk_enable_cred_pass(cfd)) {
+ log_err("sk_enable_cred_pass() failed");
child_die();
}
@@ -340,6 +447,12 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
child_die();
}
+ /* send something to the parent so it can receive SCM_PIDFD too and validate it */
+ if (send(cfd, "y", sizeof(char), 0) == -1) {
+ log_err("Failed to send(cfd, \"y\", sizeof(char), 0)");
+ child_die();
+ }
+
/* skip further for SOCK_DGRAM as it's not applicable */
if (variant->type == SOCK_DGRAM)
return;
@@ -398,7 +511,13 @@ TEST_F(scm_pidfd, test)
close(self->server);
close(self->startup_pipe[0]);
client(self, variant);
- exit(0);
+
+ /*
+ * It's a bit unusual, but in case of success we return non-zero
+ * exit code (CHILD_EXIT_CODE_OK) and then we expect to read it
+ * from ioctl(PIDFD_GET_INFO) in cmsg_check_dead().
+ */
+ exit(CHILD_EXIT_CODE_OK);
}
close(self->startup_pipe[1]);
@@ -421,9 +540,17 @@ TEST_F(scm_pidfd, test)
ASSERT_NE(-1, err);
}
- close(pfd);
waitpid(self->client_pid, &child_status, 0);
- ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+ /* see comment before exit(CHILD_EXIT_CODE_OK) */
+ ASSERT_EQ(CHILD_EXIT_CODE_OK, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+
+ err = sk_enable_cred_pass(pfd);
+ ASSERT_EQ(0, err);
+
+ err = cmsg_check_dead(pfd, self->client_pid);
+ ASSERT_EQ(0, err);
+
+ close(pfd);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
index d66336256580..d82a79c21c17 100644
--- a/tools/testing/selftests/net/af_unix/scm_rights.c
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -10,7 +10,7 @@
#include <sys/socket.h>
#include <sys/un.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(scm_rights)
{
@@ -23,6 +23,7 @@ FIXTURE_VARIANT(scm_rights)
int type;
int flags;
bool test_listener;
+ bool disabled;
};
FIXTURE_VARIANT_ADD(scm_rights, dgram)
@@ -31,6 +32,16 @@ FIXTURE_VARIANT_ADD(scm_rights, dgram)
.type = SOCK_DGRAM,
.flags = 0,
.test_listener = false,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, dgram_disabled)
+{
+ .name = "UNIX ",
+ .type = SOCK_DGRAM,
+ .flags = 0,
+ .test_listener = false,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream)
@@ -39,6 +50,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream)
.type = SOCK_STREAM,
.flags = 0,
.test_listener = false,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = false,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
@@ -47,6 +68,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
.type = SOCK_STREAM,
.flags = MSG_OOB,
.test_listener = false,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_oob_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = false,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
@@ -55,6 +86,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
.type = SOCK_STREAM,
.flags = 0,
.test_listener = true,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = true,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
@@ -63,6 +104,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
.type = SOCK_STREAM,
.flags = MSG_OOB,
.test_listener = true,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = true,
+ .disabled = true,
};
static int count_sockets(struct __test_metadata *_metadata,
@@ -105,6 +156,9 @@ FIXTURE_SETUP(scm_rights)
ret = unshare(CLONE_NEWNET);
ASSERT_EQ(0, ret);
+ if (variant->disabled)
+ return;
+
ret = count_sockets(_metadata, variant);
ASSERT_EQ(0, ret);
}
@@ -113,6 +167,9 @@ FIXTURE_TEARDOWN(scm_rights)
{
int ret;
+ if (variant->disabled)
+ return;
+
sleep(1);
ret = count_sockets(_metadata, variant);
@@ -121,6 +178,7 @@ FIXTURE_TEARDOWN(scm_rights)
static void create_listeners(struct __test_metadata *_metadata,
FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
int n)
{
struct sockaddr_un addr = {
@@ -140,6 +198,12 @@ static void create_listeners(struct __test_metadata *_metadata,
ret = listen(self->fd[i], -1);
ASSERT_EQ(0, ret);
+ if (variant->disabled) {
+ ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS,
+ &(int){0}, sizeof(int));
+ ASSERT_EQ(0, ret);
+ }
+
addrlen = sizeof(addr);
ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen);
ASSERT_EQ(0, ret);
@@ -164,6 +228,12 @@ static void create_socketpairs(struct __test_metadata *_metadata,
for (i = 0; i < n * 2; i += 2) {
ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i);
ASSERT_EQ(0, ret);
+
+ if (variant->disabled) {
+ ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS,
+ &(int){0}, sizeof(int));
+ ASSERT_EQ(0, ret);
+ }
}
}
@@ -175,7 +245,7 @@ static void __create_sockets(struct __test_metadata *_metadata,
ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0]));
if (variant->test_listener)
- create_listeners(_metadata, self, n);
+ create_listeners(_metadata, self, variant, n);
else
create_socketpairs(_metadata, self, variant, n);
}
@@ -201,20 +271,11 @@ void __send_fd(struct __test_metadata *_metadata,
{
#define MSG "x"
#define MSGLEN 1
- struct {
- struct cmsghdr cmsghdr;
- int fd[2];
- } cmsg = {
- .cmsghdr = {
- .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)),
- .cmsg_level = SOL_SOCKET,
- .cmsg_type = SCM_RIGHTS,
- },
- .fd = {
- self->fd[inflight * 2],
- self->fd[inflight * 2],
- },
+ int fds[2] = {
+ self->fd[inflight * 2],
+ self->fd[inflight * 2],
};
+ char cmsg_buf[CMSG_SPACE(sizeof(fds))];
struct iovec iov = {
.iov_base = MSG,
.iov_len = MSGLEN,
@@ -224,13 +285,26 @@ void __send_fd(struct __test_metadata *_metadata,
.msg_namelen = 0,
.msg_iov = &iov,
.msg_iovlen = 1,
- .msg_control = &cmsg,
- .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)),
+ .msg_control = cmsg_buf,
+ .msg_controllen = sizeof(cmsg_buf),
};
+ struct cmsghdr *cmsg;
int ret;
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
+ memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));
+
ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags);
- ASSERT_EQ(MSGLEN, ret);
+
+ if (variant->disabled) {
+ ASSERT_EQ(-1, ret);
+ ASSERT_EQ(-EPERM, -errno);
+ } else {
+ ASSERT_EQ(MSGLEN, ret);
+ }
}
#define create_sockets(n) \
diff --git a/tools/testing/selftests/net/af_unix/so_peek_off.c b/tools/testing/selftests/net/af_unix/so_peek_off.c
new file mode 100644
index 000000000000..86e7b0fb522d
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/so_peek_off.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(so_peek_off)
+{
+ int fd[2]; /* 0: sender, 1: receiver */
+};
+
+FIXTURE_VARIANT(so_peek_off)
+{
+ int type;
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, stream)
+{
+ .type = SOCK_STREAM,
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, dgram)
+{
+ .type = SOCK_DGRAM,
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, seqpacket)
+{
+ .type = SOCK_SEQPACKET,
+};
+
+FIXTURE_SETUP(so_peek_off)
+{
+ struct timeval timeout = {
+ .tv_sec = 5,
+ .tv_usec = 0,
+ };
+ int ret;
+
+ ret = socketpair(AF_UNIX, variant->type, 0, self->fd);
+ ASSERT_EQ(0, ret);
+
+ ret = setsockopt(self->fd[1], SOL_SOCKET, SO_RCVTIMEO_NEW,
+ &timeout, sizeof(timeout));
+ ASSERT_EQ(0, ret);
+
+ ret = setsockopt(self->fd[1], SOL_SOCKET, SO_PEEK_OFF,
+ &(int){0}, sizeof(int));
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(so_peek_off)
+{
+ close_range(self->fd[0], self->fd[1], 0);
+}
+
+#define sendeq(fd, str, flags) \
+ do { \
+ int bytes, len = strlen(str); \
+ \
+ bytes = send(fd, str, len, flags); \
+ ASSERT_EQ(len, bytes); \
+ } while (0)
+
+#define recveq(fd, str, buflen, flags) \
+ do { \
+ char buf[(buflen) + 1] = {}; \
+ int bytes; \
+ \
+ bytes = recv(fd, buf, buflen, flags); \
+ ASSERT_NE(-1, bytes); \
+ ASSERT_STREQ(str, buf); \
+ } while (0)
+
+#define async \
+ for (pid_t pid = (pid = fork(), \
+ pid < 0 ? \
+ __TH_LOG("Failed to start async {}"), \
+ _metadata->exit_code = KSFT_FAIL, \
+ __bail(1, _metadata), \
+ 0xdead : \
+ pid); \
+ !pid; exit(0))
+
+TEST_F(so_peek_off, single_chunk)
+{
+ sendeq(self->fd[0], "aaaabbbb", 0);
+
+ recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_F(so_peek_off, two_chunks)
+{
+ sendeq(self->fd[0], "aaaa", 0);
+ sendeq(self->fd[0], "bbbb", 0);
+
+ recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_F(so_peek_off, two_chunks_blocking)
+{
+ async {
+ usleep(1000);
+ sendeq(self->fd[0], "aaaa", 0);
+ }
+
+ recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+
+ async {
+ usleep(1000);
+ sendeq(self->fd[0], "bbbb", 0);
+ }
+
+ /* goto again; -> goto redo; in unix_stream_read_generic(). */
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_F(so_peek_off, two_chunks_overlap)
+{
+ sendeq(self->fd[0], "aaaa", 0);
+ recveq(self->fd[1], "aa", 2, MSG_PEEK);
+
+ sendeq(self->fd[0], "bbbb", 0);
+
+ if (variant->type == SOCK_STREAM) {
+ /* SOCK_STREAM tries to fill the buffer. */
+ recveq(self->fd[1], "aabb", 4, MSG_PEEK);
+ recveq(self->fd[1], "bb", 100, MSG_PEEK);
+ } else {
+ /* SOCK_DGRAM and SOCK_SEQPACKET returns at the skb boundary. */
+ recveq(self->fd[1], "aa", 100, MSG_PEEK);
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+ }
+}
+
+TEST_F(so_peek_off, two_chunks_overlap_blocking)
+{
+ async {
+ usleep(1000);
+ sendeq(self->fd[0], "aaaa", 0);
+ }
+
+ recveq(self->fd[1], "aa", 2, MSG_PEEK);
+
+ async {
+ usleep(1000);
+ sendeq(self->fd[0], "bbbb", 0);
+ }
+
+ /* Even SOCK_STREAM does not wait if at least one byte is read. */
+ recveq(self->fd[1], "aa", 100, MSG_PEEK);
+
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c
index d799fd8f5c7c..870ca96fa8ea 100644
--- a/tools/testing/selftests/net/af_unix/unix_connect.c
+++ b/tools/testing/selftests/net/af_unix/unix_connect.c
@@ -10,7 +10,7 @@
#include <sys/socket.h>
#include <sys/un.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(unix_connect)
{
diff --git a/tools/testing/selftests/net/af_unix/unix_connreset.c b/tools/testing/selftests/net/af_unix/unix_connreset.c
new file mode 100644
index 000000000000..08c1de8f5a98
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/unix_connreset.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Selftest for AF_UNIX socket close and ECONNRESET behaviour.
+ *
+ * This test verifies:
+ * 1. SOCK_STREAM returns EOF when the peer closes normally.
+ * 2. SOCK_STREAM returns ECONNRESET if peer closes with unread data.
+ * 3. SOCK_SEQPACKET returns EOF when the peer closes normally.
+ * 4. SOCK_SEQPACKET returns ECONNRESET if the peer closes with unread data.
+ * 5. SOCK_DGRAM does not return ECONNRESET when the peer closes.
+ *
+ * These tests document the intended Linux behaviour.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include "../../kselftest_harness.h"
+
+#define SOCK_PATH "/tmp/af_unix_connreset.sock"
+
+static void remove_socket_file(void)
+{
+ unlink(SOCK_PATH);
+}
+
+FIXTURE(unix_sock)
+{
+ int server;
+ int client;
+ int child;
+};
+
+FIXTURE_VARIANT(unix_sock)
+{
+ int socket_type;
+ const char *name;
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, stream) {
+ .socket_type = SOCK_STREAM,
+ .name = "SOCK_STREAM",
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, dgram) {
+ .socket_type = SOCK_DGRAM,
+ .name = "SOCK_DGRAM",
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, seqpacket) {
+ .socket_type = SOCK_SEQPACKET,
+ .name = "SOCK_SEQPACKET",
+};
+
+FIXTURE_SETUP(unix_sock)
+{
+ struct sockaddr_un addr = {};
+ int err;
+
+ addr.sun_family = AF_UNIX;
+ strcpy(addr.sun_path, SOCK_PATH);
+ remove_socket_file();
+
+ self->server = socket(AF_UNIX, variant->socket_type, 0);
+ ASSERT_LT(-1, self->server);
+
+ err = bind(self->server, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(0, err);
+
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET) {
+ err = listen(self->server, 1);
+ ASSERT_EQ(0, err);
+ }
+
+ self->client = socket(AF_UNIX, variant->socket_type | SOCK_NONBLOCK, 0);
+ ASSERT_LT(-1, self->client);
+
+ err = connect(self->client, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(0, err);
+}
+
+FIXTURE_TEARDOWN(unix_sock)
+{
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET)
+ close(self->child);
+
+ close(self->client);
+ close(self->server);
+ remove_socket_file();
+}
+
+/* Test 1: peer closes normally */
+TEST_F(unix_sock, eof)
+{
+ char buf[16] = {};
+ ssize_t n;
+
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET) {
+ self->child = accept(self->server, NULL, NULL);
+ ASSERT_LT(-1, self->child);
+
+ close(self->child);
+ } else {
+ close(self->server);
+ }
+
+ n = recv(self->client, buf, sizeof(buf), 0);
+
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET) {
+ ASSERT_EQ(0, n);
+ } else {
+ ASSERT_EQ(-1, n);
+ ASSERT_EQ(EAGAIN, errno);
+ }
+}
+
+/* Test 2: peer closes with unread data */
+TEST_F(unix_sock, reset_unread_behavior)
+{
+ char buf[16] = {};
+ ssize_t n;
+
+ /* Send data that will remain unread */
+ send(self->client, "hello", 5, 0);
+
+ if (variant->socket_type == SOCK_DGRAM) {
+ /* No real connection, just close the server */
+ close(self->server);
+ } else {
+ self->child = accept(self->server, NULL, NULL);
+ ASSERT_LT(-1, self->child);
+
+ /* Peer closes before client reads */
+ close(self->child);
+ }
+
+ n = recv(self->client, buf, sizeof(buf), 0);
+ ASSERT_EQ(-1, n);
+
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET) {
+ ASSERT_EQ(ECONNRESET, errno);
+ } else {
+ ASSERT_EQ(EAGAIN, errno);
+ }
+}
+
+/* Test 3: closing unaccepted (embryo) server socket should reset client. */
+TEST_F(unix_sock, reset_closed_embryo)
+{
+ char buf[16] = {};
+ ssize_t n;
+
+ if (variant->socket_type == SOCK_DGRAM) {
+ snprintf(_metadata->results->reason,
+ sizeof(_metadata->results->reason),
+ "Test only applies to SOCK_STREAM and SOCK_SEQPACKET");
+ exit(KSFT_XFAIL);
+ }
+
+ /* Close server without accept()ing */
+ close(self->server);
+
+ n = recv(self->client, buf, sizeof(buf), 0);
+
+ ASSERT_EQ(-1, n);
+ ASSERT_EQ(ECONNRESET, errno);
+}
+
+TEST_HARNESS_MAIN
+
diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
index d458b45c775b..3ef209cacb8e 100755
--- a/tools/testing/selftests/net/amt.sh
+++ b/tools/testing/selftests/net/amt.sh
@@ -194,15 +194,21 @@ test_remote_ip()
send_mcast_torture4()
{
- ip netns exec "${SOURCE}" bash -c \
- 'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001'
+ for i in `seq 10`; do
+ ip netns exec "${SOURCE}" bash -c \
+ 'cat /dev/urandom | head -c 100M | nc -w 1 -u 239.0.0.1 4001'
+ echo -n "."
+ done
}
send_mcast_torture6()
{
- ip netns exec "${SOURCE}" bash -c \
- 'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001'
+ for i in `seq 10`; do
+ ip netns exec "${SOURCE}" bash -c \
+ 'cat /dev/urandom | head -c 100M | nc -w 1 -u ff0e::5:6 6001'
+ echo -n "."
+ done
}
check_features()
@@ -278,10 +284,12 @@ wait $pid || err=$?
if [ $err -eq 1 ]; then
ERR=1
fi
+printf "TEST: %-50s" "IPv4 amt traffic forwarding torture"
send_mcast_torture4
-printf "TEST: %-60s [ OK ]\n" "IPv4 amt traffic forwarding torture"
+printf " [ OK ]\n"
+printf "TEST: %-50s" "IPv6 amt traffic forwarding torture"
send_mcast_torture6
-printf "TEST: %-60s [ OK ]\n" "IPv6 amt traffic forwarding torture"
+printf " [ OK ]\n"
sleep 5
if [ "${ERR}" -eq 1 ]; then
echo "Some tests failed." >&2
diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
index 92eb880c52f2..00758f00efbf 100755
--- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
+++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
@@ -75,7 +75,7 @@ setup_v4() {
ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
if [ $? -ne 0 ]; then
cleanup_v4
- echo "failed"
+ echo "failed; is the system using MACAddressPolicy=persistent ?"
exit 1
fi
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
index f366cadbc5e8..d9e5b967f815 100755
--- a/tools/testing/selftests/net/bareudp.sh
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Test various bareudp tunnel configurations.
@@ -106,26 +106,16 @@
# | |
# +-----------------------------------------------------------------------+
+. ./lib.sh
+
ERR=4 # Return 4 by default, which is the SKIP code for kselftest
PING6="ping"
PAUSE_ON_FAIL="no"
-readonly NS0=$(mktemp -u ns0-XXXXXXXX)
-readonly NS1=$(mktemp -u ns1-XXXXXXXX)
-readonly NS2=$(mktemp -u ns2-XXXXXXXX)
-readonly NS3=$(mktemp -u ns3-XXXXXXXX)
-
# Exit the script after having removed the network namespaces it created
-#
-# Parameters:
-#
-# * The list of network namespaces to delete before exiting.
-#
exit_cleanup()
{
- for ns in "$@"; do
- ip netns delete "${ns}" 2>/dev/null || true
- done
+ cleanup_all_ns
if [ "${ERR}" -eq 4 ]; then
echo "Error: Setting up the testing environment failed." >&2
@@ -140,17 +130,7 @@ exit_cleanup()
# namespaces created by this script are deleted.
create_namespaces()
{
- ip netns add "${NS0}" || exit_cleanup
- ip netns add "${NS1}" || exit_cleanup "${NS0}"
- ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}"
- ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}"
-}
-
-# The trap function handler
-#
-exit_cleanup_all()
-{
- exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}"
+ setup_ns NS0 NS1 NS2 NS3 || exit_cleanup
}
# Configure a network interface using a host route
@@ -188,10 +168,6 @@ iface_config()
#
setup_underlay()
{
- for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do
- ip -netns "${ns}" link set dev lo up
- done;
-
ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}"
ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}"
ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}"
@@ -234,14 +210,6 @@ setup_overlay_ipv4()
ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1
ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10
ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33
-
- # The intermediate namespaces don't have routes for the reverse path,
- # as it will be handled by tc. So we need to ensure that rp_filter is
- # not going to block the traffic.
- ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0
- ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0
}
setup_overlay_ipv6()
@@ -521,13 +489,10 @@ done
check_features
-# Create namespaces before setting up the exit trap.
-# Otherwise, exit_cleanup_all() could delete namespaces that were not created
-# by this script.
-create_namespaces
-
set -e
-trap exit_cleanup_all EXIT
+trap exit_cleanup EXIT
+
+create_namespaces
setup_underlay
setup_overlay_ipv4
diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile
new file mode 100644
index 000000000000..2546c45e42f7
--- /dev/null
+++ b/tools/testing/selftests/net/bench/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_MODS_DIR := page_pool
+
+TEST_PROGS += test_bench_page_pool.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile
new file mode 100644
index 000000000000..0549a16ba275
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/Makefile
@@ -0,0 +1,17 @@
+BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+obj-m += bench_page_pool.o
+bench_page_pool-y += bench_page_pool_simple.o time_bench.o
+
+all:
+ +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules
+
+clean:
+ +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean
diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
new file mode 100644
index 000000000000..cb6468adbda4
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmark module for page_pool.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/interrupt.h>
+#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <net/page_pool/helpers.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+#define MY_POOL_SIZE 1024
+
+/* Makes tests selectable. Useful for perf-record to analyze a single test.
+ * Hint: Bash shells support writing binary number like: $((2#101010)
+ *
+ * # modprobe bench_page_pool_simple run_flags=$((2#100))
+ */
+static unsigned long run_flags = 0xFFFFFFFF;
+module_param(run_flags, ulong, 0);
+MODULE_PARM_DESC(run_flags, "Limit which bench test that runs");
+
+/* Count the bit number from the enum */
+enum benchmark_bit {
+ bit_run_bench_baseline,
+ bit_run_bench_no_softirq01,
+ bit_run_bench_no_softirq02,
+ bit_run_bench_no_softirq03,
+};
+
+#define bit(b) (1 << (b))
+#define enabled(b) ((run_flags & (bit(b))))
+
+/* notice time_bench is limited to U32_MAX nr loops */
+static unsigned long loops = 10000000;
+module_param(loops, ulong, 0);
+MODULE_PARM_DESC(loops, "Specify loops bench will run");
+
+/* Timing at the nanosec level, we need to know the overhead
+ * introduced by the for loop itself
+ */
+static int time_bench_for_loop(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ int i;
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ loops_cnt++;
+ barrier(); /* avoid compiler to optimize this loop */
+ }
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+static int time_bench_atomic_inc(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ atomic_t cnt;
+ int i;
+
+ atomic_set(&cnt, 0);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ atomic_inc(&cnt);
+ barrier(); /* avoid compiler to optimize this loop */
+ }
+ loops_cnt = atomic_read(&cnt);
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+/* The ptr_ping in page_pool uses a spinlock. We need to know the minimum
+ * overhead of taking+releasing a spinlock, to know the cycles that can be saved
+ * by e.g. amortizing this via bulking.
+ */
+static int time_bench_lock(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ spinlock_t lock;
+ int i;
+
+ spin_lock_init(&lock);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ spin_lock(&lock);
+ loops_cnt++;
+ barrier(); /* avoid compiler to optimize this loop */
+ spin_unlock(&lock);
+ }
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+/* Helper for filling some page's into ptr_ring */
+static void pp_fill_ptr_ring(struct page_pool *pp, int elems)
+{
+ /* GFP_ATOMIC needed when under run softirq */
+ gfp_t gfp_mask = GFP_ATOMIC;
+ struct page **array;
+ int i;
+
+ array = kcalloc(elems, sizeof(struct page *), gfp_mask);
+
+ for (i = 0; i < elems; i++)
+ array[i] = page_pool_alloc_pages(pp, gfp_mask);
+ for (i = 0; i < elems; i++)
+ page_pool_put_page(pp, array[i], -1, false);
+
+ kfree(array);
+}
+
+enum test_type { type_fast_path, type_ptr_ring, type_page_allocator };
+
+/* Depends on compile optimizing this function */
+static int time_bench_page_pool(struct time_bench_record *rec, void *data,
+ enum test_type type, const char *func)
+{
+ uint64_t loops_cnt = 0;
+ gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */
+ int i, err;
+
+ struct page_pool *pp;
+ struct page *page;
+
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .flags = 0,
+ .pool_size = MY_POOL_SIZE,
+ .nid = NUMA_NO_NODE,
+ .dev = NULL, /* Only use for DMA mapping */
+ .dma_dir = DMA_BIDIRECTIONAL,
+ };
+
+ pp = page_pool_create(&pp_params);
+ if (IS_ERR(pp)) {
+ err = PTR_ERR(pp);
+ pr_warn("%s: Error(%d) creating page_pool\n", func, err);
+ goto out;
+ }
+ pp_fill_ptr_ring(pp, 64);
+
+ if (in_serving_softirq())
+ pr_warn("%s(): in_serving_softirq fast-path\n", func);
+ else
+ pr_warn("%s(): Cannot use page_pool fast-path\n", func);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ /* Common fast-path alloc that depend on in_serving_softirq() */
+ page = page_pool_alloc_pages(pp, gfp_mask);
+ if (!page)
+ break;
+ loops_cnt++;
+ barrier(); /* avoid compiler to optimize this loop */
+
+ /* The benchmarks purpose it to test different return paths.
+ * Compiler should inline optimize other function calls out
+ */
+ if (type == type_fast_path) {
+ /* Fast-path recycling e.g. XDP_DROP use-case */
+ page_pool_recycle_direct(pp, page);
+
+ } else if (type == type_ptr_ring) {
+ /* Normal return path */
+ page_pool_put_page(pp, page, -1, false);
+
+ } else if (type == type_page_allocator) {
+ /* Test if not pages are recycled, but instead
+ * returned back into systems page allocator
+ */
+ get_page(page); /* cause no-recycling */
+ page_pool_put_page(pp, page, -1, false);
+ put_page(page);
+ } else {
+ BUILD_BUG();
+ }
+ }
+ time_bench_stop(rec, loops_cnt);
+out:
+ page_pool_destroy(pp);
+ return loops_cnt;
+}
+
+static int time_bench_page_pool01_fast_path(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_fast_path, __func__);
+}
+
+static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_ptr_ring, __func__);
+}
+
+static int time_bench_page_pool03_slow(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_page_allocator, __func__);
+}
+
+static int run_benchmark_tests(void)
+{
+ uint32_t nr_loops = loops;
+
+ /* Baseline tests */
+ if (enabled(bit_run_bench_baseline)) {
+ time_bench_loop(nr_loops * 10, 0, "for_loop", NULL,
+ time_bench_for_loop);
+ time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL,
+ time_bench_atomic_inc);
+ time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock);
+ }
+
+ /* This test cannot activate correct code path, due to no-softirq ctx */
+ if (enabled(bit_run_bench_no_softirq01))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL,
+ time_bench_page_pool01_fast_path);
+ if (enabled(bit_run_bench_no_softirq02))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL,
+ time_bench_page_pool02_ptr_ring);
+ if (enabled(bit_run_bench_no_softirq03))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL,
+ time_bench_page_pool03_slow);
+
+ return 0;
+}
+
+static int __init bench_page_pool_simple_module_init(void)
+{
+ if (verbose)
+ pr_info("Loaded\n");
+
+ if (loops > U32_MAX) {
+ pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops,
+ U32_MAX);
+ return -ECHRNG;
+ }
+
+ run_benchmark_tests();
+
+ return 0;
+}
+module_init(bench_page_pool_simple_module_init);
+
+static void __exit bench_page_pool_simple_module_exit(void)
+{
+ if (verbose)
+ pr_info("Unloaded\n");
+}
+module_exit(bench_page_pool_simple_module_exit);
+
+MODULE_DESCRIPTION("Benchmark of page_pool simple cases");
+MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c
new file mode 100644
index 000000000000..073bb36ec5f2
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/time.h>
+
+#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */
+
+/* For concurrency testing */
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+
+/** TSC (Time-Stamp Counter) based **
+ * See: linux/time_bench.h
+ * tsc_start_clock() and tsc_stop_clock()
+ */
+
+/** Wall-clock based **
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ */
+#define PERF_FORMAT \
+ (PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \
+ PERF_FORMAT_TOTAL_TIME_RUNNING)
+
+struct raw_perf_event {
+ uint64_t config; /* event */
+ uint64_t config1; /* umask */
+ struct perf_event *save;
+ char *desc;
+};
+
+/* if HT is enable a maximum of 4 events (5 if one is instructions
+ * retired can be specified, if HT is disabled a maximum of 8 (9 if
+ * one is instructions retired) can be specified.
+ *
+ * From Table 19-1. Architectural Performance Events
+ * Architectures Software Developer’s Manual Volume 3: System Programming
+ * Guide
+ */
+struct raw_perf_event perf_events[] = {
+ { 0x3c, 0x00, NULL, "Unhalted CPU Cycles" },
+ { 0xc0, 0x00, NULL, "Instruction Retired" }
+};
+
+#define NUM_EVTS (ARRAY_SIZE(perf_events))
+
+/* WARNING: PMU config is currently broken!
+ */
+bool time_bench_PMU_config(bool enable)
+{
+ int i;
+ struct perf_event_attr perf_conf;
+ struct perf_event *perf_event;
+ int cpu;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+ pr_info("DEBUG: cpu:%d\n", cpu);
+ preempt_enable();
+
+ memset(&perf_conf, 0, sizeof(struct perf_event_attr));
+ perf_conf.type = PERF_TYPE_RAW;
+ perf_conf.size = sizeof(struct perf_event_attr);
+ perf_conf.read_format = PERF_FORMAT;
+ perf_conf.pinned = 1;
+ perf_conf.exclude_user = 1; /* No userspace events */
+ perf_conf.exclude_kernel = 0; /* Only kernel events */
+
+ for (i = 0; i < NUM_EVTS; i++) {
+ perf_conf.disabled = enable;
+ //perf_conf.disabled = (i == 0) ? 1 : 0;
+ perf_conf.config = perf_events[i].config;
+ perf_conf.config1 = perf_events[i].config1;
+ if (verbose)
+ pr_info("%s() enable PMU counter: %s\n",
+ __func__, perf_events[i].desc);
+ perf_event = perf_event_create_kernel_counter(&perf_conf, cpu,
+ NULL /* task */,
+ NULL /* overflow_handler*/,
+ NULL /* context */);
+ if (perf_event) {
+ perf_events[i].save = perf_event;
+ pr_info("%s():DEBUG perf_event success\n", __func__);
+
+ perf_event_enable(perf_event);
+ } else {
+ pr_info("%s():DEBUG perf_event is NULL\n", __func__);
+ }
+ }
+
+ return true;
+}
+
+/** Generic functions **
+ */
+
+/* Calculate stats, store results in record */
+bool time_bench_calc_stats(struct time_bench_record *rec)
+{
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+ uint64_t ns_per_call_tmp_rem = 0;
+ uint32_t ns_per_call_remainder = 0;
+ uint64_t pmc_ipc_tmp_rem = 0;
+ uint32_t pmc_ipc_remainder = 0;
+ uint32_t pmc_ipc_div = 0;
+ uint32_t invoked_cnt_precision = 0;
+ uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */
+
+ if (rec->flags & TIME_BENCH_LOOP) {
+ if (rec->invoked_cnt < 1000) {
+ pr_err("ERR: need more(>1000) loops(%llu) for timing\n",
+ rec->invoked_cnt);
+ return false;
+ }
+ if (rec->invoked_cnt > ((1ULL << 32) - 1)) {
+ /* div_u64_rem() can only support div with 32bit*/
+ pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n",
+ rec->invoked_cnt);
+ return false;
+ }
+ invoked_cnt = (uint32_t)rec->invoked_cnt;
+ }
+
+ /* TSC (Time-Stamp Counter) records */
+ if (rec->flags & TIME_BENCH_TSC) {
+ rec->tsc_interval = rec->tsc_stop - rec->tsc_start;
+ if (rec->tsc_interval == 0) {
+ pr_err("ABORT: timing took ZERO TSC time\n");
+ return false;
+ }
+ /* Calculate stats */
+ if (rec->flags & TIME_BENCH_LOOP)
+ rec->tsc_cycles = rec->tsc_interval / invoked_cnt;
+ else
+ rec->tsc_cycles = rec->tsc_interval;
+ }
+
+ /* Wall-clock time calc */
+ if (rec->flags & TIME_BENCH_WALLCLOCK) {
+ rec->time_start = rec->ts_start.tv_nsec +
+ (NANOSEC_PER_SEC * rec->ts_start.tv_sec);
+ rec->time_stop = rec->ts_stop.tv_nsec +
+ (NANOSEC_PER_SEC * rec->ts_stop.tv_sec);
+ rec->time_interval = rec->time_stop - rec->time_start;
+ if (rec->time_interval == 0) {
+ pr_err("ABORT: timing took ZERO wallclock time\n");
+ return false;
+ }
+ /* Calculate stats */
+ /*** Division in kernel it tricky ***/
+ /* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */
+ /* remainder only correct because NANOSEC_PER_SEC is 10^9 */
+ rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC,
+ &rec->time_sec_remainder);
+ //TODO: use existing struct timespec records instead of div?
+
+ if (rec->flags & TIME_BENCH_LOOP) {
+ /*** Division in kernel it tricky ***/
+ /* Orig: ns = ((double)time_interval / invoked_cnt); */
+ /* First get quotient */
+ rec->ns_per_call_quotient =
+ div_u64_rem(rec->time_interval, invoked_cnt,
+ &ns_per_call_remainder);
+ /* Now get decimals .xxx precision (incorrect roundup)*/
+ ns_per_call_tmp_rem = ns_per_call_remainder;
+ invoked_cnt_precision = invoked_cnt / 1000;
+ if (invoked_cnt_precision > 0) {
+ rec->ns_per_call_decimal =
+ div_u64_rem(ns_per_call_tmp_rem,
+ invoked_cnt_precision,
+ &ns_per_call_remainder);
+ }
+ }
+ }
+
+ /* Performance Monitor Unit (PMU) counters */
+ if (rec->flags & TIME_BENCH_PMU) {
+ //FIXME: Overflow handling???
+ rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start;
+ rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start;
+
+ /* Calc Instruction Per Cycle (IPC) */
+ /* First get quotient */
+ rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk,
+ &pmc_ipc_remainder);
+ /* Now get decimals .xxx precision (incorrect roundup)*/
+ pmc_ipc_tmp_rem = pmc_ipc_remainder;
+ pmc_ipc_div = rec->pmc_clk / 1000;
+ if (pmc_ipc_div > 0) {
+ rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem,
+ pmc_ipc_div,
+ &pmc_ipc_remainder);
+ }
+ }
+
+ return true;
+}
+
+/* Generic function for invoking a loop function and calculating
+ * execution time stats. The function being called/timed is assumed
+ * to perform a tight loop, and update the timing record struct.
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+ int (*func)(struct time_bench_record *record, void *data))
+{
+ struct time_bench_record rec;
+
+ /* Setup record */
+ memset(&rec, 0, sizeof(rec)); /* zero func might not update all */
+ rec.version_abi = 1;
+ rec.loops = loops;
+ rec.step = step;
+ rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK);
+
+ /*** Loop function being timed ***/
+ if (!func(&rec, data)) {
+ pr_err("ABORT: function being timed failed\n");
+ return false;
+ }
+
+ if (rec.invoked_cnt < loops)
+ pr_warn("WARNING: Invoke count(%llu) smaller than loops(%d)\n",
+ rec.invoked_cnt, loops);
+
+ /* Calculate stats */
+ time_bench_calc_stats(&rec);
+
+ pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+ txt, rec.tsc_cycles, rec.ns_per_call_quotient,
+ rec.ns_per_call_decimal, rec.step, rec.time_sec,
+ rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt,
+ rec.tsc_interval);
+ if (rec.flags & TIME_BENCH_PMU)
+ pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n",
+ txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient,
+ rec.pmc_ipc_decimal);
+ return true;
+}
+
+/* Function getting invoked by kthread */
+static int invoke_test_on_cpu_func(void *private)
+{
+ struct time_bench_cpu *cpu = private;
+ struct time_bench_sync *sync = cpu->sync;
+ cpumask_t newmask = CPU_MASK_NONE;
+ void *data = cpu->data;
+
+ /* Restrict CPU */
+ cpumask_set_cpu(cpu->rec.cpu, &newmask);
+ set_cpus_allowed_ptr(current, &newmask);
+
+ /* Synchronize start of concurrency test */
+ atomic_inc(&sync->nr_tests_running);
+ wait_for_completion(&sync->start_event);
+
+ /* Start benchmark function */
+ if (!cpu->bench_func(&cpu->rec, data)) {
+ pr_err("ERROR: function being timed failed on CPU:%d(%d)\n",
+ cpu->rec.cpu, smp_processor_id());
+ } else {
+ if (verbose)
+ pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu,
+ smp_processor_id());
+ }
+ cpu->did_bench_run = true;
+
+ /* End test */
+ atomic_dec(&sync->nr_tests_running);
+ /* Wait for kthread_stop() telling us to stop */
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+ return 0;
+}
+
+void time_bench_print_stats_cpumask(const char *desc,
+ struct time_bench_cpu *cpu_tasks,
+ const struct cpumask *mask)
+{
+ uint64_t average = 0;
+ int cpu;
+ int step = 0;
+ struct sum {
+ uint64_t tsc_cycles;
+ int records;
+ } sum = { 0 };
+
+ /* Get stats */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+ struct time_bench_record *rec = &c->rec;
+
+ /* Calculate stats */
+ time_bench_calc_stats(rec);
+
+ pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+ desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient,
+ rec->ns_per_call_decimal, rec->step, rec->time_sec,
+ rec->time_sec_remainder, rec->time_interval,
+ rec->invoked_cnt, rec->tsc_interval);
+
+ /* Collect average */
+ sum.records++;
+ sum.tsc_cycles += rec->tsc_cycles;
+ step = rec->step;
+ }
+
+ if (sum.records) /* avoid div-by-zero */
+ average = sum.tsc_cycles / sum.records;
+ pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc,
+ average, sum.records, step);
+}
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+ const struct cpumask *mask, /* Support masking outsome CPUs*/
+ struct time_bench_sync *sync,
+ struct time_bench_cpu *cpu_tasks,
+ int (*func)(struct time_bench_record *record, void *data))
+{
+ int cpu, running = 0;
+
+ if (verbose) // DEBUG
+ pr_warn("%s() Started on CPU:%d\n", __func__,
+ smp_processor_id());
+
+ /* Reset sync conditions */
+ atomic_set(&sync->nr_tests_running, 0);
+ init_completion(&sync->start_event);
+
+ /* Spawn off jobs on all CPUs */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+ running++;
+ c->sync = sync; /* Send sync variable along */
+ c->data = data; /* Send opaque along */
+
+ /* Init benchmark record */
+ memset(&c->rec, 0, sizeof(struct time_bench_record));
+ c->rec.version_abi = 1;
+ c->rec.loops = loops;
+ c->rec.step = step;
+ c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC |
+ TIME_BENCH_WALLCLOCK);
+ c->rec.cpu = cpu;
+ c->bench_func = func;
+ c->task = kthread_run(invoke_test_on_cpu_func, c,
+ "time_bench%d", cpu);
+ if (IS_ERR(c->task)) {
+ pr_err("%s(): Failed to start test func\n", __func__);
+ return; /* Argh, what about cleanup?! */
+ }
+ }
+
+ /* Wait until all processes are running */
+ while (atomic_read(&sync->nr_tests_running) < running) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+ /* Kick off all CPU concurrently on completion event */
+ complete_all(&sync->start_event);
+
+ /* Wait for CPUs to finish */
+ while (atomic_read(&sync->nr_tests_running)) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+
+ /* Stop the kthreads */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+ kthread_stop(c->task);
+ }
+
+ if (verbose) // DEBUG - happens often, finish on another CPU
+ pr_warn("%s() Finished on CPU:%d\n", __func__,
+ smp_processor_id());
+}
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h
new file mode 100644
index 000000000000..e113fcf341dc
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ * for licensing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_TIME_BENCH_H
+#define _LINUX_TIME_BENCH_H
+
+/* Main structure used for recording a benchmark run */
+struct time_bench_record {
+ uint32_t version_abi;
+ uint32_t loops; /* Requested loop invocations */
+ uint32_t step; /* option for e.g. bulk invocations */
+
+ uint32_t flags; /* Measurements types enabled */
+#define TIME_BENCH_LOOP BIT(0)
+#define TIME_BENCH_TSC BIT(1)
+#define TIME_BENCH_WALLCLOCK BIT(2)
+#define TIME_BENCH_PMU BIT(3)
+
+ uint32_t cpu; /* Used when embedded in time_bench_cpu */
+
+ /* Records */
+ uint64_t invoked_cnt; /* Returned actual invocations */
+ uint64_t tsc_start;
+ uint64_t tsc_stop;
+ struct timespec64 ts_start;
+ struct timespec64 ts_stop;
+ /* PMU counters for instruction and cycles
+ * instructions counter including pipelined instructions
+ */
+ uint64_t pmc_inst_start;
+ uint64_t pmc_inst_stop;
+ /* CPU unhalted clock counter */
+ uint64_t pmc_clk_start;
+ uint64_t pmc_clk_stop;
+
+ /* Result records */
+ uint64_t tsc_interval;
+ uint64_t time_start, time_stop, time_interval; /* in nanosec */
+ uint64_t pmc_inst, pmc_clk;
+
+ /* Derived result records */
+ uint64_t tsc_cycles; // +decimal?
+ uint64_t ns_per_call_quotient, ns_per_call_decimal;
+ uint64_t time_sec;
+ uint32_t time_sec_remainder;
+ uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */
+};
+
+/* For synchronizing parallel CPUs to run concurrently */
+struct time_bench_sync {
+ atomic_t nr_tests_running;
+ struct completion start_event;
+};
+
+/* Keep track of CPUs executing our bench function.
+ *
+ * Embed a time_bench_record for storing info per cpu
+ */
+struct time_bench_cpu {
+ struct time_bench_record rec;
+ struct time_bench_sync *sync; /* back ptr */
+ struct task_struct *task;
+ /* "data" opaque could have been placed in time_bench_sync,
+ * but to avoid any false sharing, place it per CPU
+ */
+ void *data;
+ /* Support masking outsome CPUs, mark if it ran */
+ bool did_bench_run;
+ /* int cpu; // note CPU stored in time_bench_record */
+ int (*bench_func)(struct time_bench_record *record, void *data);
+};
+
+/*
+ * Below TSC assembler code is not compatible with other archs, and
+ * can also fail on guests if cpu-flags are not correct.
+ *
+ * The way TSC reading is used, many iterations, does not require as
+ * high accuracy as described below (in Intel Doc #324264).
+ *
+ * Considering changing to use get_cycles() (#include <asm/timex.h>).
+ */
+
+/** TSC (Time-Stamp Counter) based **
+ * Recommend reading, to understand details of reading TSC accurately:
+ * Intel Doc #324264, "How to Benchmark Code Execution Times on Intel"
+ *
+ * Consider getting exclusive ownership of CPU by using:
+ * unsigned long flags;
+ * preempt_disable();
+ * raw_local_irq_save(flags);
+ * _your_code_
+ * raw_local_irq_restore(flags);
+ * preempt_enable();
+ *
+ * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx"
+ * RDTSC only change "%rax" and "%rdx" but
+ * CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx)
+ */
+static __always_inline uint64_t tsc_start_clock(void)
+{
+ /* See: Intel Doc #324264 */
+ unsigned int hi, lo;
+
+ asm volatile("CPUID\n\t"
+ "RDTSC\n\t"
+ "mov %%edx, %0\n\t"
+ "mov %%eax, %1\n\t"
+ : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+ //FIXME: on 32bit use clobbered %eax + %edx
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+static __always_inline uint64_t tsc_stop_clock(void)
+{
+ /* See: Intel Doc #324264 */
+ unsigned int hi, lo;
+
+ asm volatile("RDTSCP\n\t"
+ "mov %%edx, %0\n\t"
+ "mov %%eax, %1\n\t"
+ "CPUID\n\t"
+ : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+/** Wall-clock based **
+ *
+ * use: getnstimeofday()
+ * getnstimeofday(&rec->ts_start);
+ * getnstimeofday(&rec->ts_stop);
+ *
+ * API changed see: Documentation/core-api/timekeeping.rst
+ * https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday
+ *
+ * We should instead use: ktime_get_real_ts64() is a direct
+ * replacement, but consider using monotonic time (ktime_get_ts64())
+ * and/or a ktime_t based interface (ktime_get()/ktime_get_real()).
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ *
+ * Needed for calculating: Instructions Per Cycle (IPC)
+ * - The IPC number tell how efficient the CPU pipelining were
+ */
+//lookup: perf_event_create_kernel_counter()
+
+bool time_bench_PMU_config(bool enable);
+
+/* Raw reading via rdpmc() using fixed counters
+ *
+ * From: https://github.com/andikleen/simple-pmu
+ */
+enum {
+ FIXED_SELECT = (1U << 30), /* == 0x40000000 */
+ FIXED_INST_RETIRED_ANY = 0,
+ FIXED_CPU_CLK_UNHALTED_CORE = 1,
+ FIXED_CPU_CLK_UNHALTED_REF = 2,
+};
+
+static __always_inline unsigned int long long p_rdpmc(unsigned int in)
+{
+ unsigned int d, a;
+
+ asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory");
+ return ((unsigned long long)d << 32) | a;
+}
+
+/* These PMU counter needs to be enabled, but I don't have the
+ * configure code implemented. My current hack is running:
+ * sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko
+ */
+/* Reading all pipelined instruction */
+static __always_inline unsigned long long pmc_inst(void)
+{
+ return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY);
+}
+
+/* Reading CPU clock cycles */
+static __always_inline unsigned long long pmc_clk(void)
+{
+ return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE);
+}
+
+/* Raw reading via MSR rdmsr() is likely wrong
+ * FIXME: How can I know which raw MSR registers are conf for what?
+ */
+#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */
+#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */
+#define MSR_IA32_PCM2 0x400000C3
+static inline uint64_t msr_inst(unsigned long long *msr_result)
+{
+ return rdmsrq_safe(MSR_IA32_PCM0, msr_result);
+}
+
+/** Generic functions **
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+ int (*func)(struct time_bench_record *rec, void *data));
+bool time_bench_calc_stats(struct time_bench_record *rec);
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+ const struct cpumask *mask, /* Support masking outsome CPUs*/
+ struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks,
+ int (*func)(struct time_bench_record *record, void *data));
+void time_bench_print_stats_cpumask(const char *desc,
+ struct time_bench_cpu *cpu_tasks,
+ const struct cpumask *mask);
+
+//FIXME: use rec->flags to select measurement, should be MACRO
+static __always_inline void time_bench_start(struct time_bench_record *rec)
+{
+ //getnstimeofday(&rec->ts_start);
+ ktime_get_real_ts64(&rec->ts_start);
+ if (rec->flags & TIME_BENCH_PMU) {
+ rec->pmc_inst_start = pmc_inst();
+ rec->pmc_clk_start = pmc_clk();
+ }
+ rec->tsc_start = tsc_start_clock();
+}
+
+static __always_inline void time_bench_stop(struct time_bench_record *rec,
+ uint64_t invoked_cnt)
+{
+ rec->tsc_stop = tsc_stop_clock();
+ if (rec->flags & TIME_BENCH_PMU) {
+ rec->pmc_inst_stop = pmc_inst();
+ rec->pmc_clk_stop = pmc_clk();
+ }
+ //getnstimeofday(&rec->ts_stop);
+ ktime_get_real_ts64(&rec->ts_stop);
+ rec->invoked_cnt = invoked_cnt;
+}
+
+#endif /* _LINUX_TIME_BENCH_H */
diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
new file mode 100755
index 000000000000..7b8b18cfedce
--- /dev/null
+++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+set -e
+
+DRIVER="./page_pool/bench_page_pool.ko"
+result=""
+
+function run_test()
+{
+ rmmod "bench_page_pool.ko" || true
+ insmod $DRIVER > /dev/null 2>&1
+ result=$(dmesg | tail -10)
+ echo "$result"
+
+ echo
+ echo "Fast path results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool01 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+
+ echo
+ echo "ptr_ring results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool02 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+
+ echo
+ echo "slow path results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool03 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+}
+
+run_test
+
+exit 0
diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c
index 57ff67a3751e..da04b0b19b73 100644
--- a/tools/testing/selftests/net/bind_bhash.c
+++ b/tools/testing/selftests/net/bind_bhash.c
@@ -75,7 +75,7 @@ static void *setup(void *arg)
int *array = (int *)arg;
for (i = 0; i < MAX_CONNECTIONS; i++) {
- sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+ sock_fd = bind_socket(SO_REUSEPORT, setup_addr);
if (sock_fd < 0) {
ret = sock_fd;
pthread_exit(&ret);
@@ -103,7 +103,7 @@ int main(int argc, const char *argv[])
setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4;
- listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+ listener_fd = bind_socket(SO_REUSEPORT, setup_addr);
if (listen(listener_fd, 100) < 0) {
perror("listen failed");
return -1;
diff --git a/tools/testing/selftests/net/bind_timewait.c b/tools/testing/selftests/net/bind_timewait.c
index cb9fdf51ea59..40126f9b901e 100644
--- a/tools/testing/selftests/net/bind_timewait.c
+++ b/tools/testing/selftests/net/bind_timewait.c
@@ -4,7 +4,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(bind_timewait)
{
diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
index b7b54d646b93..7d11548b2c61 100644
--- a/tools/testing/selftests/net/bind_wildcard.c
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -4,7 +4,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static const __u32 in4addr_any = INADDR_ANY;
static const __u32 in4addr_loopback = INADDR_LOOPBACK;
diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py
index 3efe44f6e92a..c856d266c8f3 100755
--- a/tools/testing/selftests/net/bpf_offload.py
+++ b/tools/testing/selftests/net/bpf_offload.py
@@ -184,8 +184,8 @@ def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True):
progs = [ p for p in progs if not p['orphaned'] ]
if expected is not None:
if len(progs) != expected:
- fail(True, "%d BPF programs loaded, expected %d" %
- (len(progs), expected))
+ fail(True, "%d BPF programs loaded, expected %d\nLoaded Progs:\n%s" %
+ (len(progs), expected, pp.pformat(progs)))
return progs
def bpftool_map_list(expected=None, ns=""):
@@ -207,20 +207,24 @@ def bpftool_prog_list_wait(expected=0, n_retry=20):
raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
def bpftool_map_list_wait(expected=0, n_retry=20, ns=""):
+ nmaps = None
for i in range(n_retry):
maps = bpftool_map_list(ns=ns)
- if len(maps) == expected:
+ nmaps = len(maps)
+ if nmaps == expected:
return maps
time.sleep(0.05)
raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None,
- fail=True, include_stderr=False):
+ fail=True, include_stderr=False, dev_bind=None):
args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name)
if prog_type is not None:
args += " type " + prog_type
if dev is not None:
args += " dev " + dev
+ elif dev_bind is not None:
+ args += " xdpmeta_dev " + dev_bind
if len(maps):
args += " map " + " map ".join(maps)
@@ -594,8 +598,9 @@ def check_extack_nsim(output, reference, args):
check_extack(output, "netdevsim: " + reference, args)
def check_no_extack(res, needle):
- fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"),
- "Found '%s' in command output, leaky extack?" % (needle))
+ haystack = (res[1] + res[2]).strip()
+ fail(haystack.count(needle) or haystack.count("Warning:"),
+ "Unexpected command output, leaky extack? ('%s', '%s')" % (needle, haystack))
def check_verifier_log(output, reference):
lines = output.split("\n")
@@ -707,6 +712,7 @@ _, base_maps = bpftool("map")
base_map_names = [
'pid_iter.rodata', # created on each bpftool invocation
'libbpf_det_bind', # created on each bpftool invocation
+ 'libbpf_global',
]
# Check netdevsim
@@ -979,6 +985,16 @@ try:
rm("/sys/fs/bpf/offload")
sim.wait_for_flush()
+ bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/devbound",
+ dev_bind=sim['ifname'])
+ devbound = bpf_pinned("/sys/fs/bpf/devbound")
+ start_test("Test dev-bound program in generic mode...")
+ ret, _, err = sim.set_xdp(devbound, "generic", fail=False, include_stderr=True)
+ fail(ret == 0, "devbound program in generic mode allowed")
+ check_extack(err, "Can't attach device-bound programs in generic mode.", args)
+ rm("/sys/fs/bpf/devbound")
+ sim.wait_for_flush()
+
start_test("Test XDP load failure...")
sim.dfs["dev/bpf_bind_verifier_accept"] = 0
ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload",
diff --git a/tools/testing/selftests/net/broadcast_ether_dst.sh b/tools/testing/selftests/net/broadcast_ether_dst.sh
new file mode 100755
index 000000000000..334a7eca8a80
--- /dev/null
+++ b/tools/testing/selftests/net/broadcast_ether_dst.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Author: Brett A C Sheffield <bacs@librecast.net>
+# Author: Oscar Maes <oscmaes92@gmail.com>
+#
+# Ensure destination ethernet field is correctly set for
+# broadcast packets
+
+source lib.sh
+
+CLIENT_IP4="192.168.0.1"
+GW_IP4="192.168.0.2"
+
+setup() {
+ setup_ns CLIENT_NS SERVER_NS
+
+ ip -net "${SERVER_NS}" link add link1 type veth \
+ peer name link0 netns "${CLIENT_NS}"
+
+ ip -net "${CLIENT_NS}" link set link0 up
+ ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}"/24 dev link0
+
+ ip -net "${SERVER_NS}" link set link1 up
+
+ ip -net "${CLIENT_NS}" route add default via "${GW_IP4}"
+ ip netns exec "${CLIENT_NS}" arp -s "${GW_IP4}" 00:11:22:33:44:55
+}
+
+cleanup() {
+ rm -f "${CAPFILE}" "${OUTPUT}"
+ ip -net "${SERVER_NS}" link del link1
+ cleanup_ns "${CLIENT_NS}" "${SERVER_NS}"
+}
+
+test_broadcast_ether_dst() {
+ local rc=0
+ CAPFILE=$(mktemp -u cap.XXXXXXXXXX)
+ OUTPUT=$(mktemp -u out.XXXXXXXXXX)
+
+ echo "Testing ethernet broadcast destination"
+
+ # start tcpdump listening for icmp
+ # tcpdump will exit after receiving a single packet
+ # timeout will kill tcpdump if it is still running after 2s
+ timeout 2s ip netns exec "${CLIENT_NS}" \
+ tcpdump -i link0 -c 1 -w "${CAPFILE}" icmp &> "${OUTPUT}" &
+ pid=$!
+ slowwait 1 grep -qs "listening" "${OUTPUT}"
+
+ # send broadcast ping
+ ip netns exec "${CLIENT_NS}" \
+ ping -W0.01 -c1 -b 255.255.255.255 &> /dev/null
+
+ # wait for tcpdump for exit after receiving packet
+ wait "${pid}"
+
+ # compare ethernet destination field to ff:ff:ff:ff:ff:ff
+ ether_dst=$(tcpdump -r "${CAPFILE}" -tnne 2>/dev/null | \
+ awk '{sub(/,/,"",$3); print $3}')
+ if [[ "${ether_dst}" == "ff:ff:ff:ff:ff:ff" ]]; then
+ echo "[ OK ]"
+ rc="${ksft_pass}"
+ else
+ echo "[FAIL] expected dst ether addr to be ff:ff:ff:ff:ff:ff," \
+ "got ${ether_dst}"
+ rc="${ksft_fail}"
+ fi
+
+ return "${rc}"
+}
+
+if [ ! -x "$(command -v tcpdump)" ]; then
+ echo "SKIP: Could not run test without tcpdump tool"
+ exit "${ksft_skip}"
+fi
+
+trap cleanup EXIT
+
+setup
+test_broadcast_ether_dst
+
+exit $?
diff --git a/tools/testing/selftests/net/broadcast_pmtu.sh b/tools/testing/selftests/net/broadcast_pmtu.sh
new file mode 100755
index 000000000000..726eb5d25839
--- /dev/null
+++ b/tools/testing/selftests/net/broadcast_pmtu.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Ensures broadcast route MTU is respected
+
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+CLIENT_IP4="192.168.0.1/24"
+CLIENT_BROADCAST_ADDRESS="192.168.0.255"
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+SERVER_IP4="192.168.0.2/24"
+
+setup() {
+ ip netns add "${CLIENT_NS}"
+ ip netns add "${SERVER_NS}"
+
+ ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}"
+
+ ip -net "${CLIENT_NS}" link set link0 up
+ ip -net "${CLIENT_NS}" link set link0 mtu 9000
+ ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}" dev link0
+
+ ip -net "${SERVER_NS}" link set link1 up
+ ip -net "${SERVER_NS}" link set link1 mtu 1500
+ ip -net "${SERVER_NS}" addr add "${SERVER_IP4}" dev link1
+
+ read -r -a CLIENT_BROADCAST_ENTRY <<< "$(ip -net "${CLIENT_NS}" route show table local type broadcast)"
+ ip -net "${CLIENT_NS}" route del "${CLIENT_BROADCAST_ENTRY[@]}"
+ ip -net "${CLIENT_NS}" route add "${CLIENT_BROADCAST_ENTRY[@]}" mtu 1500
+
+ ip net exec "${SERVER_NS}" sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0
+}
+
+cleanup() {
+ ip -net "${SERVER_NS}" link del link1
+ ip netns del "${CLIENT_NS}"
+ ip netns del "${SERVER_NS}"
+}
+
+trap cleanup EXIT
+
+setup &&
+ echo "Testing for broadcast route MTU" &&
+ ip net exec "${CLIENT_NS}" ping -f -M want -q -c 1 -s 8000 -w 1 -b "${CLIENT_BROADCAST_ADDRESS}" > /dev/null 2>&1
+
+exit $?
+
diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh
new file mode 100755
index 000000000000..5ec1c85c1623
--- /dev/null
+++ b/tools/testing/selftests/net/busy_poll_test.sh
@@ -0,0 +1,187 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+source lib.sh
+
+NSIM_SV_ID=$((256 + RANDOM % 256))
+NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
+NSIM_CL_ID=$((512 + RANDOM % 256))
+NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+SERVER_IP=192.168.1.1
+CLIENT_IP=192.168.1.2
+SERVER_PORT=48675
+
+# busy poll config
+MAX_EVENTS=8
+BUSY_POLL_USECS=0
+BUSY_POLL_BUDGET=16
+PREFER_BUSY_POLL=1
+
+# IRQ deferral config
+NAPI_DEFER_HARD_IRQS=100
+GRO_FLUSH_TIMEOUT=50000
+SUSPEND_TIMEOUT=20000000
+
+NAPI_THREADED_MODE_BUSY_POLL=2
+
+setup_ns()
+{
+ set -e
+ ip netns add nssv
+ ip netns add nscl
+
+ NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_SV_SYS/net -exec basename {} \;)
+ NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_CL_SYS/net -exec basename {} \;)
+
+ # ensure the server has 1 queue
+ ethtool -L $NSIM_SV_NAME combined 1 2>/dev/null
+
+ ip link set $NSIM_SV_NAME netns nssv
+ ip link set $NSIM_CL_NAME netns nscl
+
+ ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME
+ ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME
+
+ ip netns exec nssv ip link set dev $NSIM_SV_NAME up
+ ip netns exec nscl ip link set dev $NSIM_CL_NAME up
+
+ set +e
+}
+
+cleanup_ns()
+{
+ ip netns del nscl
+ ip netns del nssv
+}
+
+test_busypoll()
+{
+ suspend_value=${1:-0}
+ napi_threaded_value=${2:-0}
+ prefer_busy_poll_value=${3:-$PREFER_BUSY_POLL}
+
+ tmp_file=$(mktemp)
+ out_file=$(mktemp)
+
+ # fill a test file with random data
+ dd if=/dev/urandom of=${tmp_file} bs=1M count=1 2> /dev/null
+
+ timeout -k 1s 30s ip netns exec nssv ./busy_poller \
+ -p${SERVER_PORT} \
+ -b${SERVER_IP} \
+ -m${MAX_EVENTS} \
+ -u${BUSY_POLL_USECS} \
+ -P${prefer_busy_poll_value} \
+ -g${BUSY_POLL_BUDGET} \
+ -i${NSIM_SV_IFIDX} \
+ -s${suspend_value} \
+ -t${napi_threaded_value} \
+ -o${out_file}&
+
+ wait_local_port_listen nssv ${SERVER_PORT} tcp
+
+ ip netns exec nscl socat -u $tmp_file TCP:${SERVER_IP}:${SERVER_PORT}
+
+ wait
+
+ tmp_file_md5sum=$(md5sum $tmp_file | cut -f1 -d' ')
+ out_file_md5sum=$(md5sum $out_file | cut -f1 -d' ')
+
+ if [ "$tmp_file_md5sum" = "$out_file_md5sum" ]; then
+ res=0
+ else
+ echo "md5sum mismatch"
+ echo "input file md5sum: ${tmp_file_md5sum}";
+ echo "output file md5sum: ${out_file_md5sum}";
+ res=1
+ fi
+
+ rm $out_file $tmp_file
+
+ return $res
+}
+
+test_busypoll_with_suspend()
+{
+ test_busypoll ${SUSPEND_TIMEOUT}
+
+ return $?
+}
+
+test_busypoll_with_napi_threaded()
+{
+ # Only enable napi threaded poll. Set suspend timeout and prefer busy
+ # poll to 0.
+ test_busypoll 0 ${NAPI_THREADED_MODE_BUSY_POLL} 0
+
+ return $?
+}
+
+###
+### Code start
+###
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+NSIM_SV_FD=$((256 + RANDOM % 256))
+exec {NSIM_SV_FD}</var/run/netns/nssv
+NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex)
+
+NSIM_CL_FD=$((256 + RANDOM % 256))
+exec {NSIM_CL_FD}</var/run/netns/nscl
+NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex)
+
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \
+ $NSIM_DEV_SYS_LINK
+
+if [ $? -ne 0 ]; then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup_ns
+ exit 1
+fi
+
+test_busypoll
+if [ $? -ne 0 ]; then
+ echo "test_busypoll failed"
+ cleanup_ns
+ exit 1
+fi
+
+test_busypoll_with_suspend
+if [ $? -ne 0 ]; then
+ echo "test_busypoll_with_suspend failed"
+ cleanup_ns
+ exit 1
+fi
+
+test_busypoll_with_napi_threaded
+if [ $? -ne 0 ]; then
+ echo "test_busypoll_with_napi_threaded failed"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit 0
diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c
new file mode 100644
index 000000000000..3a81f9c94795
--- /dev/null
+++ b/tools/testing/selftests/net/busy_poller.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <linux/genetlink.h>
+#include <linux/netlink.h>
+
+#include "netdev-user.h"
+
+/* The below ifdef blob is required because:
+ *
+ * - sys/epoll.h does not (yet) have the ioctl definitions included. So,
+ * systems with older glibcs will not have them available. However,
+ * sys/epoll.h does include the type definition for epoll_data, which is
+ * needed by the user program (e.g. epoll_event.data.fd)
+ *
+ * - linux/eventpoll.h does not define the epoll_data type, it is simply an
+ * opaque __u64. It does, however, include the ioctl definition.
+ *
+ * Including both headers is impossible (types would be redefined), so I've
+ * opted instead to take sys/epoll.h, and include the blob below.
+ *
+ * Someday, when glibc is globally up to date, the blob below can be removed.
+ */
+#if !defined(EPOLL_IOC_TYPE)
+struct epoll_params {
+ uint32_t busy_poll_usecs;
+ uint16_t busy_poll_budget;
+ uint8_t prefer_busy_poll;
+
+ /* pad the struct to a multiple of 64bits */
+ uint8_t __pad;
+};
+
+#define EPOLL_IOC_TYPE 0x8A
+#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
+#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
+#endif
+
+static uint16_t cfg_port = 8000;
+static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY };
+static char *cfg_outfile;
+static int cfg_max_events = 8;
+static uint32_t cfg_ifindex;
+
+/* busy poll params */
+static uint32_t cfg_busy_poll_usecs;
+static uint16_t cfg_busy_poll_budget;
+static uint8_t cfg_prefer_busy_poll;
+
+/* NAPI params */
+static uint32_t cfg_defer_hard_irqs;
+static uint64_t cfg_gro_flush_timeout;
+static uint64_t cfg_irq_suspend_timeout;
+static enum netdev_napi_threaded cfg_napi_threaded_poll = NETDEV_NAPI_THREADED_DISABLED;
+
+static void usage(const char *filepath)
+{
+ error(1, 0,
+ "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -t<napi_threaded_poll> -i<ifindex>",
+ filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ unsigned long long tmp;
+ int ret;
+ int c;
+
+ if (argc <= 1)
+ usage(argv[0]);
+
+ while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:t:")) != -1) {
+ /* most options take integer values, except o and b, so reduce
+ * code duplication a bit for the common case by calling
+ * strtoull here and leave bounds checking and casting per
+ * option below.
+ */
+ if (c != 'o' && c != 'b')
+ tmp = strtoull(optarg, NULL, 0);
+
+ switch (c) {
+ case 'u':
+ if (tmp == ULLONG_MAX || tmp > UINT32_MAX)
+ error(1, ERANGE, "busy_poll_usecs too large");
+
+ cfg_busy_poll_usecs = (uint32_t)tmp;
+ break;
+ case 'P':
+ if (tmp == ULLONG_MAX || tmp > 1)
+ error(1, ERANGE,
+ "prefer busy poll should be 0 or 1");
+
+ cfg_prefer_busy_poll = (uint8_t)tmp;
+ break;
+ case 'g':
+ if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
+ error(1, ERANGE,
+ "busy poll budget must be [0, UINT16_MAX]");
+
+ cfg_busy_poll_budget = (uint16_t)tmp;
+ break;
+ case 'p':
+ if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
+ error(1, ERANGE, "port must be <= 65535");
+
+ cfg_port = (uint16_t)tmp;
+ break;
+ case 'b':
+ ret = inet_aton(optarg, &cfg_bind_addr);
+ if (ret == 0)
+ error(1, errno,
+ "bind address %s invalid", optarg);
+ break;
+ case 'o':
+ cfg_outfile = strdup(optarg);
+ if (!cfg_outfile)
+ error(1, 0, "outfile invalid");
+ break;
+ case 'm':
+ if (tmp == ULLONG_MAX || tmp > INT_MAX)
+ error(1, ERANGE,
+ "max events must be > 0 and <= INT_MAX");
+
+ cfg_max_events = (int)tmp;
+ break;
+ case 'd':
+ if (tmp == ULLONG_MAX || tmp > INT32_MAX)
+ error(1, ERANGE,
+ "defer_hard_irqs must be <= INT32_MAX");
+
+ cfg_defer_hard_irqs = (uint32_t)tmp;
+ break;
+ case 'r':
+ if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
+ error(1, ERANGE,
+ "gro_flush_timeout must be < UINT64_MAX");
+
+ cfg_gro_flush_timeout = (uint64_t)tmp;
+ break;
+ case 's':
+ if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
+ error(1, ERANGE,
+ "irq_suspend_timeout must be < ULLONG_MAX");
+
+ cfg_irq_suspend_timeout = (uint64_t)tmp;
+ break;
+ case 'i':
+ if (tmp == ULLONG_MAX || tmp > INT_MAX)
+ error(1, ERANGE,
+ "ifindex must be <= INT_MAX");
+
+ cfg_ifindex = (int)tmp;
+ break;
+ case 't':
+ if (tmp > 2)
+ error(1, ERANGE, "napi threaded poll value must be 0-2");
+
+ cfg_napi_threaded_poll = (enum netdev_napi_threaded)tmp;
+ break;
+ }
+ }
+
+ if (!cfg_ifindex)
+ usage(argv[0]);
+
+ if (optind != argc)
+ usage(argv[0]);
+}
+
+static void epoll_ctl_add(int epfd, int fd, uint32_t events)
+{
+ struct epoll_event ev;
+
+ ev.events = events;
+ ev.data.fd = fd;
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1)
+ error(1, errno, "epoll_ctl add fd: %d", fd);
+}
+
+static void setnonblock(int sockfd)
+{
+ int flags;
+
+ flags = fcntl(sockfd, F_GETFL, 0);
+
+ if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1)
+ error(1, errno, "unable to set socket to nonblocking mode");
+}
+
+static void write_chunk(int fd, char *buf, ssize_t buflen)
+{
+ ssize_t remaining = buflen;
+ char *buf_offset = buf;
+ ssize_t writelen = 0;
+ ssize_t write_result;
+
+ while (writelen < buflen) {
+ write_result = write(fd, buf_offset, remaining);
+ if (write_result == -1)
+ error(1, errno, "unable to write data to outfile");
+
+ writelen += write_result;
+ remaining -= write_result;
+ buf_offset += write_result;
+ }
+}
+
+static void setup_queue(void)
+{
+ struct netdev_napi_get_list *napi_list = NULL;
+ struct netdev_napi_get_req_dump *req = NULL;
+ struct netdev_napi_set_req *set_req = NULL;
+ struct ynl_sock *ys;
+ struct ynl_error yerr;
+ uint32_t napi_id = 0;
+
+ ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!ys)
+ error(1, 0, "YNL: %s", yerr.msg);
+
+ req = netdev_napi_get_req_dump_alloc();
+ netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex);
+ napi_list = netdev_napi_get_dump(ys, req);
+
+ /* assume there is 1 NAPI configured and take the first */
+ if (napi_list->obj._present.id)
+ napi_id = napi_list->obj.id;
+ else
+ error(1, 0, "napi ID not present?");
+
+ set_req = netdev_napi_set_req_alloc();
+ netdev_napi_set_req_set_id(set_req, napi_id);
+ netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs);
+ netdev_napi_set_req_set_gro_flush_timeout(set_req,
+ cfg_gro_flush_timeout);
+ netdev_napi_set_req_set_irq_suspend_timeout(set_req,
+ cfg_irq_suspend_timeout);
+
+ if (cfg_napi_threaded_poll)
+ netdev_napi_set_req_set_threaded(set_req, cfg_napi_threaded_poll);
+
+ if (netdev_napi_set(ys, set_req))
+ error(1, 0, "can't set NAPI params: %s\n", yerr.msg);
+
+ netdev_napi_get_list_free(napi_list);
+ netdev_napi_get_req_dump_free(req);
+ netdev_napi_set_req_free(set_req);
+ ynl_sock_destroy(ys);
+}
+
+static void run_poller(void)
+{
+ struct epoll_event events[cfg_max_events];
+ struct epoll_params epoll_params = {0};
+ struct sockaddr_in server_addr;
+ int i, epfd, nfds;
+ ssize_t readlen;
+ int outfile_fd;
+ char buf[1024];
+ int sockfd;
+ int conn;
+ int val;
+
+ outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644);
+ if (outfile_fd == -1)
+ error(1, errno, "unable to open outfile: %s", cfg_outfile);
+
+ sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (sockfd == -1)
+ error(1, errno, "unable to create listen socket");
+
+ server_addr.sin_family = AF_INET;
+ server_addr.sin_port = htons(cfg_port);
+ server_addr.sin_addr = cfg_bind_addr;
+
+ /* these values are range checked during parse_opts, so casting is safe
+ * here
+ */
+ epoll_params.busy_poll_usecs = cfg_busy_poll_usecs;
+ epoll_params.busy_poll_budget = cfg_busy_poll_budget;
+ epoll_params.prefer_busy_poll = cfg_prefer_busy_poll;
+ epoll_params.__pad = 0;
+
+ val = 1;
+ if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)))
+ error(1, errno, "poller setsockopt reuseaddr");
+
+ setnonblock(sockfd);
+
+ if (bind(sockfd, (struct sockaddr *)&server_addr,
+ sizeof(struct sockaddr_in)))
+ error(0, errno, "poller bind to port: %d\n", cfg_port);
+
+ if (listen(sockfd, 1))
+ error(1, errno, "poller listen");
+
+ epfd = epoll_create1(0);
+ if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1)
+ error(1, errno, "unable to set busy poll params");
+
+ epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET);
+
+ for (;;) {
+ nfds = epoll_wait(epfd, events, cfg_max_events, -1);
+ for (i = 0; i < nfds; i++) {
+ if (events[i].data.fd == sockfd) {
+ conn = accept(sockfd, NULL, NULL);
+ if (conn == -1)
+ error(1, errno,
+ "accepting incoming connection failed");
+
+ setnonblock(conn);
+ epoll_ctl_add(epfd, conn,
+ EPOLLIN | EPOLLET | EPOLLRDHUP |
+ EPOLLHUP);
+ } else if (events[i].events & EPOLLIN) {
+ for (;;) {
+ readlen = read(events[i].data.fd, buf,
+ sizeof(buf));
+ if (readlen > 0)
+ write_chunk(outfile_fd, buf,
+ readlen);
+ else
+ break;
+ }
+ } else {
+ /* spurious event ? */
+ }
+ if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) {
+ epoll_ctl(epfd, EPOLL_CTL_DEL,
+ events[i].data.fd, NULL);
+ close(events[i].data.fd);
+ close(outfile_fd);
+ return;
+ }
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ parse_opts(argc, argv);
+ setup_queue();
+ run_poller();
+
+ if (cfg_outfile)
+ free(cfg_outfile);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/can/.gitignore b/tools/testing/selftests/net/can/.gitignore
new file mode 100644
index 000000000000..764a53fc837f
--- /dev/null
+++ b/tools/testing/selftests/net/can/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+test_raw_filter
diff --git a/tools/testing/selftests/net/can/Makefile b/tools/testing/selftests/net/can/Makefile
new file mode 100644
index 000000000000..5b82e60a03e7
--- /dev/null
+++ b/tools/testing/selftests/net/can/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+
+TEST_PROGS := test_raw_filter.sh
+
+TEST_GEN_FILES := test_raw_filter
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/can/config b/tools/testing/selftests/net/can/config
new file mode 100644
index 000000000000..188f79796670
--- /dev/null
+++ b/tools/testing/selftests/net/can/config
@@ -0,0 +1,3 @@
+CONFIG_CAN=m
+CONFIG_CAN_DEV=m
+CONFIG_CAN_VCAN=m
diff --git a/tools/testing/selftests/net/can/test_raw_filter.c b/tools/testing/selftests/net/can/test_raw_filter.c
new file mode 100644
index 000000000000..bb8ae8854273
--- /dev/null
+++ b/tools/testing/selftests/net/can/test_raw_filter.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+/*
+ * Copyright (c) 2011 Volkswagen Group Electronic Research
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <net/if.h>
+#include <linux/if.h>
+
+#include <linux/can.h>
+#include <linux/can/raw.h>
+
+#include "kselftest_harness.h"
+
+#define ID 0x123
+
+char CANIF[IFNAMSIZ];
+
+static int send_can_frames(int sock, int testcase)
+{
+ struct can_frame frame;
+
+ frame.can_dlc = 1;
+ frame.data[0] = testcase;
+
+ frame.can_id = ID;
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ frame.can_id = (ID | CAN_RTR_FLAG);
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ frame.can_id = (ID | CAN_EFF_FLAG);
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ frame.can_id = (ID | CAN_EFF_FLAG | CAN_RTR_FLAG);
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ return 0;
+
+write_err:
+ perror("write");
+ return 1;
+}
+
+FIXTURE(can_filters) {
+ int sock;
+};
+
+FIXTURE_SETUP(can_filters)
+{
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+ int recv_own_msgs = 1;
+ int s, ret;
+
+ s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+ ASSERT_GE(s, 0)
+ TH_LOG("failed to create CAN_RAW socket: %d", errno);
+
+ strncpy(ifr.ifr_name, CANIF, sizeof(ifr.ifr_name));
+ ret = ioctl(s, SIOCGIFINDEX, &ifr);
+ ASSERT_GE(ret, 0)
+ TH_LOG("failed SIOCGIFINDEX: %d", errno);
+
+ addr.can_family = AF_CAN;
+ addr.can_ifindex = ifr.ifr_ifindex;
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
+ &recv_own_msgs, sizeof(recv_own_msgs));
+
+ ret = bind(s, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(ret, 0)
+ TH_LOG("failed bind socket: %d", errno);
+
+ self->sock = s;
+}
+
+FIXTURE_TEARDOWN(can_filters)
+{
+ close(self->sock);
+}
+
+FIXTURE_VARIANT(can_filters) {
+ int testcase;
+ canid_t id;
+ canid_t mask;
+ int exp_num_rx;
+ canid_t exp_flags[];
+};
+
+/* Receive all frames when filtering for the ID in standard frame format */
+FIXTURE_VARIANT_ADD(can_filters, base) {
+ .testcase = 1,
+ .id = ID,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Ignore EFF flag in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_eff) {
+ .testcase = 2,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Ignore RTR flag in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_rtr) {
+ .testcase = 3,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Ignore EFF and RTR flags in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_effrtr) {
+ .testcase = 4,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF frames when expecting no EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff) {
+ .testcase = 5,
+ .id = ID,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only EFF frames when filter id and filter mask include EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_eff) {
+ .testcase = 6,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF frames when expecting no EFF flag, ignoring RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_rtr) {
+ .testcase = 7,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only EFF frames when filter id and filter mask include EFF flag,
+ * ignoring RTR flag
+ */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_effrtr) {
+ .testcase = 8,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive no remote frames when filtering for no RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr) {
+ .testcase = 9,
+ .id = ID,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_EFF_FLAG,
+ },
+};
+
+/* Receive no remote frames when filtering for no RTR flag, ignoring EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_eff) {
+ .testcase = 10,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_EFF_FLAG,
+ },
+};
+
+/* Receive only remote frames when filter includes RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_rtr) {
+ .testcase = 11,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only remote frames when filter includes RTR flag, ignoring EFF
+ * flag
+ */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_effrtr) {
+ .testcase = 12,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF data frame when filtering for no flags */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr) {
+ .testcase = 13,
+ .id = ID,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ 0,
+ },
+};
+
+/* Receive only EFF data frame when filtering for EFF but no RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_eff) {
+ .testcase = 14,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ },
+};
+
+/* Receive only SFF remote frame when filtering for RTR but no EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_rtr) {
+ .testcase = 15,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only EFF remote frame when filtering for EFF and RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_effrtr) {
+ .testcase = 16,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF data frame when filtering for no EFF flag and no RTR flag
+ * but based on EFF mask
+ */
+FIXTURE_VARIANT_ADD(can_filters, eff) {
+ .testcase = 17,
+ .id = ID,
+ .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ 0,
+ },
+};
+
+/* Receive only EFF data frame when filtering for EFF flag and no RTR flag but
+ * based on EFF mask
+ */
+FIXTURE_VARIANT_ADD(can_filters, eff_eff) {
+ .testcase = 18,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ },
+};
+
+/* This test verifies that the raw CAN filters work, by checking if only frames
+ * with the expected set of flags are received. For each test case, the given
+ * filter (id and mask) is added and four CAN frames are sent with every
+ * combination of set/unset EFF/RTR flags.
+ */
+TEST_F(can_filters, test_filter)
+{
+ struct can_filter rfilter;
+ int ret;
+
+ rfilter.can_id = variant->id;
+ rfilter.can_mask = variant->mask;
+ setsockopt(self->sock, SOL_CAN_RAW, CAN_RAW_FILTER,
+ &rfilter, sizeof(rfilter));
+
+ TH_LOG("filters: can_id = 0x%08X can_mask = 0x%08X",
+ rfilter.can_id, rfilter.can_mask);
+
+ ret = send_can_frames(self->sock, variant->testcase);
+ ASSERT_EQ(ret, 0)
+ TH_LOG("failed to send CAN frames");
+
+ for (int i = 0; i <= variant->exp_num_rx; i++) {
+ struct can_frame frame;
+ struct timeval tv = {
+ .tv_sec = 0,
+ .tv_usec = 50000, /* 50ms timeout */
+ };
+ fd_set rdfs;
+
+ FD_ZERO(&rdfs);
+ FD_SET(self->sock, &rdfs);
+
+ ret = select(self->sock + 1, &rdfs, NULL, NULL, &tv);
+ ASSERT_GE(ret, 0)
+ TH_LOG("failed select for frame %d, err: %d)", i, errno);
+
+ ret = FD_ISSET(self->sock, &rdfs);
+ if (i == variant->exp_num_rx) {
+ ASSERT_EQ(ret, 0)
+ TH_LOG("too many frames received");
+ } else {
+ ASSERT_NE(ret, 0)
+ TH_LOG("too few frames received");
+
+ ret = read(self->sock, &frame, sizeof(frame));
+ ASSERT_GE(ret, 0)
+ TH_LOG("failed to read frame %d, err: %d", i, errno);
+
+ TH_LOG("rx: can_id = 0x%08X rx = %d", frame.can_id, i);
+
+ ASSERT_EQ(ID, frame.can_id & CAN_SFF_MASK)
+ TH_LOG("received wrong can_id");
+ ASSERT_EQ(variant->testcase, frame.data[0])
+ TH_LOG("received wrong test case");
+
+ ASSERT_EQ(frame.can_id & ~CAN_ERR_MASK,
+ variant->exp_flags[i])
+ TH_LOG("received unexpected flags");
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ char *ifname = getenv("CANIF");
+
+ if (!ifname) {
+ printf("CANIF environment variable must contain the test interface\n");
+ return KSFT_FAIL;
+ }
+
+ strncpy(CANIF, ifname, sizeof(CANIF) - 1);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/net/can/test_raw_filter.sh b/tools/testing/selftests/net/can/test_raw_filter.sh
new file mode 100755
index 000000000000..276d6c06ac95
--- /dev/null
+++ b/tools/testing/selftests/net/can/test_raw_filter.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ test_raw_filter
+"
+
+net_dir=$(dirname $0)/..
+source $net_dir/lib.sh
+
+export CANIF=${CANIF:-"vcan0"}
+BITRATE=${BITRATE:-500000}
+
+setup()
+{
+ if [[ $CANIF == vcan* ]]; then
+ ip link add name $CANIF type vcan || exit $ksft_skip
+ else
+ ip link set dev $CANIF type can bitrate $BITRATE || exit $ksft_skip
+ fi
+ ip link set dev $CANIF up
+ pwd
+}
+
+cleanup()
+{
+ ip link set dev $CANIF down
+ if [[ $CANIF == vcan* ]]; then
+ ip link delete $CANIF
+ fi
+}
+
+test_raw_filter()
+{
+ ./test_raw_filter
+ check_err $?
+ log_test "test_raw_filter"
+}
+
+trap cleanup EXIT
+setup
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/cmsg_ip.sh b/tools/testing/selftests/net/cmsg_ip.sh
new file mode 100755
index 000000000000..b55680e081ad
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_ip.sh
@@ -0,0 +1,187 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+IP4=172.16.0.1/24
+TGT4=172.16.0.2
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+TMPF=$(mktemp --suffix ".pcap")
+
+cleanup()
+{
+ rm -f $TMPF
+ cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+tcpdump -h | grep immediate-mode >> /dev/null
+if [ $? -ne 0 ]; then
+ echo "SKIP - tcpdump with --immediate-mode option required"
+ exit $ksft_skip
+fi
+
+# Namespaces
+setup_ns NS
+NSEXE="ip netns exec $NS"
+
+$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
+# Connectivity
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP4 dev dummy0
+ip -netns $NS addr add $IP6 dev dummy0
+
+# Test
+BAD=0
+TOTAL=0
+
+check_result() {
+ ((TOTAL++))
+ if [ $1 -ne $2 ]; then
+ echo " Case $3 returned $1, expected $2"
+ ((BAD++))
+ fi
+}
+
+# IPV6_DONTFRAG
+for ovr in setsock cmsg both diff; do
+ for df in 0 1; do
+ for p in u U i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "U" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-F $df"
+ [ $ovr == "cmsg" ] && m="-f $df"
+ [ $ovr == "both" ] && m="-F $df -f $df"
+ [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df"
+
+ $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234
+ check_result $? $df "DONTFRAG $prot $ovr"
+ done
+ done
+done
+
+# IP_TOS + IPV6_TCLASS
+
+test_dscp() {
+ local -r IPVER=$1
+ local -r TGT=$2
+ local -r MATCH=$3
+
+ local -r TOS=0x10
+ local -r TOS2=0x20
+ local -r ECN=0x3
+
+ ip $IPVER -netns $NS rule add tos $TOS lookup 300
+ ip $IPVER -netns $NS route add table 300 prohibit any
+
+ for ovr in setsock cmsg both diff; do
+ for p in u U i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "U" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-C"
+ [ $ovr == "cmsg" ] && m="-c"
+ [ $ovr == "both" ] && m="-C $((TOS2)) -c"
+ [ $ovr == "diff" ] && m="-C $((TOS )) -c"
+
+ $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+ BG=$!
+ sleep 0.05
+
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234
+ check_result $? 0 "$MATCH $prot $ovr - pass"
+
+ while [ -d /proc/$BG ]; do
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234
+ done
+
+ tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $TOS2" >> /dev/null
+ check_result $? 0 "$MATCH $prot $ovr - packet data"
+ rm $TMPF
+
+ [ $ovr == "both" ] && m="-C $((TOS )) -c"
+ [ $ovr == "diff" ] && m="-C $((TOS2)) -c"
+
+ # Match prohibit rule: expect failure
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS)) -s $TGT 1234
+ check_result $? 1 "$MATCH $prot $ovr - rejection"
+
+ # Match prohibit rule: IPv4 masks ECN: expect failure
+ if [[ "$IPVER" == "-4" ]]; then
+ $NSEXE ./cmsg_sender $IPVER -p $p $m "$((TOS | ECN))" -s $TGT 1234
+ check_result $? 1 "$MATCH $prot $ovr - rejection (ECN)"
+ fi
+ done
+ done
+}
+
+test_dscp -4 $TGT4 tos
+test_dscp -6 $TGT6 class
+
+# IP_TTL + IPV6_HOPLIMIT
+test_ttl_hoplimit() {
+ local -r IPVER=$1
+ local -r TGT=$2
+ local -r MATCH=$3
+
+ local -r LIM=4
+
+ for ovr in setsock cmsg both diff; do
+ for p in u U i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "U" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-L"
+ [ $ovr == "cmsg" ] && m="-l"
+ [ $ovr == "both" ] && m="-L $LIM -l"
+ [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l"
+
+ $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+ BG=$!
+ sleep 0.05
+
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234
+ check_result $? 0 "$MATCH $prot $ovr - pass"
+
+ while [ -d /proc/$BG ]; do
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234
+ done
+
+ tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $LIM[^0-9]" >> /dev/null
+ check_result $? 0 "$MATCH $prot $ovr - packet data"
+ rm $TMPF
+ done
+ done
+}
+
+test_ttl_hoplimit -4 $TGT4 ttl
+test_ttl_hoplimit -6 $TGT6 hlim
+
+# IPV6 exthdr
+for p in u U i r; do
+ # Very basic "does it crash" test
+ for h in h d r; do
+ $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234
+ check_result $? 0 "ExtHdr $prot $ovr - pass"
+ done
+done
+
+# Summary
+if [ $BAD -ne 0 ]; then
+ echo "FAIL - $BAD/$TOTAL cases failed"
+ exit 1
+else
+ echo "OK"
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
deleted file mode 100755
index 8bc23fb4c82b..000000000000
--- a/tools/testing/selftests/net/cmsg_ipv6.sh
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source lib.sh
-
-IP6=2001:db8:1::1/64
-TGT6=2001:db8:1::2
-TMPF=$(mktemp --suffix ".pcap")
-
-cleanup()
-{
- rm -f $TMPF
- cleanup_ns $NS
-}
-
-trap cleanup EXIT
-
-tcpdump -h | grep immediate-mode >> /dev/null
-if [ $? -ne 0 ]; then
- echo "SKIP - tcpdump with --immediate-mode option required"
- exit $ksft_skip
-fi
-
-# Namespaces
-setup_ns NS
-NSEXE="ip netns exec $NS"
-
-$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
-
-# Connectivity
-ip -netns $NS link add type dummy
-ip -netns $NS link set dev dummy0 up
-ip -netns $NS addr add $IP6 dev dummy0
-
-# Test
-BAD=0
-TOTAL=0
-
-check_result() {
- ((TOTAL++))
- if [ $1 -ne $2 ]; then
- echo " Case $3 returned $1, expected $2"
- ((BAD++))
- fi
-}
-
-# IPV6_DONTFRAG
-for ovr in setsock cmsg both diff; do
- for df in 0 1; do
- for p in u i r; do
- [ $p == "u" ] && prot=UDP
- [ $p == "i" ] && prot=ICMP
- [ $p == "r" ] && prot=RAW
-
- [ $ovr == "setsock" ] && m="-F $df"
- [ $ovr == "cmsg" ] && m="-f $df"
- [ $ovr == "both" ] && m="-F $df -f $df"
- [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df"
-
- $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234
- check_result $? $df "DONTFRAG $prot $ovr"
- done
- done
-done
-
-# IPV6_TCLASS
-TOS=0x10
-TOS2=0x20
-
-ip -6 -netns $NS rule add tos $TOS lookup 300
-ip -6 -netns $NS route add table 300 prohibit any
-
-for ovr in setsock cmsg both diff; do
- for p in u i r; do
- [ $p == "u" ] && prot=UDP
- [ $p == "i" ] && prot=ICMP
- [ $p == "r" ] && prot=RAW
-
- [ $ovr == "setsock" ] && m="-C"
- [ $ovr == "cmsg" ] && m="-c"
- [ $ovr == "both" ] && m="-C $((TOS2)) -c"
- [ $ovr == "diff" ] && m="-C $((TOS )) -c"
-
- $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
- BG=$!
- sleep 0.05
-
- $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
- check_result $? 0 "TCLASS $prot $ovr - pass"
-
- while [ -d /proc/$BG ]; do
- $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
- done
-
- tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null
- check_result $? 0 "TCLASS $prot $ovr - packet data"
- rm $TMPF
-
- [ $ovr == "both" ] && m="-C $((TOS )) -c"
- [ $ovr == "diff" ] && m="-C $((TOS2)) -c"
-
- $NSEXE ./cmsg_sender -6 -p $p $m $((TOS)) -s $TGT6 1234
- check_result $? 1 "TCLASS $prot $ovr - rejection"
- done
-done
-
-# IPV6_HOPLIMIT
-LIM=4
-
-for ovr in setsock cmsg both diff; do
- for p in u i r; do
- [ $p == "u" ] && prot=UDP
- [ $p == "i" ] && prot=ICMP
- [ $p == "r" ] && prot=RAW
-
- [ $ovr == "setsock" ] && m="-L"
- [ $ovr == "cmsg" ] && m="-l"
- [ $ovr == "both" ] && m="-L $LIM -l"
- [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l"
-
- $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
- BG=$!
- sleep 0.05
-
- $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
- check_result $? 0 "HOPLIMIT $prot $ovr - pass"
-
- while [ -d /proc/$BG ]; do
- $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
- done
-
- tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null
- check_result $? 0 "HOPLIMIT $prot $ovr - packet data"
- rm $TMPF
- done
-done
-
-# IPV6 exthdr
-for p in u i r; do
- # Very basic "does it crash" test
- for h in h d r; do
- $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234
- check_result $? 0 "ExtHdr $prot $ovr - pass"
- done
-done
-
-# Summary
-if [ $BAD -ne 0 ]; then
- echo "FAIL - $BAD/$TOTAL cases failed"
- exit 1
-else
- echo "OK"
- exit 0
-fi
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 876c2db02a63..67a72b1a2f3d 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -16,7 +16,7 @@
#include <linux/udp.h>
#include <sys/socket.h>
-#include "../kselftest.h"
+#include "kselftest.h"
enum {
ERN_SUCCESS = 0,
@@ -33,6 +33,7 @@ enum {
ERN_RECVERR,
ERN_CMSG_RD,
ERN_CMSG_RCV,
+ ERN_SEND_MORE,
};
struct option_cmsg_u32 {
@@ -46,6 +47,7 @@ struct options {
const char *service;
unsigned int size;
unsigned int num_pkt;
+ bool msg_more;
struct {
unsigned int mark;
unsigned int dontfrag;
@@ -59,6 +61,7 @@ struct options {
unsigned int proto;
} sock;
struct option_cmsg_u32 mark;
+ struct option_cmsg_u32 priority;
struct {
bool ena;
unsigned int delay;
@@ -71,7 +74,7 @@ struct options {
struct option_cmsg_u32 tclass;
struct option_cmsg_u32 hlimit;
struct option_cmsg_u32 exthdr;
- } v6;
+ } cmsg;
} opt = {
.size = 13,
.num_pkt = 1,
@@ -93,21 +96,24 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
"\t\t-S send() size\n"
"\t\t-4/-6 Force IPv4 / IPv6 only\n"
"\t\t-p prot Socket protocol\n"
- "\t\t (u = UDP (default); i = ICMP; r = RAW)\n"
+ "\t\t (u = UDP (default); i = ICMP; r = RAW;\n"
+ "\t\t U = UDP with MSG_MORE)\n"
"\n"
"\t\t-m val Set SO_MARK with given value\n"
"\t\t-M val Set SO_MARK via setsockopt\n"
+ "\t\t-P val Set SO_PRIORITY via setsockopt\n"
+ "\t\t-Q val Set SO_PRIORITY via cmsg\n"
"\t\t-d val Set SO_TXTIME with given delay (usec)\n"
"\t\t-t Enable time stamp reporting\n"
"\t\t-f val Set don't fragment via cmsg\n"
"\t\t-F val Set don't fragment via setsockopt\n"
- "\t\t-c val Set TCLASS via cmsg\n"
- "\t\t-C val Set TCLASS via setsockopt\n"
- "\t\t-l val Set HOPLIMIT via cmsg\n"
- "\t\t-L val Set HOPLIMIT via setsockopt\n"
+ "\t\t-c val Set TOS/TCLASS via cmsg\n"
+ "\t\t-C val Set TOS/TCLASS via setsockopt\n"
+ "\t\t-l val Set TTL/HOPLIMIT via cmsg\n"
+ "\t\t-L val Set TTL/HOPLIMIT via setsockopt\n"
"\t\t-H type Add an IPv6 header option\n"
- "\t\t (h = HOP; d = DST; r = RTDST)"
- "");
+ "\t\t (h = HOP; d = DST; r = RTDST)\n"
+ "\n");
exit(ERN_HELP);
}
@@ -115,7 +121,7 @@ static void cs_parse_args(int argc, char *argv[])
{
int o;
- while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) {
+ while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:Q:")) != -1) {
switch (o) {
case 's':
opt.silent_send = true;
@@ -130,8 +136,11 @@ static void cs_parse_args(int argc, char *argv[])
opt.sock.family = AF_INET6;
break;
case 'p':
- if (*optarg == 'u' || *optarg == 'U') {
+ if (*optarg == 'u') {
opt.sock.proto = IPPROTO_UDP;
+ } else if (*optarg == 'U') {
+ opt.sock.proto = IPPROTO_UDP;
+ opt.msg_more = true;
} else if (*optarg == 'i' || *optarg == 'I') {
opt.sock.proto = IPPROTO_ICMP;
} else if (*optarg == 'r') {
@@ -148,6 +157,10 @@ static void cs_parse_args(int argc, char *argv[])
opt.mark.ena = true;
opt.mark.val = atoi(optarg);
break;
+ case 'Q':
+ opt.priority.ena = true;
+ opt.priority.val = atoi(optarg);
+ break;
case 'M':
opt.sockopt.mark = atoi(optarg);
break;
@@ -162,37 +175,37 @@ static void cs_parse_args(int argc, char *argv[])
opt.ts.ena = true;
break;
case 'f':
- opt.v6.dontfrag.ena = true;
- opt.v6.dontfrag.val = atoi(optarg);
+ opt.cmsg.dontfrag.ena = true;
+ opt.cmsg.dontfrag.val = atoi(optarg);
break;
case 'F':
opt.sockopt.dontfrag = atoi(optarg);
break;
case 'c':
- opt.v6.tclass.ena = true;
- opt.v6.tclass.val = atoi(optarg);
+ opt.cmsg.tclass.ena = true;
+ opt.cmsg.tclass.val = atoi(optarg);
break;
case 'C':
opt.sockopt.tclass = atoi(optarg);
break;
case 'l':
- opt.v6.hlimit.ena = true;
- opt.v6.hlimit.val = atoi(optarg);
+ opt.cmsg.hlimit.ena = true;
+ opt.cmsg.hlimit.val = atoi(optarg);
break;
case 'L':
opt.sockopt.hlimit = atoi(optarg);
break;
case 'H':
- opt.v6.exthdr.ena = true;
+ opt.cmsg.exthdr.ena = true;
switch (optarg[0]) {
case 'h':
- opt.v6.exthdr.val = IPV6_HOPOPTS;
+ opt.cmsg.exthdr.val = IPV6_HOPOPTS;
break;
case 'd':
- opt.v6.exthdr.val = IPV6_DSTOPTS;
+ opt.cmsg.exthdr.val = IPV6_DSTOPTS;
break;
case 'r':
- opt.v6.exthdr.val = IPV6_RTHDRDSTOPTS;
+ opt.cmsg.exthdr.val = IPV6_RTHDRDSTOPTS;
break;
default:
printf("Error: hdr type: %s\n", optarg);
@@ -253,11 +266,21 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
SOL_SOCKET, SO_MARK, &opt.mark);
ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
- SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag);
- ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
- SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass);
- ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
- SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit);
+ SOL_SOCKET, SO_PRIORITY, &opt.priority);
+
+ if (opt.sock.family == AF_INET) {
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IP, IP_TOS, &opt.cmsg.tclass);
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IP, IP_TTL, &opt.cmsg.hlimit);
+ } else {
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IPV6, IPV6_DONTFRAG, &opt.cmsg.dontfrag);
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IPV6, IPV6_TCLASS, &opt.cmsg.tclass);
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IPV6, IPV6_HOPLIMIT, &opt.cmsg.hlimit);
+ }
if (opt.txtime.ena) {
__u64 txtime;
@@ -288,14 +311,14 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
*(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED |
SOF_TIMESTAMPING_TX_SOFTWARE;
}
- if (opt.v6.exthdr.ena) {
+ if (opt.cmsg.exthdr.ena) {
cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
cmsg_len += CMSG_SPACE(8);
if (cbuf_sz < cmsg_len)
error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
cmsg->cmsg_level = SOL_IPV6;
- cmsg->cmsg_type = opt.v6.exthdr.val;
+ cmsg->cmsg_type = opt.cmsg.exthdr.val;
cmsg->cmsg_len = CMSG_LEN(8);
*(__u64 *)CMSG_DATA(cmsg) = 0;
}
@@ -396,23 +419,35 @@ static void ca_set_sockopts(int fd)
setsockopt(fd, SOL_SOCKET, SO_MARK,
&opt.sockopt.mark, sizeof(opt.sockopt.mark)))
error(ERN_SOCKOPT, errno, "setsockopt SO_MARK");
- if (opt.sockopt.dontfrag &&
- setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG,
- &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag)))
- error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG");
- if (opt.sockopt.tclass &&
- setsockopt(fd, SOL_IPV6, IPV6_TCLASS,
- &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
- error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS");
- if (opt.sockopt.hlimit &&
- setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
- &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
- error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
if (opt.sockopt.priority &&
setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
&opt.sockopt.priority, sizeof(opt.sockopt.priority)))
error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
+ if (opt.sock.family == AF_INET) {
+ if (opt.sockopt.tclass &&
+ setsockopt(fd, SOL_IP, IP_TOS,
+ &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
+ error(ERN_SOCKOPT, errno, "setsockopt IP_TOS");
+ if (opt.sockopt.hlimit &&
+ setsockopt(fd, SOL_IP, IP_TTL,
+ &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
+ error(ERN_SOCKOPT, errno, "setsockopt IP_TTL");
+ } else {
+ if (opt.sockopt.dontfrag &&
+ setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG,
+ &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag)))
+ error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG");
+ if (opt.sockopt.tclass &&
+ setsockopt(fd, SOL_IPV6, IPV6_TCLASS,
+ &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
+ error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS");
+ if (opt.sockopt.hlimit &&
+ setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
+ &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
+ error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
+ }
+
if (opt.txtime.ena) {
struct sock_txtime so_txtime = {
.clockid = CLOCK_MONOTONIC,
@@ -456,7 +491,8 @@ int main(int argc, char *argv[])
if (err) {
fprintf(stderr, "Can't resolve address [%s]:%s\n",
opt.host, opt.service);
- return ERN_SOCK_CREATE;
+ err = ERN_SOCK_CREATE;
+ goto err_free_buff;
}
if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP)
@@ -465,8 +501,8 @@ int main(int argc, char *argv[])
fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto);
if (fd < 0) {
fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
- freeaddrinfo(ai);
- return ERN_RESOLVE;
+ err = ERN_RESOLVE;
+ goto err_free_info;
}
if (opt.sock.proto == IPPROTO_ICMP) {
@@ -502,7 +538,7 @@ int main(int argc, char *argv[])
cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));
for (i = 0; i < opt.num_pkt; i++) {
- err = sendmsg(fd, &msg, 0);
+ err = sendmsg(fd, &msg, opt.msg_more ? MSG_MORE : 0);
if (err < 0) {
if (!opt.silent_send)
fprintf(stderr, "send failed: %s\n", strerror(errno));
@@ -513,6 +549,14 @@ int main(int argc, char *argv[])
err = ERN_SEND_SHORT;
goto err_out;
}
+ if (opt.msg_more) {
+ err = write(fd, NULL, 0);
+ if (err < 0) {
+ fprintf(stderr, "send more: %s\n", strerror(errno));
+ err = ERN_SEND_MORE;
+ goto err_out;
+ }
+ }
}
err = ERN_SUCCESS;
@@ -531,6 +575,9 @@ int main(int argc, char *argv[])
err_out:
close(fd);
+err_free_info:
freeaddrinfo(ai);
+err_free_buff:
+ free(buf);
return err;
}
diff --git a/tools/testing/selftests/net/cmsg_so_priority.sh b/tools/testing/selftests/net/cmsg_so_priority.sh
new file mode 100755
index 000000000000..ee07d8653262
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_so_priority.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+readonly KSFT_SKIP=4
+
+IP4=192.0.2.1/24
+TGT4=192.0.2.2
+TGT4_RAW=192.0.2.3
+IP6=2001:db8::1/64
+TGT6=2001:db8::2
+TGT6_RAW=2001:db8::3
+PORT=1234
+TOTAL_TESTS=0
+FAILED_TESTS=0
+
+if ! command -v jq &> /dev/null; then
+ echo "SKIP cmsg_so_priroity.sh test: jq is not installed." >&2
+ exit "$KSFT_SKIP"
+fi
+
+check_result() {
+ ((TOTAL_TESTS++))
+ if [ "$1" -ne 0 ]; then
+ ((FAILED_TESTS++))
+ fi
+}
+
+cleanup()
+{
+ cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+setup_ns NS
+
+create_filter() {
+ local handle=$1
+ local vlan_prio=$2
+ local ip_type=$3
+ local proto=$4
+ local dst_ip=$5
+ local ip_proto
+
+ if [[ "$proto" == "u" ]]; then
+ ip_proto="udp"
+ elif [[ "$ip_type" == "ipv4" && "$proto" == "i" ]]; then
+ ip_proto="icmp"
+ elif [[ "$ip_type" == "ipv6" && "$proto" == "i" ]]; then
+ ip_proto="icmpv6"
+ fi
+
+ tc -n $NS filter add dev dummy1 \
+ egress pref 1 handle "$handle" proto 802.1q \
+ flower vlan_prio "$vlan_prio" vlan_ethtype "$ip_type" \
+ dst_ip "$dst_ip" ${ip_proto:+ip_proto $ip_proto} \
+ action pass
+}
+
+ip -n $NS link set dev lo up
+ip -n $NS link add name dummy1 up type dummy
+
+ip -n $NS link add link dummy1 name dummy1.10 up type vlan id 10 \
+ egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+ip -n $NS address add $IP4 dev dummy1.10
+ip -n $NS address add $IP6 dev dummy1.10 nodad
+
+ip netns exec $NS sysctl -wq net.ipv4.ping_group_range='0 2147483647'
+
+ip -n $NS neigh add $TGT4 lladdr 00:11:22:33:44:55 nud permanent \
+ dev dummy1.10
+ip -n $NS neigh add $TGT6 lladdr 00:11:22:33:44:55 nud permanent \
+ dev dummy1.10
+ip -n $NS neigh add $TGT4_RAW lladdr 00:11:22:33:44:66 nud permanent \
+ dev dummy1.10
+ip -n $NS neigh add $TGT6_RAW lladdr 00:11:22:33:44:66 nud permanent \
+ dev dummy1.10
+
+tc -n $NS qdisc add dev dummy1 clsact
+
+FILTER_COUNTER=10
+
+for i in 4 6; do
+ for proto in u i r; do
+ echo "Test IPV$i, prot: $proto"
+ for priority in {0..7}; do
+ if [[ $i == 4 && $proto == "r" ]]; then
+ TGT=$TGT4_RAW
+ elif [[ $i == 6 && $proto == "r" ]]; then
+ TGT=$TGT6_RAW
+ elif [ $i == 4 ]; then
+ TGT=$TGT4
+ else
+ TGT=$TGT6
+ fi
+
+ handle="${FILTER_COUNTER}${priority}"
+
+ create_filter $handle $priority ipv$i $proto $TGT
+
+ pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
+ | jq ".[] | select(.options.handle == ${handle}) | \
+ .options.actions[0].stats.packets")
+
+ if [[ $pkts == 0 ]]; then
+ check_result 0
+ else
+ echo "prio $priority: expected 0, got $pkts"
+ check_result 1
+ fi
+
+ ip netns exec $NS ./cmsg_sender -$i -Q $priority \
+ -p $proto $TGT $PORT
+
+ pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
+ | jq ".[] | select(.options.handle == ${handle}) | \
+ .options.actions[0].stats.packets")
+ if [[ $pkts == 1 ]]; then
+ check_result 0
+ else
+ echo "prio $priority -Q: expected 1, got $pkts"
+ check_result 1
+ fi
+
+ ip netns exec $NS ./cmsg_sender -$i -P $priority \
+ -p $proto $TGT $PORT
+
+ pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
+ | jq ".[] | select(.options.handle == ${handle}) | \
+ .options.actions[0].stats.packets")
+ if [[ $pkts == 2 ]]; then
+ check_result 0
+ else
+ echo "prio $priority -P: expected 2, got $pkts"
+ check_result 1
+ fi
+ done
+ FILTER_COUNTER=$((FILTER_COUNTER + 10))
+ done
+done
+
+if [ $FAILED_TESTS -ne 0 ]; then
+ echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed"
+ exit 1
+else
+ echo "OK - All $TOTAL_TESTS tests passed"
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh
index 1d7e756644bc..478af0aefa97 100755
--- a/tools/testing/selftests/net/cmsg_time.sh
+++ b/tools/testing/selftests/net/cmsg_time.sh
@@ -34,13 +34,28 @@ BAD=0
TOTAL=0
check_result() {
+ local ret=$1
+ local got=$2
+ local exp=$3
+ local case=$4
+ local xfail=$5
+ local xf=
+ local inc=
+
+ if [ "$xfail" == "xfail" ]; then
+ xf="(XFAIL)"
+ inc=0
+ else
+ inc=1
+ fi
+
((TOTAL++))
- if [ $1 -ne 0 ]; then
- echo " Case $4 returned $1, expected 0"
- ((BAD++))
+ if [ $ret -ne 0 ]; then
+ echo " Case $case returned $ret, expected 0 $xf"
+ ((BAD+=inc))
elif [ "$2" != "$3" ]; then
- echo " Case $4 returned '$2', expected '$3'"
- ((BAD++))
+ echo " Case $case returned '$got', expected '$exp' $xf"
+ ((BAD+=inc))
fi
}
@@ -66,14 +81,14 @@ for i in "-4 $TGT4" "-6 $TGT6"; do
awk '/SND/ { if ($3 > 1000) print "OK"; }')
check_result $? "$ts" "OK" "$prot - TXTIME abs"
- [ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000 || delay=1000
+ [ "$KSFT_MACHINE_SLOW" = yes ] && xfail=xfail
- ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d $delay |
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
awk '/SND/ {snd=$3}
/SCHED/ {sch=$3}
- END { if (snd - sch > '$((delay/2))') print "OK";
- else print snd, "-", sch, "<", '$((delay/2))'; }')
- check_result $? "$ts" "OK" "$prot - TXTIME rel"
+ END { if (snd - sch > 500) print "OK";
+ else print snd, "-", sch, "<", 500; }')
+ check_result $? "$ts" "OK" "$prot - TXTIME rel" $xfail
done
done
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 5b9baf708950..1e1f253118f5 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,109 +1,130 @@
-CONFIG_USER_NS=y
-CONFIG_NET_NS=y
+CONFIG_AMT=m
+CONFIG_BAREUDP=m
CONFIG_BONDING=m
CONFIG_BPF_SYSCALL=y
-CONFIG_TEST_BPF=m
-CONFIG_NUMA=y
-CONFIG_RPS=y
-CONFIG_SYSFS=y
-CONFIG_PROC_SYSCTL=y
-CONFIG_NET_VRF=y
-CONFIG_NET_L3_MASTER_DEV=y
-CONFIG_IPV6=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_VETH=y
-CONFIG_NET_IPVTI=y
-CONFIG_IPV6_VTI=y
-CONFIG_DUMMY=y
-CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_BRIDGE=y
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_CAN=m
+CONFIG_CAN_DEV=m
+CONFIG_CAN_VXCAN=m
+CONFIG_CRYPTO_ARIA=y
CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_VLAN_8021Q=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SM4_GENERIC=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_BTF_MODULES=n
+CONFIG_DUMMY=y
CONFIG_GENEVE=m
CONFIG_IFB=y
CONFIG_INET_DIAG=y
CONFIG_INET_ESP=y
CONFIG_INET_ESP_OFFLOAD=y
-CONFIG_NET_FOU=y
-CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_NETFILTER=y
-CONFIG_NETFILTER_ADVANCED=y
-CONFIG_NF_CONNTRACK=m
-CONFIG_IPV6_MROUTE=y
-CONFIG_IPV6_SIT=y
-CONFIG_IP_DCCP=m
-CONFIG_NF_NAT=m
+CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_SCTP=m
+CONFIG_IPV6=y
CONFIG_IPV6_GRE=m
+CONFIG_IPV6_ILA=m
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPV6_SIT=y
+CONFIG_IPV6_VTI=y
+CONFIG_IPVLAN=m
+CONFIG_KALLSYMS=y
+CONFIG_L2TP=m
CONFIG_L2TP_ETH=m
CONFIG_L2TP_IP=m
-CONFIG_L2TP=m
CONFIG_L2TP_V3=y
CONFIG_MACSEC=m
CONFIG_MACVLAN=y
CONFIG_MACVTAP=y
CONFIG_MPLS=y
+CONFIG_MPLS_IPTUNNEL=m
+CONFIG_MPLS_ROUTING=m
CONFIG_MPTCP=y
-CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_IPV6=y
-CONFIG_NF_TABLES_IPV4=y
-CONFIG_NFT_NAT=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_GACT=m
+CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_U32=m
-CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NETDEVSIM=m
+CONFIG_NET_DROP_MONITOR=m
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
+CONFIG_NET_FOU=y
+CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_DEMUX=m
CONFIG_NET_IPIP=y
+CONFIG_NET_IPVTI=y
+CONFIG_NETKIT=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_FQ_CODEL=m
CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_FQ=m
-CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_SCH_NETEM=y
CONFIG_NET_SCH_PRIO=m
-CONFIG_NFT_COMPAT=m
+CONFIG_NET_VRF=y
+CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_OVS=y
CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_NAT=m
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_NAT=m
+CONFIG_NUMA=y
CONFIG_OPENVSWITCH=m
CONFIG_OPENVSWITCH_GENEVE=m
CONFIG_OPENVSWITCH_GRE=m
CONFIG_OPENVSWITCH_VXLAN=m
+CONFIG_PROC_SYSCTL=y
CONFIG_PSAMPLE=m
+CONFIG_RPS=y
+CONFIG_SYSFS=y
CONFIG_TCP_MD5SIG=y
CONFIG_TEST_BLACKHOLE_DEV=m
-CONFIG_KALLSYMS=y
+CONFIG_TEST_BPF=m
CONFIG_TLS=m
CONFIG_TRACEPOINTS=y
-CONFIG_NET_DROP_MONITOR=m
-CONFIG_NETDEVSIM=m
-CONFIG_MPLS_ROUTING=m
-CONFIG_MPLS_IPTUNNEL=m
-CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_CLS_FLOWER=m
-CONFIG_NET_ACT_TUNNEL_KEY=m
-CONFIG_NET_ACT_MIRRED=m
-CONFIG_BAREUDP=m
-CONFIG_IPV6_IOAM6_LWTUNNEL=y
-CONFIG_CRYPTO_SM4_GENERIC=y
-CONFIG_AMT=m
CONFIG_TUN=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
CONFIG_VXLAN=m
-CONFIG_IP_SCTP=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_CRYPTO_ARIA=y
CONFIG_XFRM_INTERFACE=m
CONFIG_XFRM_USER=m
-CONFIG_IP_NF_MATCH_RPFILTER=m
-CONFIG_IP6_NF_MATCH_RPFILTER=m
diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh
index 7c4818c971fc..507d0a82f5f0 100755
--- a/tools/testing/selftests/net/drop_monitor_tests.sh
+++ b/tools/testing/selftests/net/drop_monitor_tests.sh
@@ -77,7 +77,7 @@ sw_drops_test()
rm ${dir}/packets.pcap
- { kill %% && wait %%; } 2>/dev/null
+ kill_process %%
timeout 5 dwdump -o sw -w ${dir}/packets.pcap
(( $(tshark -r ${dir}/packets.pcap \
-Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) == 0))
diff --git a/tools/testing/selftests/net/epoll_busy_poll.c b/tools/testing/selftests/net/epoll_busy_poll.c
index 16e457c2f877..adf8dd0b5e0b 100644
--- a/tools/testing/selftests/net/epoll_busy_poll.c
+++ b/tools/testing/selftests/net/epoll_busy_poll.c
@@ -23,7 +23,7 @@
#include <sys/ioctl.h>
#include <sys/socket.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
/* if the headers haven't been updated, we need to define some things */
#if !defined(EPOLL_IOC_TYPE)
diff --git a/tools/testing/selftests/net/fcnal-ipv4.sh b/tools/testing/selftests/net/fcnal-ipv4.sh
new file mode 100755
index 000000000000..82f9c867c3e8
--- /dev/null
+++ b/tools/testing/selftests/net/fcnal-ipv4.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+./fcnal-test.sh -t ipv4
diff --git a/tools/testing/selftests/net/fcnal-ipv6.sh b/tools/testing/selftests/net/fcnal-ipv6.sh
new file mode 100755
index 000000000000..ab1fc7aa3caf
--- /dev/null
+++ b/tools/testing/selftests/net/fcnal-ipv6.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+./fcnal-test.sh -t ipv6
diff --git a/tools/testing/selftests/net/fcnal-other.sh b/tools/testing/selftests/net/fcnal-other.sh
new file mode 100755
index 000000000000..a840cf80b32e
--- /dev/null
+++ b/tools/testing/selftests/net/fcnal-other.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+./fcnal-test.sh -t other
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 386ebd829df5..844a580ae74e 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -189,7 +189,7 @@ show_hint()
kill_procs()
{
killall nettest ping ping6 >/dev/null 2>&1
- sleep 1
+ slowwait 2 sh -c 'test -z "$(pgrep '"'^(nettest|ping|ping6)$'"')"'
}
set_ping_group()
@@ -424,6 +424,8 @@ create_ns()
ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.accept_dad=0
}
# create veth pair to connect namespaces and apply addresses.
@@ -875,7 +877,7 @@ ipv4_tcp_md5_novrf()
# basic use case
log_start
run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: Single address config"
@@ -883,7 +885,7 @@ ipv4_tcp_md5_novrf()
log_start
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: Server no config, client uses password"
@@ -891,7 +893,7 @@ ipv4_tcp_md5_novrf()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: Client uses wrong password"
@@ -899,7 +901,7 @@ ipv4_tcp_md5_novrf()
log_start
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: Client address does not match address configured with password"
@@ -910,7 +912,7 @@ ipv4_tcp_md5_novrf()
# client in prefix
log_start
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: Prefix config"
@@ -918,7 +920,7 @@ ipv4_tcp_md5_novrf()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: Prefix config, client uses wrong password"
@@ -926,7 +928,7 @@ ipv4_tcp_md5_novrf()
log_start
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
}
@@ -943,7 +945,7 @@ ipv4_tcp_md5()
# basic use case
log_start
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Single address config"
@@ -951,7 +953,7 @@ ipv4_tcp_md5()
log_start
show_hint "Should timeout since server does not have MD5 auth"
run_cmd nettest -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Server no config, client uses password"
@@ -959,7 +961,7 @@ ipv4_tcp_md5()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Client uses wrong password"
@@ -967,7 +969,7 @@ ipv4_tcp_md5()
log_start
show_hint "Should timeout since server config differs from client"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
@@ -978,7 +980,7 @@ ipv4_tcp_md5()
# client in prefix
log_start
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Prefix config"
@@ -986,7 +988,7 @@ ipv4_tcp_md5()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
@@ -994,7 +996,7 @@ ipv4_tcp_md5()
log_start
show_hint "Should timeout since client address is outside of prefix"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
@@ -1005,14 +1007,14 @@ ipv4_tcp_md5()
log_start
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
log_start
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
@@ -1020,7 +1022,7 @@ ipv4_tcp_md5()
show_hint "Should timeout since client in default VRF uses VRF password"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
@@ -1028,21 +1030,21 @@ ipv4_tcp_md5()
show_hint "Should timeout since client in VRF uses default VRF password"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
log_start
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
log_start
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
@@ -1050,7 +1052,7 @@ ipv4_tcp_md5()
show_hint "Should timeout since client in default VRF uses VRF password"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
@@ -1058,7 +1060,7 @@ ipv4_tcp_md5()
show_hint "Should timeout since client in VRF uses default VRF password"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
@@ -1082,14 +1084,14 @@ test_ipv4_md5_vrf__vrf_server__no_bind_ifindex()
log_start
show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection"
log_start
show_hint "Binding both the socket and the key is not required but it works"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection"
}
@@ -1103,25 +1105,25 @@ test_ipv4_md5_vrf__global_server__bind_ifindex0()
log_start
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection"
log_start
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection"
log_start
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection"
log_start
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection"
@@ -1193,7 +1195,7 @@ ipv4_tcp_novrf()
do
log_start
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Global server"
done
@@ -1201,7 +1203,7 @@ ipv4_tcp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -1221,13 +1223,13 @@ ipv4_tcp_novrf()
do
log_start
run_cmd_nsb nettest -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -r ${a} -0 ${NSA_IP}
log_test_addr ${a} $? 0 "Client"
log_start
run_cmd_nsb nettest -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 0 "Client, device bind"
@@ -1249,7 +1251,7 @@ ipv4_tcp_novrf()
do
log_start
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -0 ${a} -1 ${a}
log_test_addr ${a} $? 0 "Global server, local connection"
done
@@ -1257,7 +1259,7 @@ ipv4_tcp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -1266,7 +1268,7 @@ ipv4_tcp_novrf()
log_start
show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
run_cmd nettest -s -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a}
log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
done
@@ -1274,7 +1276,7 @@ ipv4_tcp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -0 ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 0 "Global server, device client, local connection"
@@ -1283,7 +1285,7 @@ ipv4_tcp_novrf()
log_start
show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 1 "Global server, device client, local connection"
done
@@ -1291,7 +1293,7 @@ ipv4_tcp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -d ${NSA_DEV} -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local connection"
@@ -1323,19 +1325,19 @@ ipv4_tcp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 1 "Global server"
log_start
run_cmd nettest -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "VRF server"
log_start
run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -1352,7 +1354,7 @@ ipv4_tcp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 1 "Global server, local connection"
@@ -1374,14 +1376,14 @@ ipv4_tcp_vrf()
log_start
show_hint "client socket should be bound to VRF"
run_cmd nettest -s -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Global server"
log_start
show_hint "client socket should be bound to VRF"
run_cmd nettest -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "VRF server"
@@ -1396,7 +1398,7 @@ ipv4_tcp_vrf()
log_start
show_hint "client socket should be bound to device"
run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -1406,7 +1408,7 @@ ipv4_tcp_vrf()
log_start
show_hint "Should fail 'Connection refused' since client is not bound to VRF"
run_cmd nettest -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a}
log_test_addr ${a} $? 1 "Global server, local connection"
done
@@ -1418,13 +1420,13 @@ ipv4_tcp_vrf()
do
log_start
run_cmd_nsb nettest -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -r ${a} -d ${VRF}
log_test_addr ${a} $? 0 "Client, VRF bind"
log_start
run_cmd_nsb nettest -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 0 "Client, device bind"
@@ -1443,7 +1445,7 @@ ipv4_tcp_vrf()
do
log_start
run_cmd nettest -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -d ${VRF} -0 ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local connection"
done
@@ -1451,26 +1453,26 @@ ipv4_tcp_vrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local connection"
log_start
show_hint "Should fail 'No route to host' since client is out of VRF scope"
run_cmd nettest -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a}
log_test_addr ${a} $? 1 "VRF server, unbound client, local connection"
log_start
run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -d ${VRF} -0 ${a}
log_test_addr ${a} $? 0 "Device server, VRF client, local connection"
log_start
run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local connection"
}
@@ -1509,7 +1511,7 @@ ipv4_udp_novrf()
do
log_start
run_cmd nettest -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
@@ -1522,7 +1524,7 @@ ipv4_udp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -1533,31 +1535,31 @@ ipv4_udp_novrf()
do
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -D -r ${a} -0 ${NSA_IP}
log_test_addr ${a} $? 0 "Client"
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP}
log_test_addr ${a} $? 0 "Client, device bind"
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP}
log_test_addr ${a} $? 0 "Client, device send via cmsg"
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP}
log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF"
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} -U
log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF, with connect()"
@@ -1580,7 +1582,7 @@ ipv4_udp_novrf()
do
log_start
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a} -0 ${a} -1 ${a}
log_test_addr ${a} $? 0 "Global server, local connection"
done
@@ -1588,7 +1590,7 @@ ipv4_udp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a}
log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -1597,7 +1599,7 @@ ipv4_udp_novrf()
log_start
show_hint "Should fail 'Connection refused' since address is out of device scope"
run_cmd nettest -s -D -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a}
log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
done
@@ -1605,25 +1607,25 @@ ipv4_udp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Global server, device client, local connection"
log_start
run_cmd nettest -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -C -r ${a}
log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection"
log_start
run_cmd nettest -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -S -r ${a}
log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection"
log_start
run_cmd nettest -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} -U
log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
@@ -1636,28 +1638,28 @@ ipv4_udp_novrf()
log_start
show_hint "Should fail since addresses on loopback are out of device scope"
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 2 "Global server, device client, local connection"
log_start
show_hint "Should fail since addresses on loopback are out of device scope"
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C
log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection"
log_start
show_hint "Should fail since addresses on loopback are out of device scope"
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S
log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
log_start
show_hint "Should fail since addresses on loopback are out of device scope"
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -U
log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
@@ -1667,7 +1669,7 @@ ipv4_udp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -D -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -1709,19 +1711,19 @@ ipv4_udp_vrf()
log_start
show_hint "Fails because ingress is in a VRF and global server is disabled"
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 1 "Global server"
log_start
run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "VRF server"
log_start
run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server"
@@ -1733,7 +1735,7 @@ ipv4_udp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server is out of scope"
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 1 "Global server, VRF client, local connection"
done
@@ -1741,26 +1743,26 @@ ipv4_udp_vrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
log_start
run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection"
a=${NSA_IP}
log_start
run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
log_start
run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
@@ -1775,19 +1777,19 @@ ipv4_udp_vrf()
do
log_start
run_cmd nettest -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
log_start
run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "VRF server"
log_start
run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server"
@@ -1802,13 +1804,13 @@ ipv4_udp_vrf()
#
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -d ${VRF} -D -r ${NSB_IP} -1 ${NSA_IP}
log_test $? 0 "VRF client"
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -d ${NSA_DEV} -D -r ${NSB_IP} -1 ${NSA_IP}
log_test $? 0 "Enslaved device client"
@@ -1829,31 +1831,31 @@ ipv4_udp_vrf()
a=${NSA_IP}
log_start
run_cmd nettest -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
log_start
run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
log_start
run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local conn"
log_start
run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
log_start
run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
@@ -1861,7 +1863,7 @@ ipv4_udp_vrf()
do
log_start
run_cmd nettest -D -s -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
done
@@ -1870,7 +1872,7 @@ ipv4_udp_vrf()
do
log_start
run_cmd nettest -s -D -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
done
@@ -2093,7 +2095,7 @@ ipv4_rt()
do
log_start
run_cmd nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2107,7 +2109,7 @@ ipv4_rt()
do
log_start
run_cmd nettest ${varg} -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2120,7 +2122,7 @@ ipv4_rt()
a=${NSA_IP}
log_start
run_cmd nettest ${varg} -s -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2134,7 +2136,7 @@ ipv4_rt()
#
log_start
run_cmd_nsb nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2145,7 +2147,7 @@ ipv4_rt()
log_start
run_cmd_nsb nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2161,7 +2163,7 @@ ipv4_rt()
do
log_start
run_cmd nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${VRF} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2175,7 +2177,7 @@ ipv4_rt()
do
log_start
run_cmd nettest ${varg} -I ${VRF} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${VRF} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2189,7 +2191,7 @@ ipv4_rt()
log_start
run_cmd nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2200,7 +2202,7 @@ ipv4_rt()
log_start
run_cmd nettest ${varg} -I ${VRF} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2211,7 +2213,7 @@ ipv4_rt()
log_start
run_cmd nettest ${varg} -I ${NSA_DEV} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2561,7 +2563,7 @@ ipv6_tcp_md5_novrf()
# basic use case
log_start
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: Single address config"
@@ -2569,7 +2571,7 @@ ipv6_tcp_md5_novrf()
log_start
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: Server no config, client uses password"
@@ -2577,7 +2579,7 @@ ipv6_tcp_md5_novrf()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: Client uses wrong password"
@@ -2585,7 +2587,7 @@ ipv6_tcp_md5_novrf()
log_start
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: Client address does not match address configured with password"
@@ -2596,7 +2598,7 @@ ipv6_tcp_md5_novrf()
# client in prefix
log_start
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: Prefix config"
@@ -2604,7 +2606,7 @@ ipv6_tcp_md5_novrf()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: Prefix config, client uses wrong password"
@@ -2612,7 +2614,7 @@ ipv6_tcp_md5_novrf()
log_start
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
}
@@ -2629,7 +2631,7 @@ ipv6_tcp_md5()
# basic use case
log_start
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Single address config"
@@ -2637,7 +2639,7 @@ ipv6_tcp_md5()
log_start
show_hint "Should timeout since server does not have MD5 auth"
run_cmd nettest -6 -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Server no config, client uses password"
@@ -2645,7 +2647,7 @@ ipv6_tcp_md5()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Client uses wrong password"
@@ -2653,7 +2655,7 @@ ipv6_tcp_md5()
log_start
show_hint "Should timeout since server config differs from client"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
@@ -2664,7 +2666,7 @@ ipv6_tcp_md5()
# client in prefix
log_start
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Prefix config"
@@ -2672,7 +2674,7 @@ ipv6_tcp_md5()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
@@ -2680,7 +2682,7 @@ ipv6_tcp_md5()
log_start
show_hint "Should timeout since client address is outside of prefix"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
@@ -2691,14 +2693,14 @@ ipv6_tcp_md5()
log_start
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
log_start
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
@@ -2706,7 +2708,7 @@ ipv6_tcp_md5()
show_hint "Should timeout since client in default VRF uses VRF password"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
@@ -2714,21 +2716,21 @@ ipv6_tcp_md5()
show_hint "Should timeout since client in VRF uses default VRF password"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
log_start
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
log_start
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
@@ -2736,7 +2738,7 @@ ipv6_tcp_md5()
show_hint "Should timeout since client in default VRF uses VRF password"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
@@ -2744,7 +2746,7 @@ ipv6_tcp_md5()
show_hint "Should timeout since client in VRF uses default VRF password"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
@@ -2772,7 +2774,7 @@ ipv6_tcp_novrf()
do
log_start
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Global server"
done
@@ -2793,7 +2795,7 @@ ipv6_tcp_novrf()
do
log_start
run_cmd_nsb nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Client"
done
@@ -2802,7 +2804,7 @@ ipv6_tcp_novrf()
do
log_start
run_cmd_nsb nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 0 "Client, device bind"
done
@@ -2822,7 +2824,7 @@ ipv6_tcp_novrf()
do
log_start
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Global server, local connection"
done
@@ -2830,7 +2832,7 @@ ipv6_tcp_novrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -2839,7 +2841,7 @@ ipv6_tcp_novrf()
log_start
show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
run_cmd nettest -6 -s -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
done
@@ -2847,7 +2849,7 @@ ipv6_tcp_novrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "Global server, device client, local connection"
@@ -2856,7 +2858,7 @@ ipv6_tcp_novrf()
log_start
show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 1 "Global server, device client, local connection"
done
@@ -2865,7 +2867,7 @@ ipv6_tcp_novrf()
do
log_start
run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Device server, device client, local conn"
done
@@ -2898,7 +2900,7 @@ ipv6_tcp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 1 "Global server"
done
@@ -2907,7 +2909,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "VRF server"
done
@@ -2916,7 +2918,7 @@ ipv6_tcp_vrf()
a=${NSA_LINKIP6}%${NSB_DEV}
log_start
run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "VRF server"
@@ -2924,7 +2926,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Device server"
done
@@ -2943,7 +2945,7 @@ ipv6_tcp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 1 "Global server, local connection"
@@ -2964,7 +2966,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Global server"
done
@@ -2973,7 +2975,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "VRF server"
done
@@ -2982,13 +2984,13 @@ ipv6_tcp_vrf()
a=${NSA_LINKIP6}%${NSB_DEV}
log_start
run_cmd nettest -6 -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Global server"
log_start
run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "VRF server"
@@ -2996,7 +2998,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Device server"
done
@@ -3016,7 +3018,7 @@ ipv6_tcp_vrf()
log_start
show_hint "Fails 'Connection refused' since client is not in VRF"
run_cmd nettest -6 -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 1 "Global server, local connection"
done
@@ -3029,7 +3031,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd_nsb nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${VRF}
log_test_addr ${a} $? 0 "Client, VRF bind"
done
@@ -3038,7 +3040,7 @@ ipv6_tcp_vrf()
log_start
show_hint "Fails since VRF device does not allow linklocal addresses"
run_cmd_nsb nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${VRF}
log_test_addr ${a} $? 1 "Client, VRF bind"
@@ -3046,7 +3048,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd_nsb nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 0 "Client, device bind"
done
@@ -3071,7 +3073,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local connection"
done
@@ -3079,7 +3081,7 @@ ipv6_tcp_vrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local connection"
@@ -3087,13 +3089,13 @@ ipv6_tcp_vrf()
log_start
show_hint "Should fail since unbound client is out of VRF scope"
run_cmd nettest -6 -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 1 "VRF server, unbound client, local connection"
log_start
run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a}
log_test_addr ${a} $? 0 "Device server, VRF client, local connection"
@@ -3101,7 +3103,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local connection"
done
@@ -3141,13 +3143,13 @@ ipv6_udp_novrf()
do
log_start
run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Device server"
done
@@ -3155,7 +3157,7 @@ ipv6_udp_novrf()
a=${NSA_LO_IP6}
log_start
run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
@@ -3165,7 +3167,7 @@ ipv6_udp_novrf()
#log_start
#show_hint "Should fail since loopback address is out of scope"
#run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- #sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
#run_cmd_nsb nettest -6 -D -r ${a}
#log_test_addr ${a} $? 1 "Device server"
@@ -3185,25 +3187,25 @@ ipv6_udp_novrf()
do
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -r ${a} -0 ${NSA_IP6}
log_test_addr ${a} $? 0 "Client"
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP6}
log_test_addr ${a} $? 0 "Client, device bind"
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP6}
log_test_addr ${a} $? 0 "Client, device send via cmsg"
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP6}
log_test_addr ${a} $? 0 "Client, device bind via IPV6_UNICAST_IF"
@@ -3225,7 +3227,7 @@ ipv6_udp_novrf()
do
log_start
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a} -0 ${a} -1 ${a}
log_test_addr ${a} $? 0 "Global server, local connection"
done
@@ -3233,7 +3235,7 @@ ipv6_udp_novrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -3242,7 +3244,7 @@ ipv6_udp_novrf()
log_start
show_hint "Should fail 'Connection refused' since address is out of device scope"
run_cmd nettest -6 -s -D -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a}
log_test_addr ${a} $? 1 "Device server, local connection"
done
@@ -3250,19 +3252,19 @@ ipv6_udp_novrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Global server, device client, local connection"
log_start
run_cmd nettest -6 -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -C -r ${a}
log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection"
log_start
run_cmd nettest -6 -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -S -r ${a}
log_test_addr ${a} $? 0 "Global server, device client via IPV6_UNICAST_IF, local connection"
@@ -3271,28 +3273,28 @@ ipv6_udp_novrf()
log_start
show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 1 "Global server, device client, local connection"
log_start
show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C
log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection"
log_start
show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S
log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
log_start
show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -U
log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
done
@@ -3300,7 +3302,7 @@ ipv6_udp_novrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -D -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -3314,7 +3316,7 @@ ipv6_udp_novrf()
run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV}
log_start
run_cmd nettest -6 -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${NSA_IP6}
log_test $? 0 "UDP in - LLA to GUA"
@@ -3338,7 +3340,7 @@ ipv6_udp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server is disabled"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 1 "Global server"
done
@@ -3347,7 +3349,7 @@ ipv6_udp_vrf()
do
log_start
run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "VRF server"
done
@@ -3356,7 +3358,7 @@ ipv6_udp_vrf()
do
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server"
done
@@ -3378,7 +3380,7 @@ ipv6_udp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server is disabled"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 1 "Global server, VRF client, local conn"
done
@@ -3387,7 +3389,7 @@ ipv6_udp_vrf()
do
log_start
run_cmd nettest -6 -D -I ${VRF} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
done
@@ -3396,25 +3398,25 @@ ipv6_udp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server is disabled"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 1 "Global server, device client, local conn"
log_start
run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local conn"
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
@@ -3429,7 +3431,7 @@ ipv6_udp_vrf()
do
log_start
run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
done
@@ -3438,7 +3440,7 @@ ipv6_udp_vrf()
do
log_start
run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "VRF server"
done
@@ -3447,7 +3449,7 @@ ipv6_udp_vrf()
do
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server"
done
@@ -3465,7 +3467,7 @@ ipv6_udp_vrf()
#
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${NSB_IP6}
log_test $? 0 "VRF client"
@@ -3476,7 +3478,7 @@ ipv6_udp_vrf()
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_IP6}
log_test $? 0 "Enslaved device client"
@@ -3491,13 +3493,13 @@ ipv6_udp_vrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
#log_start
run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
@@ -3505,13 +3507,13 @@ ipv6_udp_vrf()
a=${VRF_IP6}
log_start
run_cmd nettest -6 -D -s -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
log_start
run_cmd nettest -6 -D -I ${VRF} -s -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
@@ -3527,25 +3529,25 @@ ipv6_udp_vrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Global server, device client, local conn"
log_start
run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local conn"
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Device server, VRF client, local conn"
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -3557,7 +3559,7 @@ ipv6_udp_vrf()
# link local addresses
log_start
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -d ${NSB_DEV} -r ${NSA_LINKIP6}
log_test $? 0 "Global server, linklocal IP"
@@ -3568,7 +3570,7 @@ ipv6_udp_vrf()
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_LINKIP6}
log_test $? 0 "Enslaved device client, linklocal IP"
@@ -3579,7 +3581,7 @@ ipv6_udp_vrf()
log_start
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSA_LINKIP6}
log_test $? 0 "Enslaved device client, local conn - linklocal IP"
@@ -3592,7 +3594,7 @@ ipv6_udp_vrf()
run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV}
log_start
run_cmd nettest -6 -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${NSA_IP6}
log_test $? 0 "UDP in - LLA to GUA"
@@ -3667,7 +3669,7 @@ ipv6_addr_bind_novrf()
# when it really should not
a=${NSA_LO_IP6}
log_start
- show_hint "Tecnically should fail since address is not on device but kernel allows"
+ show_hint "Technically should fail since address is not on device but kernel allows"
run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address"
}
@@ -3724,7 +3726,7 @@ ipv6_addr_bind_vrf()
# passes when it really should not
a=${VRF_IP6}
log_start
- show_hint "Tecnically should fail since address is not on device but kernel allows"
+ show_hint "Technically should fail since address is not on device but kernel allows"
run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind"
@@ -3771,7 +3773,7 @@ ipv6_rt()
do
log_start
run_cmd nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3785,7 +3787,7 @@ ipv6_rt()
do
log_start
run_cmd nettest ${varg} -I ${VRF} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3799,7 +3801,7 @@ ipv6_rt()
do
log_start
run_cmd nettest ${varg} -I ${NSA_DEV} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3814,7 +3816,7 @@ ipv6_rt()
#
log_start
run_cmd_nsb nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP6} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3825,7 +3827,7 @@ ipv6_rt()
log_start
run_cmd_nsb nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP6} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3842,7 +3844,7 @@ ipv6_rt()
do
log_start
run_cmd nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${VRF} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3856,7 +3858,7 @@ ipv6_rt()
do
log_start
run_cmd nettest ${varg} -I ${VRF} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${VRF} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3869,7 +3871,7 @@ ipv6_rt()
a=${NSA_IP6}
log_start
run_cmd nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3880,7 +3882,7 @@ ipv6_rt()
log_start
run_cmd nettest ${varg} -I ${VRF} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3891,7 +3893,7 @@ ipv6_rt()
log_start
run_cmd nettest ${varg} -I ${NSA_DEV} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3950,7 +3952,7 @@ netfilter_tcp_reset()
do
log_start
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx"
done
@@ -3968,7 +3970,7 @@ netfilter_icmp()
do
log_start
run_cmd nettest ${arg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${arg} -r ${a}
log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach"
done
@@ -4007,7 +4009,7 @@ netfilter_tcp6_reset()
do
log_start
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx"
done
@@ -4025,7 +4027,7 @@ netfilter_icmp6()
do
log_start
run_cmd nettest -6 -s ${arg} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 ${arg} -r ${a}
log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach"
done
@@ -4221,12 +4223,12 @@ use_case_snat_on_vrf()
run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} &
- sleep 1
+ wait_local_port_listen ${NSB} ${port} tcp
run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port}
log_test $? 0 "IPv4 TCP connection over VRF with SNAT"
run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} &
- sleep 1
+ wait_local_port_listen ${NSB} ${port} tcp
run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port}
log_test $? 0 "IPv6 TCP connection over VRF with SNAT"
@@ -4272,6 +4274,7 @@ EOF
TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter"
TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter"
TESTS_OTHER="use_cases"
+# note: each TEST_ group needs a dedicated runner, e.g. fcnal-ipv4.sh
PAUSE_ON_FAIL=no
PAUSE=no
@@ -4302,16 +4305,11 @@ elif [ "$TESTS" = "ipv4" ]; then
TESTS="$TESTS_IPV4"
elif [ "$TESTS" = "ipv6" ]; then
TESTS="$TESTS_IPV6"
+elif [ "$TESTS" = "other" ]; then
+ TESTS="$TESTS_OTHER"
fi
-# nettest can be run from PATH or from same directory as this selftest
-if ! which nettest >/dev/null; then
- PATH=$PWD:$PATH
- if ! which nettest >/dev/null; then
- echo "'nettest' command not found; skipping tests"
- exit $ksft_skip
- fi
-fi
+check_gen_prog "nettest"
declare -i nfail=0
declare -i nsuccess=0
diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh
index d5e3abb8658c..9931a1e36e3d 100755
--- a/tools/testing/selftests/net/fdb_flush.sh
+++ b/tools/testing/selftests/net/fdb_flush.sh
@@ -583,7 +583,7 @@ vxlan_test_flush_by_remote_attributes()
$IP link del dev vx10
$IP link add name vx10 type vxlan dstport "$VXPORT" external
- # For multicat FDB entries, the VXLAN driver stores a linked list of
+ # For multicast FDB entries, the VXLAN driver stores a linked list of
# remotes for a given key. Verify that only the expected remotes are
# flushed.
multicast_fdb_entries_add
diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh
new file mode 100755
index 000000000000..0b8a2465dd04
--- /dev/null
+++ b/tools/testing/selftests/net/fdb_notify.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+ALL_TESTS="
+ test_dup_bridge
+ test_dup_vxlan_self
+ test_dup_vxlan_master
+ test_dup_macvlan_self
+ test_dup_macvlan_master
+"
+
+do_test_dup()
+{
+ local op=$1; shift
+ local what=$1; shift
+ local tmpf
+
+ RET=0
+
+ tmpf=$(mktemp)
+ defer rm "$tmpf"
+
+ defer_scope_push
+ bridge monitor fdb &> "$tmpf" &
+ defer kill_process $!
+
+ sleep 0.5
+ bridge fdb "$op" 00:11:22:33:44:55 vlan 1 "$@"
+ sleep 0.5
+ defer_scope_pop
+
+ local count=$(grep -c -e 00:11:22:33:44:55 $tmpf)
+ ((count == 1))
+ check_err $? "Got $count notifications, expected 1"
+
+ log_test "$what $op: Duplicate notifications"
+}
+
+test_dup_bridge()
+{
+ adf_ip_link_add br up type bridge vlan_filtering 1
+ do_test_dup add "bridge" dev br self
+ do_test_dup del "bridge" dev br self
+}
+
+test_dup_vxlan_self()
+{
+ adf_ip_link_add br up type bridge vlan_filtering 1
+ adf_ip_link_add vx up type vxlan id 2000 dstport 4789
+ adf_ip_link_set_master vx br
+
+ do_test_dup add "vxlan" dev vx self dst 192.0.2.1
+ do_test_dup del "vxlan" dev vx self dst 192.0.2.1
+}
+
+test_dup_vxlan_master()
+{
+ adf_ip_link_add br up type bridge vlan_filtering 1
+ adf_ip_link_add vx up type vxlan id 2000 dstport 4789
+ adf_ip_link_set_master vx br
+
+ do_test_dup add "vxlan master" dev vx master
+ do_test_dup del "vxlan master" dev vx master
+}
+
+test_dup_macvlan_self()
+{
+ adf_ip_link_add dd up type dummy
+ adf_ip_link_add mv up link dd type macvlan mode passthru
+
+ do_test_dup add "macvlan self" dev mv self
+ do_test_dup del "macvlan self" dev mv self
+}
+
+test_dup_macvlan_master()
+{
+ adf_ip_link_add br up type bridge vlan_filtering 1
+ adf_ip_link_add dd up type dummy
+ adf_ip_link_add mv up link dd type macvlan mode passthru
+ adf_ip_link_set_master mv br
+
+ do_test_dup add "macvlan master" dev mv self
+ do_test_dup del "macvlan master" dev mv self
+}
+
+cleanup()
+{
+ defer_scopes_cleanup
+}
+
+trap cleanup EXIT
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index ac0b2c6a5761..2b0a90581e2f 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -76,9 +76,16 @@ log_test()
printf "TEST: %-60s [ OK ]\n" "${msg}"
nsuccess=$((nsuccess+1))
else
- ret=1
- nfail=$((nfail+1))
- printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [[ $rc -eq $ksft_skip ]]; then
+ [[ $ret -eq 0 ]] && ret=$ksft_skip
+ nskip=$((nskip+1))
+ printf "TEST: %-60s [SKIP]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ fi
+
if [ "$VERBOSE" = "1" ]; then
echo " rc=$rc, expected $expected"
fi
@@ -460,8 +467,8 @@ ipv6_fdb_grp_fcnal()
log_test $? 0 "Get Fdb nexthop group by id"
# fdb nexthop group can only contain fdb nexthops
- run_cmd "$IP nexthop add id 63 via 2001:db8:91::4"
- run_cmd "$IP nexthop add id 64 via 2001:db8:91::5"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::4 dev veth1"
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::5 dev veth1"
run_cmd "$IP nexthop add id 103 group 63/64 fdb"
log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
@@ -487,6 +494,26 @@ ipv6_fdb_grp_fcnal()
run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb"
log_test $? 2 "Fdb Nexthop with encap"
+ # Replace FDB nexthop to non-FDB and vice versa
+ run_cmd "$IP nexthop add id 70 via 2001:db8:91::2 fdb"
+ run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1"
+ log_test $? 0 "Replace FDB nexthop to non-FDB nexthop"
+ run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 fdb"
+ log_test $? 0 "Replace non-FDB nexthop to FDB nexthop"
+
+ # Replace FDB nexthop address while in a group
+ run_cmd "$IP nexthop add id 71 group 70 fdb"
+ run_cmd "$IP nexthop replace id 70 via 2001:db8:91::3 fdb"
+ log_test $? 0 "Replace FDB nexthop address while in a group"
+
+ # Cannot replace FDB nexthop to non-FDB and vice versa while in a group
+ run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1"
+ log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group"
+ run_cmd "$IP nexthop add id 72 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 73 group 72"
+ run_cmd "$IP nexthop replace id 72 via 2001:db8:91::2 fdb"
+ log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group"
+
run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
log_test $? 0 "Fdb mac add with nexthop group"
@@ -540,15 +567,15 @@ ipv4_fdb_grp_fcnal()
log_test $? 0 "Get Fdb nexthop group by id"
# fdb nexthop group can only contain fdb nexthops
- run_cmd "$IP nexthop add id 14 via 172.16.1.2"
- run_cmd "$IP nexthop add id 15 via 172.16.1.3"
+ run_cmd "$IP nexthop add id 14 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 15 via 172.16.1.3 dev veth1"
run_cmd "$IP nexthop add id 103 group 14/15 fdb"
log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
# Non fdb nexthop group can not contain fdb nexthops
run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb"
run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb"
- run_cmd "$IP nexthop add id 104 group 14/15"
+ run_cmd "$IP nexthop add id 104 group 16/17"
log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops"
# fdb nexthop cannot have blackhole
@@ -567,6 +594,26 @@ ipv4_fdb_grp_fcnal()
run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb"
log_test $? 2 "Fdb Nexthop with encap"
+ # Replace FDB nexthop to non-FDB and vice versa
+ run_cmd "$IP nexthop add id 18 via 172.16.1.2 fdb"
+ run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1"
+ log_test $? 0 "Replace FDB nexthop to non-FDB nexthop"
+ run_cmd "$IP nexthop replace id 18 via 172.16.1.2 fdb"
+ log_test $? 0 "Replace non-FDB nexthop to FDB nexthop"
+
+ # Replace FDB nexthop address while in a group
+ run_cmd "$IP nexthop add id 19 group 18 fdb"
+ run_cmd "$IP nexthop replace id 18 via 172.16.1.3 fdb"
+ log_test $? 0 "Replace FDB nexthop address while in a group"
+
+ # Cannot replace FDB nexthop to non-FDB and vice versa while in a group
+ run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1"
+ log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group"
+ run_cmd "$IP nexthop add id 20 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 21 group 20"
+ run_cmd "$IP nexthop replace id 20 via 172.16.1.2 fdb"
+ log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group"
+
run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
log_test $? 0 "Fdb mac add with nexthop group"
@@ -575,7 +622,7 @@ ipv4_fdb_grp_fcnal()
run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self"
log_test $? 255 "Fdb mac add with nexthop"
- run_cmd "$IP ro add 172.16.0.0/22 nhid 15"
+ run_cmd "$IP ro add 172.16.0.0/22 nhid 16"
log_test $? 2 "Route add with fdb nexthop"
run_cmd "$IP ro add 172.16.0.0/22 nhid 103"
@@ -736,7 +783,7 @@ ipv6_fcnal()
run_cmd "$IP nexthop add id 52 via 2001:db8:92::3"
log_test $? 2 "Create nexthop - gw only"
- # gw is not reachable throught given dev
+ # gw is not reachable through given dev
run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1"
log_test $? 2 "Create nexthop - invalid gw+dev combination"
@@ -923,6 +970,29 @@ ipv6_grp_fcnal()
ipv6_grp_refs
log_test $? 0 "Nexthop group replace refcounts"
+
+ #
+ # 16-bit weights.
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+ run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1"
+ run_cmd "$IP nexthop add id 66 dev veth1"
+
+ run_cmd "$IP nexthop add id 103 group 62,1000"
+ if [[ $? == 0 ]]; then
+ local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535"
+ run_cmd "$IP nexthop replace $GRP"
+ check_nexthop "id 103" "$GRP"
+ rc=$?
+ else
+ rc=$ksft_skip
+ fi
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ log_test $rc 0 "16-bit weights"
}
ipv6_res_grp_fcnal()
@@ -987,6 +1057,31 @@ ipv6_res_grp_fcnal()
check_nexthop_bucket "list id 102" \
"id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+
+ #
+ # 16-bit weights.
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+ run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1"
+ run_cmd "$IP nexthop add id 66 dev veth1"
+
+ run_cmd "$IP nexthop add id 103 group 62,1000 type resilient buckets 32"
+ if [[ $? == 0 ]]; then
+ local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535 $(:
+ )type resilient buckets 32 idle_timer 0 $(:
+ )unbalanced_timer 0"
+ run_cmd "$IP nexthop replace $GRP"
+ check_nexthop "id 103" "$GRP unbalanced_time 0"
+ rc=$?
+ else
+ rc=$ksft_skip
+ fi
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ log_test $rc 0 "16-bit weights"
}
ipv6_fcnal_runtime()
@@ -2475,6 +2570,7 @@ done
if [ "$TESTS" != "none" ]; then
printf "\nTests passed: %3d\n" ${nsuccess}
printf "Tests failed: %3d\n" ${nfail}
+ printf "Tests skipped: %2d\n" ${nskip}
fi
exit $ret
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 7c01f58a20de..5fbdd2a0b537 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -35,18 +35,13 @@ log_test()
local expected=$2
local msg="$3"
- $IP rule show | grep -q l3mdev
- if [ $? -eq 0 ]; then
- msg="$msg (VRF)"
- fi
-
if [ ${rc} -eq ${expected} ]; then
nsuccess=$((nsuccess+1))
- printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
else
ret=1
nfail=$((nfail+1))
- printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
@@ -56,39 +51,6 @@ log_test()
fi
}
-log_section()
-{
- echo
- echo "######################################################################"
- echo "TEST SECTION: $*"
- echo "######################################################################"
-}
-
-check_nettest()
-{
- if which nettest > /dev/null 2>&1; then
- return 0
- fi
-
- # Add the selftest directory to PATH if not already done
- if [ "${SELFTEST_PATH}" = "" ]; then
- SELFTEST_PATH="$(dirname $0)"
- PATH="${PATH}:${SELFTEST_PATH}"
-
- # Now retry with the new path
- if which nettest > /dev/null 2>&1; then
- return 0
- fi
-
- if [ "${ret}" -eq 0 ]; then
- ret="${ksft_skip}"
- fi
- echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')"
- fi
-
- return 1
-}
-
setup()
{
set -e
@@ -187,12 +149,17 @@ fib_rule6_test_match_n_redirect()
{
local match="$1"
local getmatch="$2"
- local description="$3"
+ local getnomatch="$3"
+ local description="$4"
+ local nomatch_description="$5"
$IP -6 rule add $match table $RTABLE
$IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
log_test $? 0 "rule6 check: $description"
+ $IP -6 route get $GW_IP6 $getnomatch 2>&1 | grep -q "table $RTABLE"
+ log_test $? 1 "rule6 check: $nomatch_description"
+
fib_rule6_del_by_pref "$match"
log_test $? 0 "rule6 del by pref: $description"
}
@@ -213,18 +180,27 @@ fib_rule6_test_reject()
fib_rule6_test()
{
+ local ext_name=$1; shift
+ local getnomatch
local getmatch
local match
local cnt
+ echo
+ echo "IPv6 FIB rule tests $ext_name"
+
# setup the fib rule redirect route
$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
match="oif $DEV"
- fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
+ getnomatch="oif lo"
+ fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \
+ "oif redirect to table" "oif no redirect to table"
match="from $SRC_IP6 iif $DEV"
- fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+ getnomatch="from $SRC_IP6 iif lo"
+ fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \
+ "iif redirect to table" "iif no redirect to table"
# Reject dsfield (tos) options which have ECN bits set
for cnt in $(seq 1 3); do
@@ -238,44 +214,174 @@ fib_rule6_test()
# Using option 'tos' instead of 'dsfield' as old iproute2
# versions don't support 'dsfield' in ip rule show.
getmatch="tos $cnt"
+ getnomatch="tos 0x20"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
- "$getmatch redirect to table"
+ "$getnomatch" "$getmatch redirect to table" \
+ "$getnomatch no redirect to table"
+ done
+
+ # Re-test TOS matching, but with input routes since they are handled
+ # differently from output routes.
+ match="tos 0x10"
+ for cnt in "0x10" "0x11" "0x12" "0x13"; do
+ getmatch="tos $cnt"
+ getnomatch="tos 0x20"
+ fib_rule6_test_match_n_redirect "$match" \
+ "from $SRC_IP6 iif $DEV $getmatch" \
+ "from $SRC_IP6 iif $DEV $getnomatch" \
+ "iif $getmatch redirect to table" \
+ "iif $getnomatch no redirect to table"
done
match="fwmark 0x64"
getmatch="mark 0x64"
- fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+ getnomatch="mark 0x63"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \
+ "fwmark redirect to table" "fwmark no redirect to table"
fib_check_iproute_support "uidrange" "uid"
if [ $? -eq 0 ]; then
match="uidrange 100-100"
getmatch="uid 100"
- fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ getnomatch="uid 101"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "uid redirect to table" \
+ "uid no redirect to table"
fi
fib_check_iproute_support "sport" "sport"
if [ $? -eq 0 ]; then
match="sport 666 dport 777"
- fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ getnomatch="sport 667 dport 778"
+ fib_rule6_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "sport and dport redirect to table" \
+ "sport and dport no redirect to table"
+
+ match="sport 100-200 dport 300-400"
+ getmatch="sport 100 dport 400"
+ getnomatch="sport 100 dport 401"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" \
+ "sport and dport range redirect to table" \
+ "sport and dport range no redirect to table"
+ fi
+
+ ip rule help 2>&1 | grep sport | grep -q MASK
+ if [ $? -eq 0 ]; then
+ match="sport 0x0f00/0xff00 dport 0x000f/0x00ff"
+ getmatch="sport 0x0f11 dport 0x220f"
+ getnomatch="sport 0x1f11 dport 0x221f"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "sport and dport masked redirect to table" \
+ "sport and dport masked no redirect to table"
fi
fib_check_iproute_support "ipproto" "ipproto"
if [ $? -eq 0 ]; then
match="ipproto tcp"
- fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match"
+ getnomatch="ipproto udp"
+ fib_rule6_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "ipproto tcp match" "ipproto udp no match"
fi
fib_check_iproute_support "ipproto" "ipproto"
if [ $? -eq 0 ]; then
match="ipproto ipv6-icmp"
- fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match"
+ getnomatch="ipproto tcp"
+ fib_rule6_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "ipproto ipv6-icmp match" \
+ "ipproto ipv6-tcp no match"
+ fi
+
+ fib_check_iproute_support "dscp" "tos"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x3f"
+ getmatch="tos 0xfc"
+ getnomatch="tos 0xf4"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp redirect to table" \
+ "dscp no redirect to table"
+
+ match="dscp 0x3f"
+ getmatch="from $SRC_IP6 iif $DEV tos 0xfc"
+ getnomatch="from $SRC_IP6 iif $DEV tos 0xf4"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp redirect to table" \
+ "iif dscp no redirect to table"
+ fi
+
+ ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x0f/0x0f"
+ tosmatch=$(printf 0x"%x" $((0x1f << 2)))
+ tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
+ getmatch="tos $tosmatch"
+ getnomatch="tos $tosnomatch"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp masked redirect to table" \
+ "dscp masked no redirect to table"
+
+ match="dscp 0x0f/0x0f"
+ getmatch="from $SRC_IP6 iif $DEV tos $tosmatch"
+ getnomatch="from $SRC_IP6 iif $DEV tos $tosnomatch"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp masked redirect to table" \
+ "iif dscp masked no redirect to table"
+ fi
+
+ fib_check_iproute_support "flowlabel" "flowlabel"
+ if [ $? -eq 0 ]; then
+ match="flowlabel 0xfffff"
+ getmatch="flowlabel 0xfffff"
+ getnomatch="flowlabel 0xf"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "flowlabel redirect to table" \
+ "flowlabel no redirect to table"
+
+ match="flowlabel 0xfffff"
+ getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff"
+ getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif flowlabel redirect to table" \
+ "iif flowlabel no redirect to table"
+
+ match="flowlabel 0x08000/0x08000"
+ getmatch="flowlabel 0xfffff"
+ getnomatch="flowlabel 0xf7fff"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "flowlabel masked redirect to table" \
+ "flowlabel masked no redirect to table"
+
+ match="flowlabel 0x08000/0x08000"
+ getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff"
+ getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf7fff"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif flowlabel masked redirect to table" \
+ "iif flowlabel masked no redirect to table"
+ fi
+
+ $IP link show dev $DEV | grep -q vrf0
+ if [ $? -eq 0 ]; then
+ match="oif vrf0"
+ getmatch="oif $DEV"
+ getnomatch="oif lo"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF oif redirect to table" \
+ "VRF oif no redirect to table"
+
+ match="from $SRC_IP6 iif vrf0"
+ getmatch="from $SRC_IP6 iif $DEV"
+ getnomatch="from $SRC_IP6 iif lo"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF iif redirect to table" \
+ "VRF iif no redirect to table"
fi
}
fib_rule6_vrf_test()
{
setup_vrf
- fib_rule6_test
+ fib_rule6_test "- with VRF"
cleanup_vrf
}
@@ -285,10 +391,8 @@ fib_rule6_connect_test()
{
local dsfield
- if ! check_nettest; then
- echo "SKIP: Could not run test without nettest tool"
- return
- fi
+ echo
+ echo "IPv6 FIB rule connect tests"
setup_peer
$IP -6 rule add dsfield 0x04 table $RTABLE_PEER
@@ -306,7 +410,45 @@ fib_rule6_connect_test()
log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})"
done
+ # Check that UDP and TCP connections fail when using a DS Field that
+ # does not match the previously configured FIB rule.
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \
+ -Q 0x20 -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 1 "rule6 dsfield udp no connect (dsfield 0x20)"
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0x20 \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 1 "rule6 dsfield tcp no connect (dsfield 0x20)"
+
$IP -6 rule del dsfield 0x04 table $RTABLE_PEER
+
+ ip rule help 2>&1 | grep -q dscp
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 iprule too old, missing dscp match"
+ cleanup_peer
+ return
+ fi
+
+ $IP -6 rule add dscp 0x3f table $RTABLE_PEER
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xfc \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 0 "rule6 dscp udp connect"
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xfc \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 0 "rule6 dscp tcp connect"
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xf4 \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 1 "rule6 dscp udp no connect"
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xf4 \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 1 "rule6 dscp tcp no connect"
+
+ $IP -6 rule del dscp 0x3f table $RTABLE_PEER
+
cleanup_peer
}
@@ -326,12 +468,17 @@ fib_rule4_test_match_n_redirect()
{
local match="$1"
local getmatch="$2"
- local description="$3"
+ local getnomatch="$3"
+ local description="$4"
+ local nomatch_description="$5"
$IP rule add $match table $RTABLE
$IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
log_test $? 0 "rule4 check: $description"
+ $IP route get $GW_IP4 $getnomatch 2>&1 | grep -q "table $RTABLE"
+ log_test $? 1 "rule4 check: $nomatch_description"
+
fib_rule4_del_by_pref "$match"
log_test $? 0 "rule4 del by pref: $description"
}
@@ -352,23 +499,28 @@ fib_rule4_test_reject()
fib_rule4_test()
{
+ local ext_name=$1; shift
+ local getnomatch
local getmatch
local match
local cnt
+ echo
+ echo "IPv4 FIB rule tests $ext_name"
+
# setup the fib rule redirect route
$IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
match="oif $DEV"
- fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
+ getnomatch="oif lo"
+ fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
+ "oif redirect to table" "oif no redirect to table"
- # need enable forwarding and disable rp_filter temporarily as all the
- # addresses are in the same subnet and egress device == ingress device.
ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1
- ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
match="from $SRC_IP iif $DEV"
- fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
- ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0
+ getnomatch="from $SRC_IP iif lo"
+ fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
+ "iif redirect to table" "iif no redirect to table"
# Reject dsfield (tos) options which have ECN bits set
for cnt in $(seq 1 3); do
@@ -382,44 +534,144 @@ fib_rule4_test()
# Using option 'tos' instead of 'dsfield' as old iproute2
# versions don't support 'dsfield' in ip rule show.
getmatch="tos $cnt"
+ getnomatch="tos 0x20"
fib_rule4_test_match_n_redirect "$match" "$getmatch" \
- "$getmatch redirect to table"
+ "$getnomatch" "$getmatch redirect to table" \
+ "$getnomatch no redirect to table"
+ done
+
+ # Re-test TOS matching, but with input routes since they are handled
+ # differently from output routes.
+ match="tos 0x10"
+ for cnt in "0x10" "0x11" "0x12" "0x13"; do
+ getmatch="tos $cnt"
+ getnomatch="tos 0x20"
+ fib_rule4_test_match_n_redirect "$match" \
+ "from $SRC_IP iif $DEV $getmatch" \
+ "from $SRC_IP iif $DEV $getnomatch" \
+ "iif $getmatch redirect to table" \
+ "iif $getnomatch no redirect to table"
done
match="fwmark 0x64"
getmatch="mark 0x64"
- fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+ getnomatch="mark 0x63"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \
+ "fwmark redirect to table" "fwmark no redirect to table"
fib_check_iproute_support "uidrange" "uid"
if [ $? -eq 0 ]; then
match="uidrange 100-100"
getmatch="uid 100"
- fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ getnomatch="uid 101"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "uid redirect to table" \
+ "uid no redirect to table"
fi
fib_check_iproute_support "sport" "sport"
if [ $? -eq 0 ]; then
match="sport 666 dport 777"
- fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ getnomatch="sport 667 dport 778"
+ fib_rule4_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "sport and dport redirect to table" \
+ "sport and dport no redirect to table"
+
+ match="sport 100-200 dport 300-400"
+ getmatch="sport 100 dport 400"
+ getnomatch="sport 100 dport 401"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" \
+ "sport and dport range redirect to table" \
+ "sport and dport range no redirect to table"
+ fi
+
+ ip rule help 2>&1 | grep sport | grep -q MASK
+ if [ $? -eq 0 ]; then
+ match="sport 0x0f00/0xff00 dport 0x000f/0x00ff"
+ getmatch="sport 0x0f11 dport 0x220f"
+ getnomatch="sport 0x1f11 dport 0x221f"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "sport and dport masked redirect to table" \
+ "sport and dport masked no redirect to table"
fi
fib_check_iproute_support "ipproto" "ipproto"
if [ $? -eq 0 ]; then
match="ipproto tcp"
- fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match"
+ getnomatch="ipproto udp"
+ fib_rule4_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "ipproto tcp match" \
+ "ipproto udp no match"
fi
fib_check_iproute_support "ipproto" "ipproto"
if [ $? -eq 0 ]; then
match="ipproto icmp"
- fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+ getnomatch="ipproto tcp"
+ fib_rule4_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "ipproto icmp match" \
+ "ipproto tcp no match"
+ fi
+
+ fib_check_iproute_support "dscp" "tos"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x3f"
+ getmatch="tos 0xfc"
+ getnomatch="tos 0xf4"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp redirect to table" \
+ "dscp no redirect to table"
+
+ match="dscp 0x3f"
+ getmatch="from $SRC_IP iif $DEV tos 0xfc"
+ getnomatch="from $SRC_IP iif $DEV tos 0xf4"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp redirect to table" \
+ "iif dscp no redirect to table"
+ fi
+
+ ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x0f/0x0f"
+ tosmatch=$(printf 0x"%x" $((0x1f << 2)))
+ tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
+ getmatch="tos $tosmatch"
+ getnomatch="tos $tosnomatch"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp masked redirect to table" \
+ "dscp masked no redirect to table"
+
+ match="dscp 0x0f/0x0f"
+ getmatch="from $SRC_IP iif $DEV tos $tosmatch"
+ getnomatch="from $SRC_IP iif $DEV tos $tosnomatch"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp masked redirect to table" \
+ "iif dscp masked no redirect to table"
+ fi
+
+ $IP link show dev $DEV | grep -q vrf0
+ if [ $? -eq 0 ]; then
+ match="oif vrf0"
+ getmatch="oif $DEV"
+ getnomatch="oif lo"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF oif redirect to table" \
+ "VRF oif no redirect to table"
+
+ match="from $SRC_IP iif vrf0"
+ getmatch="from $SRC_IP iif $DEV"
+ getnomatch="from $SRC_IP iif lo"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF iif redirect to table" \
+ "VRF iif no redirect to table"
fi
}
fib_rule4_vrf_test()
{
setup_vrf
- fib_rule4_test
+ fib_rule4_test "- with VRF"
cleanup_vrf
}
@@ -429,10 +681,8 @@ fib_rule4_connect_test()
{
local dsfield
- if ! check_nettest; then
- echo "SKIP: Could not run test without nettest tool"
- return
- fi
+ echo
+ echo "IPv4 FIB rule connect tests"
setup_peer
$IP -4 rule add dsfield 0x04 table $RTABLE_PEER
@@ -450,16 +700,46 @@ fib_rule4_connect_test()
log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})"
done
+ # Check that UDP and TCP connections fail when using a DS Field that
+ # does not match the previously configured FIB rule.
+ nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0x20 \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 1 "rule4 dsfield udp no connect (dsfield 0x20)"
+
+ nettest -q -B -t 5 -N $testns -O $peerns -Q 0x20 \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 1 "rule4 dsfield tcp no connect (dsfield 0x20)"
+
$IP -4 rule del dsfield 0x04 table $RTABLE_PEER
- cleanup_peer
-}
-run_fibrule_tests()
-{
- log_section "IPv4 fib rule"
- fib_rule4_test
- log_section "IPv6 fib rule"
- fib_rule6_test
+ ip rule help 2>&1 | grep -q dscp
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 iprule too old, missing dscp match"
+ cleanup_peer
+ return
+ fi
+
+ $IP -4 rule add dscp 0x3f table $RTABLE_PEER
+
+ nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xfc \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 0 "rule4 dscp udp connect"
+
+ nettest -q -B -t 5 -N $testns -O $peerns -Q 0xfc \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 0 "rule4 dscp tcp connect"
+
+ nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xf4 \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 1 "rule4 dscp udp no connect"
+
+ nettest -q -B -t 5 -N $testns -O $peerns -Q 0xf4 \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 1 "rule4 dscp tcp no connect"
+
+ $IP -4 rule del dscp 0x3f table $RTABLE_PEER
+
+ cleanup_peer
}
################################################################################
# usage
@@ -495,6 +775,8 @@ if [ ! -x "$(command -v ip)" ]; then
exit $ksft_skip
fi
+check_gen_prog "nettest"
+
# start clean
cleanup &> /dev/null
setup
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 73895711cdf4..a88f797c549a 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -11,7 +11,8 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
- ipv4_mpath_list ipv6_mpath_list"
+ ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance \
+ fib6_ra_to_static"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -689,7 +690,7 @@ fib6_notify_test()
log_test $ret 0 "ipv6 route add notify"
- { kill %% && wait %%; } 2>/dev/null
+ kill_process %%
#rm errors.txt
@@ -736,7 +737,7 @@ fib_notify_test()
log_test $ret 0 "ipv4 route add notify"
- { kill %% && wait %%; } 2>/dev/null
+ kill_process %%
rm errors.txt
@@ -1085,6 +1086,35 @@ route_setup()
set +e
}
+forwarding_cleanup()
+{
+ cleanup_ns $ns3
+
+ route_cleanup
+}
+
+# extend route_setup with an ns3 reachable through ns2 over both devices
+forwarding_setup()
+{
+ forwarding_cleanup
+
+ route_setup
+
+ setup_ns ns3
+
+ ip link add veth5 netns $ns3 type veth peer name veth6 netns $ns2
+ ip -netns $ns3 link set veth5 up
+ ip -netns $ns2 link set veth6 up
+
+ ip -netns $ns3 -4 addr add dev veth5 172.16.105.1/24
+ ip -netns $ns2 -4 addr add dev veth6 172.16.105.2/24
+ ip -netns $ns3 -4 route add 172.16.100.0/22 via 172.16.105.2
+
+ ip -netns $ns3 -6 addr add dev veth5 2001:db8:105::1/64 nodad
+ ip -netns $ns2 -6 addr add dev veth6 2001:db8:105::2/64 nodad
+ ip -netns $ns3 -6 route add 2001:db8:101::/33 via 2001:db8:105::2
+}
+
# assumption is that basic add of a single path route works
# otherwise just adding an address on an interface is broken
ipv6_rt_add()
@@ -1447,6 +1477,68 @@ ipv6_route_metrics_test()
route_cleanup
}
+fib6_ra_to_static()
+{
+ setup
+
+ echo
+ echo "Fib6 route promotion from RA-learned to static test"
+ set -e
+
+ # ra6 is required for the test. (ipv6toolkit)
+ if [ ! -x "$(command -v ra6)" ]; then
+ echo "SKIP: ra6 not found."
+ set +e
+ cleanup &> /dev/null
+ return
+ fi
+
+ # Create a pair of veth devices to send a RA message from one
+ # device to another.
+ $IP link add veth1 type veth peer name veth2
+ $IP link set dev veth1 up
+ $IP link set dev veth2 up
+ $IP -6 address add 2001:10::1/64 dev veth1 nodad
+ $IP -6 address add 2001:10::2/64 dev veth2 nodad
+
+ # Make veth1 ready to receive RA messages.
+ $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2
+
+ # Send a RA message with a prefix from veth2.
+ $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60
+
+ # Wait for the RA message.
+ sleep 1
+
+ # systemd may mess up the test. Make sure that
+ # systemd-networkd.service and systemd-networkd.socket are stopped.
+ check_rt_num_clean 2 $($IP -6 route list|grep expires|wc -l) || return
+
+ # Configure static address on the same prefix
+ $IP -6 address add 2001:12::dead/64 dev veth1 nodad
+
+ # On-link route won't expire anymore, default route still owned by RA
+ check_rt_num 1 $($IP -6 route list |grep expires|wc -l)
+
+ # Send a second RA message with a prefix from veth2.
+ $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60
+ sleep 1
+
+ # Expire is not back, on-link route is still static
+ check_rt_num 1 $($IP -6 route list |grep expires|wc -l)
+
+ $IP -6 address del 2001:12::dead/64 dev veth1 nodad
+
+ # Expire is back, on-link route is now owned by RA again
+ check_rt_num 2 $($IP -6 route list |grep expires|wc -l)
+
+ log_test $ret 0 "ipv6 promote RA route to static"
+
+ set +e
+
+ cleanup &> /dev/null
+}
+
# add route for a prefix, flushing any existing routes first
# expected to be the first step of a test
add_route()
@@ -1737,53 +1829,53 @@ ipv4_rt_dsfield()
# DSCP 0x10 should match the specific route, no matter the ECN bits
$IP route get fibmatch 172.16.102.1 dsfield 0x10 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT"
$IP route get fibmatch 172.16.102.1 dsfield 0x11 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)"
$IP route get fibmatch 172.16.102.1 dsfield 0x12 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)"
$IP route get fibmatch 172.16.102.1 dsfield 0x13 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:CE"
# Unknown DSCP should match the generic route, no matter the ECN bits
$IP route get fibmatch 172.16.102.1 dsfield 0x14 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT"
$IP route get fibmatch 172.16.102.1 dsfield 0x15 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)"
$IP route get fibmatch 172.16.102.1 dsfield 0x16 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)"
$IP route get fibmatch 172.16.102.1 dsfield 0x17 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:CE"
# Null DSCP should match the generic route, no matter the ECN bits
$IP route get fibmatch 172.16.102.1 dsfield 0x00 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT"
$IP route get fibmatch 172.16.102.1 dsfield 0x01 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)"
$IP route get fibmatch 172.16.102.1 dsfield 0x02 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)"
$IP route get fibmatch 172.16.102.1 dsfield 0x03 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:CE"
}
@@ -2328,7 +2420,7 @@ ipv4_mangle_test()
$IP route del table 123 172.16.101.0/24 dev veth1
$IP rule del pref 100
- { kill %% && wait %%; } 2>/dev/null
+ kill_process %%
rm $tmp_file
route_cleanup
@@ -2386,7 +2478,7 @@ ipv6_mangle_test()
$IP -6 route del table 123 2001:db8:101::/64 dev veth1
$IP -6 rule del pref 100
- { kill %% && wait %%; } 2>/dev/null
+ kill_process %%
rm $tmp_file
route_cleanup
@@ -2531,9 +2623,6 @@ ipv4_mpath_list_test()
run_cmd "ip -n $ns2 route add 203.0.113.0/24
nexthop via 172.16.201.2 nexthop via 172.16.202.2"
run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
- run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0"
- run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0"
- run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0"
set +e
local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]')
@@ -2600,6 +2689,93 @@ ipv6_mpath_list_test()
route_cleanup
}
+tc_set_flower_counter__saddr_syn() {
+ tc_set_flower_counter $1 $2 $3 "src_ip $4 ip_proto tcp tcp_flags 0x2"
+}
+
+ip_mpath_balance_dep_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v jq)" ]; then
+ echo "jq command not found. Skipping test"
+ return 1
+ fi
+}
+
+ip_mpath_balance() {
+ local -r ipver=$1
+ local -r daddr=$2
+ local -r num_conn=20
+
+ for i in $(seq 1 $num_conn); do
+ ip netns exec $ns3 socat $ipver TCP-LISTEN:8000 STDIO >/dev/null &
+ sleep 0.02
+ echo -n a | ip netns exec $ns1 socat $ipver STDIO TCP:$daddr:8000
+ done
+
+ local -r syn0="$(tc_get_flower_counter $ns1 veth1)"
+ local -r syn1="$(tc_get_flower_counter $ns1 veth3)"
+ local -r syns=$((syn0+syn1))
+
+ [ "$VERBOSE" = "1" ] && echo "multipath: syns seen: ($syn0,$syn1)"
+
+ [[ $syns -ge $num_conn ]] && [[ $syn0 -gt 0 ]] && [[ $syn1 -gt 0 ]]
+}
+
+ipv4_mpath_balance_test()
+{
+ echo
+ echo "IPv4 multipath load balance test"
+
+ ip_mpath_balance_dep_check || return 1
+ forwarding_setup
+
+ $IP route add 172.16.105.1 \
+ nexthop via 172.16.101.2 \
+ nexthop via 172.16.103.2
+
+ ip netns exec $ns1 \
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+
+ tc_set_flower_counter__saddr_syn $ns1 4 veth1 172.16.101.1
+ tc_set_flower_counter__saddr_syn $ns1 4 veth3 172.16.103.1
+
+ ip_mpath_balance -4 172.16.105.1
+
+ log_test $? 0 "IPv4 multipath loadbalance"
+
+ forwarding_cleanup
+}
+
+ipv6_mpath_balance_test()
+{
+ echo
+ echo "IPv6 multipath load balance test"
+
+ ip_mpath_balance_dep_check || return 1
+ forwarding_setup
+
+ $IP route add 2001:db8:105::1\
+ nexthop via 2001:db8:101::2 \
+ nexthop via 2001:db8:103::2
+
+ ip netns exec $ns1 \
+ sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+
+ tc_set_flower_counter__saddr_syn $ns1 6 veth1 2001:db8:101::1
+ tc_set_flower_counter__saddr_syn $ns1 6 veth3 2001:db8:103::1
+
+ ip_mpath_balance -6 "[2001:db8:105::1]"
+
+ log_test $? 0 "IPv6 multipath loadbalance"
+
+ forwarding_cleanup
+}
+
################################################################################
# usage
@@ -2683,6 +2859,9 @@ do
fib6_gc_test|ipv6_gc) fib6_gc_test;;
ipv4_mpath_list) ipv4_mpath_list_test;;
ipv6_mpath_list) ipv6_mpath_list_test;;
+ ipv4_mpath_balance) ipv4_mpath_balance_test;;
+ ipv6_mpath_balance) ipv6_mpath_balance_test;;
+ fib6_ra_to_static) fib6_ra_to_static;;
help) echo "Test names: $TESTS"; exit 0;;
esac
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 224346426ef2..ff4a00d91a26 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -1,6 +1,9 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
-TEST_PROGS = bridge_fdb_learning_limit.sh \
+TEST_PROGS := \
+ bridge_activity_notify.sh \
+ bridge_fdb_learning_limit.sh \
+ bridge_fdb_local_vlan_0.sh \
bridge_igmp.sh \
bridge_locked_port.sh \
bridge_mdb.sh \
@@ -18,64 +21,64 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
gre_custom_multipath_hash.sh \
gre_inner_v4_multipath.sh \
gre_inner_v6_multipath.sh \
- gre_multipath_nh_res.sh \
- gre_multipath_nh.sh \
gre_multipath.sh \
+ gre_multipath_nh.sh \
+ gre_multipath_nh_res.sh \
ip6_forward_instats_vrf.sh \
ip6gre_custom_multipath_hash.sh \
+ ip6gre_flat.sh \
ip6gre_flat_key.sh \
ip6gre_flat_keys.sh \
- ip6gre_flat.sh \
+ ip6gre_hier.sh \
ip6gre_hier_key.sh \
ip6gre_hier_keys.sh \
- ip6gre_hier.sh \
ip6gre_inner_v4_multipath.sh \
ip6gre_inner_v6_multipath.sh \
+ ipip_flat_gre.sh \
ipip_flat_gre_key.sh \
ipip_flat_gre_keys.sh \
- ipip_flat_gre.sh \
+ ipip_hier_gre.sh \
ipip_hier_gre_key.sh \
ipip_hier_gre_keys.sh \
- ipip_hier_gre.sh \
lib_sh_test.sh \
local_termination.sh \
min_max_mtu.sh \
+ mirror_gre.sh \
mirror_gre_bound.sh \
mirror_gre_bridge_1d.sh \
mirror_gre_bridge_1d_vlan.sh \
- mirror_gre_bridge_1q_lag.sh \
mirror_gre_bridge_1q.sh \
+ mirror_gre_bridge_1q_lag.sh \
mirror_gre_changes.sh \
mirror_gre_flower.sh \
mirror_gre_lag_lacp.sh \
mirror_gre_neigh.sh \
mirror_gre_nh.sh \
- mirror_gre.sh \
- mirror_gre_vlan_bridge_1q.sh \
mirror_gre_vlan.sh \
+ mirror_gre_vlan_bridge_1q.sh \
mirror_vlan.sh \
no_forwarding.sh \
pedit_dsfield.sh \
pedit_ip.sh \
pedit_l4port.sh \
- q_in_vni_ipv6.sh \
q_in_vni.sh \
+ q_in_vni_ipv6.sh \
+ router.sh \
router_bridge.sh \
router_bridge_1d.sh \
router_bridge_1d_lag.sh \
router_bridge_lag.sh \
+ router_bridge_pvid_vlan_upper.sh \
router_bridge_vlan.sh \
router_bridge_vlan_upper.sh \
- router_bridge_pvid_vlan_upper.sh \
router_bridge_vlan_upper_pvid.sh \
router_broadcast.sh \
- router_mpath_nh_res.sh \
router_mpath_nh.sh \
+ router_mpath_nh_res.sh \
router_mpath_seed.sh \
router_multicast.sh \
router_multipath.sh \
router_nh.sh \
- router.sh \
router_vid_1.sh \
sch_ets.sh \
sch_red.sh \
@@ -85,30 +88,34 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
skbedit_priority.sh \
tc_actions.sh \
tc_chains.sh \
- tc_flower_router.sh \
tc_flower.sh \
- tc_flower_l2_miss.sh \
tc_flower_cfm.sh \
+ tc_flower_l2_miss.sh \
tc_flower_port_range.sh \
+ tc_flower_router.sh \
tc_mpls_l2vpn.sh \
tc_police.sh \
tc_shblocks.sh \
tc_tunnel_key.sh \
tc_vlan_modify.sh \
- vxlan_asymmetric_ipv6.sh \
vxlan_asymmetric.sh \
+ vxlan_asymmetric_ipv6.sh \
+ vxlan_bridge_1d.sh \
vxlan_bridge_1d_ipv6.sh \
- vxlan_bridge_1d_port_8472_ipv6.sh \
vxlan_bridge_1d_port_8472.sh \
- vxlan_bridge_1d.sh \
+ vxlan_bridge_1d_port_8472_ipv6.sh \
+ vxlan_bridge_1q.sh \
vxlan_bridge_1q_ipv6.sh \
- vxlan_bridge_1q_port_8472_ipv6.sh \
+ vxlan_bridge_1q_mc_ul.sh \
vxlan_bridge_1q_port_8472.sh \
- vxlan_bridge_1q.sh \
+ vxlan_bridge_1q_port_8472_ipv6.sh \
+ vxlan_reserved.sh \
+ vxlan_symmetric.sh \
vxlan_symmetric_ipv6.sh \
- vxlan_symmetric.sh
+# end of TEST_PROGS
-TEST_FILES := devlink_lib.sh \
+TEST_FILES := \
+ devlink_lib.sh \
fib_offload_lib.sh \
forwarding.config.sample \
ip6gre_lib.sh \
@@ -123,9 +130,12 @@ TEST_FILES := devlink_lib.sh \
sch_ets_tests.sh \
sch_tbf_core.sh \
sch_tbf_etsprio.sh \
- tc_common.sh
+ tc_common.sh \
+# end of TEST_FILES
TEST_INCLUDES := \
- ../lib.sh
+ $(wildcard ../lib/sh/*.sh) \
+ ../lib.sh \
+# end of TEST_INCLUDES
include ../../lib.mk
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
index 7fdb6a9ca543..392a5a91ed37 100644
--- a/tools/testing/selftests/net/forwarding/README
+++ b/tools/testing/selftests/net/forwarding/README
@@ -6,7 +6,7 @@ to easily create and test complex environments.
Unfortunately, these namespaces can not be used with actual switching
ASICs, as their ports can not be migrated to other network namespaces
-(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+(dev->netns_immutable) and most of them probably do not support the
L1-separation provided by namespaces.
However, a similar kind of flexibility can be achieved by using VRFs and
@@ -57,6 +57,21 @@ o Code shall be checked using ShellCheck [1] prior to submission.
1. https://www.shellcheck.net/
+Cleanups
+--------
+
+o lib.sh brings in defer.sh (by way of ../lib.sh) by default. Consider
+ making use of the defer primitive to schedule automatic cleanups. This
+ makes it harder to forget to remove a temporary netdevice, kill a running
+ process or perform other cleanup when the test script is interrupted.
+
+o When adding a helper that dirties the environment, but schedules all
+ necessary cleanups through defer, consider prefixing it adf_ for
+ consistency with lib.sh and ../lib.sh helpers. This serves as an
+ immediately visible bit of documentation about the helper API.
+
+o Definitely do the above for any new code in lib.sh, if practical.
+
Customization
=============
diff --git a/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh
new file mode 100755
index 000000000000..522a5b1b046c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh
@@ -0,0 +1,170 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | 192.0.2.1/28 | | 192.0.2.2/28 |
+# | + $h1 | | + $h2 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR1 (802.1d) + $swp2 | |
+# | | | |
+# | +-----------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ new_inactive_test
+ existing_active_test
+ norefresh_test
+"
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ adf_simple_if_init "$h1" 192.0.2.1/28
+}
+
+h2_create()
+{
+ adf_simple_if_init "$h2" 192.0.2.2/28
+}
+
+switch_create()
+{
+ adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 \
+ ageing_time "$LOW_AGEING_TIME"
+ adf_ip_link_set_up br1
+
+ adf_ip_link_set_master "$swp1" br1
+ adf_ip_link_set_up "$swp1"
+
+ adf_ip_link_set_master "$swp2" br1
+ adf_ip_link_set_up "$swp2"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ adf_vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+fdb_active_wait()
+{
+ local mac=$1; shift
+
+ bridge -d fdb get "$mac" br br1 | grep -q -v "inactive"
+}
+
+fdb_inactive_wait()
+{
+ local mac=$1; shift
+
+ bridge -d fdb get "$mac" br br1 | grep -q "inactive"
+}
+
+new_inactive_test()
+{
+ local mac="00:11:22:33:44:55"
+
+ # Add a new FDB entry as static and inactive and check that it
+ # becomes active upon traffic.
+ RET=0
+
+ bridge fdb add "$mac" dev "$swp1" master static activity_notify inactive
+ bridge -d fdb get "$mac" br br1 | grep -q "inactive"
+ check_err $? "FDB entry not present as \"inactive\" when should"
+
+ $MZ "$h1" -c 1 -p 64 -a "$mac" -b bcast -t ip -q
+
+ busywait "$BUSYWAIT_TIMEOUT" fdb_active_wait "$mac"
+ check_err $? "FDB entry present as \"inactive\" when should not"
+
+ log_test "Transition from inactive to active"
+
+ bridge fdb del "$mac" dev "$swp1" master
+}
+
+existing_active_test()
+{
+ local mac="00:11:22:33:44:55"
+ local ageing_time
+
+ # Enable activity notifications on an existing dynamic FDB entry and
+ # check that it becomes inactive after the ageing time passed.
+ RET=0
+
+ bridge fdb add "$mac" dev "$swp1" master dynamic
+ bridge fdb replace "$mac" dev "$swp1" master static activity_notify norefresh
+
+ bridge -d fdb get "$mac" br br1 | grep -q "activity_notify"
+ check_err $? "FDB entry not present as \"activity_notify\" when should"
+
+ bridge -d fdb get "$mac" br br1 | grep -q "inactive"
+ check_fail $? "FDB entry present as \"inactive\" when should not"
+
+ ageing_time=$(bridge_ageing_time_get br1)
+ slowwait $((ageing_time * 2)) fdb_inactive_wait "$mac"
+ check_err $? "FDB entry not present as \"inactive\" when should"
+
+ log_test "Transition from active to inactive"
+
+ bridge fdb del "$mac" dev "$swp1" master
+}
+
+norefresh_test()
+{
+ local mac="00:11:22:33:44:55"
+ local updated_time
+
+ # Check that the "updated" time is reset when replacing an FDB entry
+ # without the "norefresh" keyword and that it is not reset when
+ # replacing with the "norefresh" keyword.
+ RET=0
+
+ bridge fdb add "$mac" dev "$swp1" master static
+ sleep 1
+
+ bridge fdb replace "$mac" dev "$swp1" master static activity_notify
+ updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]')
+ if [[ $updated_time -ne 0 ]]; then
+ check_err 1 "\"updated\" time was not reset when should"
+ fi
+
+ sleep 1
+ bridge fdb replace "$mac" dev "$swp1" master static norefresh
+ updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]')
+ if [[ $updated_time -eq 0 ]]; then
+ check_err 1 "\"updated\" time was reset when should not"
+ fi
+
+ log_test "Resetting of \"updated\" time"
+
+ bridge fdb del "$mac" dev "$swp1" master
+}
+
+if ! bridge fdb help 2>&1 | grep -q "activity_notify"; then
+ echo "SKIP: iproute2 too old, missing bridge FDB activity notification control"
+ exit "$ksft_skip"
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
index 0760a34b7114..a21b7085da2e 100755
--- a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
@@ -178,6 +178,22 @@ fdb_del()
check_err $? "Failed to remove a FDB entry of type ${type}"
}
+check_fdb_n_learned_support()
+{
+ if ! ip link help bridge 2>&1 | grep -q "fdb_max_learned"; then
+ echo "SKIP: iproute2 too old, missing bridge max learned support"
+ exit $ksft_skip
+ fi
+
+ ip link add dev br0 type bridge
+ local learned=$(fdb_get_n_learned)
+ ip link del dev br0
+ if [ "$learned" == "null" ]; then
+ echo "SKIP: kernel too old; bridge fdb_n_learned feature not supported."
+ exit $ksft_skip
+ fi
+}
+
check_accounting_one_type()
{
local type=$1 is_counted=$2 overrides_learned=$3
@@ -274,6 +290,8 @@ check_limit()
done
}
+check_fdb_n_learned_support
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh
new file mode 100755
index 000000000000..694de8ba97e4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh
@@ -0,0 +1,387 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +-----------------------+ +-----------------------+
+# | H1 (vrf) | | H2 (vrf) | | H3 (vrf) |
+# | + $h1 | | + $h2 | | + $h3 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 | | | 192.0.2.18/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | | | 2001:db8:2::2/64 |
+# | | | | | | | | |
+# +----|------------------+ +----|------------------+ +----|------------------+
+# | | |
+# +----|-------------------------|-------------------------|------------------+
+# | +--|-------------------------|------------------+ | |
+# | | + $swp1 + $swp2 | + $swp3 |
+# | | | 192.0.2.17/28 |
+# | | BR1 (802.1q) | 2001:db8:2::1/64 |
+# | | 192.0.2.3/28 | |
+# | | 2001:db8:1::3/64 | |
+# | +-----------------------------------------------+ SW |
+# +---------------------------------------------------------------------------+
+#
+#shellcheck disable=SC2317 # SC doesn't see our uses of functions.
+#shellcheck disable=SC2034 # ... and global variables
+
+ALL_TESTS="
+ test_d_no_sharing
+ test_d_sharing
+ test_q_no_sharing
+ test_q_sharing
+ test_addr_set
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+pMAC=00:11:22:33:44:55
+bMAC=00:11:22:33:44:66
+mMAC=00:11:22:33:44:77
+xMAC=00:11:22:33:44:88
+
+host_create()
+{
+ local h=$1; shift
+ local ipv4=$1; shift
+ local ipv6=$1; shift
+
+ adf_simple_if_init "$h" "$ipv4" "$ipv6"
+ adf_ip_route_add vrf "v$h" 192.0.2.16/28 nexthop via 192.0.2.3
+ adf_ip_route_add vrf "v$h" 2001:db8:2::/64 nexthop via 2001:db8:1::3
+}
+
+h3_create()
+{
+ adf_simple_if_init "$h3" 192.0.2.18/28 2001:db8:2::2/64
+ adf_ip_route_add vrf "v$h3" 192.0.2.0/28 nexthop via 192.0.2.17
+ adf_ip_route_add vrf "v$h3" 2001:db8:1::/64 nexthop via 2001:db8:2::1
+
+ tc qdisc add dev "$h3" clsact
+ defer tc qdisc del dev "$h3" clsact
+
+ tc filter add dev "$h3" ingress proto ip pref 104 \
+ flower skip_hw ip_proto udp dst_port 4096 \
+ action pass
+ defer tc filter del dev "$h3" ingress proto ip pref 104
+
+ tc qdisc add dev "$h2" clsact
+ defer tc qdisc del dev "$h2" clsact
+
+ tc filter add dev "$h2" ingress proto ip pref 104 \
+ flower skip_hw ip_proto udp dst_port 4096 \
+ action pass
+ defer tc filter del dev "$h2" ingress proto ip pref 104
+}
+
+switch_create()
+{
+ adf_ip_link_set_up "$swp1"
+
+ adf_ip_link_set_up "$swp2"
+
+ adf_ip_addr_add "$swp3" 192.0.2.17/28
+ adf_ip_addr_add "$swp3" 2001:db8:2::1/64
+ adf_ip_link_set_up "$swp3"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ adf_vrf_prepare
+ adf_forwarding_enable
+
+ host_create "$h1" 192.0.2.1/28 2001:db8:1::1/64
+ host_create "$h2" 192.0.2.2/28 2001:db8:1::2/64
+ h3_create
+
+ switch_create
+}
+
+adf_bridge_configure()
+{
+ local dev
+
+ adf_ip_addr_add br 192.0.2.3/28
+ adf_ip_addr_add br 2001:db8:1::3/64
+
+ adf_bridge_vlan_add dev br vid 1 pvid untagged self
+ adf_bridge_vlan_add dev br vid 2 self
+ adf_bridge_vlan_add dev br vid 3 self
+
+ for dev in "$swp1" "$swp2"; do
+ adf_ip_link_set_master "$dev" br
+ adf_bridge_vlan_add dev "$dev" vid 1 pvid untagged
+ adf_bridge_vlan_add dev "$dev" vid 2
+ adf_bridge_vlan_add dev "$dev" vid 3
+ done
+}
+
+adf_bridge_create()
+{
+ local mac
+
+ adf_ip_link_add br up type bridge vlan_default_pvid 0 "$@"
+ mac=$(mac_get br)
+ adf_bridge_configure
+ adf_ip_link_set_addr br "$mac"
+}
+
+check_fdb_local_vlan_0_support()
+{
+ if adf_ip_link_add XXbr up type bridge vlan_filtering 1 \
+ fdb_local_vlan_0 1 &>/dev/null; then
+ return 0
+ fi
+
+ log_test_skip "FDB sharing" \
+ "iproute 2 or the kernel do not support fdb_local_vlan_0"
+}
+
+check_mac_presence()
+{
+ local should_fail=$1; shift
+ local dev=$1; shift
+ local vlan=$1; shift
+ local mac
+
+ mac=$(mac_get "$dev")
+
+ if ((vlan == 0)); then
+ vlan=null
+ fi
+
+ bridge -j fdb show dev "$dev" |
+ jq -e --arg mac "$mac" --argjson vlan "$vlan" \
+ '.[] | select(.mac == $mac) | select(.vlan == $vlan)' > /dev/null
+ check_err_fail "$should_fail" $? "FDB dev $dev vid $vlan addr $mac exists"
+}
+
+do_sharing_test()
+{
+ local should_fail=$1; shift
+ local what=$1; shift
+ local dev
+
+ RET=0
+
+ for dev in "$swp1" "$swp2" br; do
+ check_mac_presence 0 "$dev" 0
+ check_mac_presence "$should_fail" "$dev" 1
+ check_mac_presence "$should_fail" "$dev" 2
+ check_mac_presence "$should_fail" "$dev" 3
+ done
+
+ log_test "$what"
+}
+
+do_end_to_end_test()
+{
+ local mac=$1; shift
+ local what=$1; shift
+ local probe_dev=${1-$h3}; shift
+ local expect=${1-10}; shift
+
+ local t0
+ local t1
+ local dd
+
+ RET=0
+
+ # In mausezahn, use $dev MAC as the destination MAC. In the MAC sharing
+ # context, that will cause an FDB miss on VLAN 1 and prompt a second
+ # lookup in VLAN 0.
+
+ t0=$(tc_rule_stats_get "$probe_dev" 104 ingress)
+
+ $MZ "$h1" -c 10 -p 64 -a own -b "$mac" \
+ -A 192.0.2.1 -B 192.0.2.18 -t udp "dp=4096,sp=2048" -q
+ sleep 1
+
+ t1=$(tc_rule_stats_get "$probe_dev" 104 ingress)
+ dd=$((t1 - t0))
+
+ ((dd == expect))
+ check_err $? "Expected $expect packets on $probe_dev got $dd"
+
+ log_test "$what"
+}
+
+do_tests()
+{
+ local should_fail=$1; shift
+ local what=$1; shift
+ local swp1_mac
+ local br_mac
+
+ swp1_mac=$(mac_get "$swp1")
+ br_mac=$(mac_get br)
+
+ do_sharing_test "$should_fail" "$what"
+ do_end_to_end_test "$swp1_mac" "$what: end to end, $swp1 MAC"
+ do_end_to_end_test "$br_mac" "$what: end to end, br MAC"
+}
+
+bridge_standard()
+{
+ local vlan_filtering=$1; shift
+
+ if ((vlan_filtering)); then
+ echo 802.1q
+ else
+ echo 802.1d
+ fi
+}
+
+nonexistent_fdb_test()
+{
+ local vlan_filtering=$1; shift
+ local standard
+
+ standard=$(bridge_standard "$vlan_filtering")
+
+ # We expect flooding, so $h2 should get the traffic.
+ do_end_to_end_test "$xMAC" "$standard: Nonexistent FDB" "$h2"
+}
+
+misleading_fdb_test()
+{
+ local vlan_filtering=$1; shift
+ local standard
+
+ standard=$(bridge_standard "$vlan_filtering")
+
+ defer_scope_push
+ # Add an FDB entry on VLAN 0. The lookup on VLAN-aware bridge
+ # shouldn't pick this up even with fdb_local_vlan_0 enabled, so
+ # the traffic should be flooded. This all holds on
+ # vlan_filtering bridge, on non-vlan_filtering one the FDB entry
+ # is expected to be found as usual, no flooding takes place.
+ #
+ # Adding only on VLAN 0 is a bit tricky, because bridge is
+ # trying to be nice and interprets the request as if the FDB
+ # should be added on each VLAN.
+
+ bridge fdb add "$mMAC" dev "$swp1" master
+ bridge fdb del "$mMAC" dev "$swp1" vlan 1 master
+ bridge fdb del "$mMAC" dev "$swp1" vlan 2 master
+ bridge fdb del "$mMAC" dev "$swp1" vlan 3 master
+
+ local expect=$((vlan_filtering ? 10 : 0))
+ do_end_to_end_test "$mMAC" \
+ "$standard: Lookup of non-local MAC on VLAN 0" \
+ "$h2" "$expect"
+ defer_scope_pop
+}
+
+change_mac()
+{
+ local dev=$1; shift
+ local mac=$1; shift
+ local cur_mac
+
+ cur_mac=$(mac_get "$dev")
+
+ log_info "Change $dev MAC $cur_mac -> $mac"
+ adf_ip_link_set_addr "$dev" "$mac"
+ defer log_info "Change $dev MAC back"
+}
+
+do_test_no_sharing()
+{
+ local vlan_filtering=$1; shift
+ local standard
+
+ standard=$(bridge_standard "$vlan_filtering")
+
+ adf_bridge_create vlan_filtering "$vlan_filtering"
+ setup_wait
+
+ do_tests 0 "$standard, no FDB sharing"
+
+ change_mac "$swp1" "$pMAC"
+ change_mac br "$bMAC"
+
+ do_tests 0 "$standard, no FDB sharing after MAC change"
+
+ in_defer_scope check_fdb_local_vlan_0_support || return
+
+ log_info "Set fdb_local_vlan_0=1"
+ ip link set dev br type bridge fdb_local_vlan_0 1
+
+ do_tests 1 "$standard, fdb sharing after toggle"
+}
+
+do_test_sharing()
+{
+ local vlan_filtering=$1; shift
+ local standard
+
+ standard=$(bridge_standard "$vlan_filtering")
+
+ in_defer_scope check_fdb_local_vlan_0_support || return
+
+ adf_bridge_create vlan_filtering "$vlan_filtering" fdb_local_vlan_0 1
+ setup_wait
+
+ do_tests 1 "$standard, FDB sharing"
+
+ nonexistent_fdb_test "$vlan_filtering"
+ misleading_fdb_test "$vlan_filtering"
+
+ change_mac "$swp1" "$pMAC"
+ change_mac br "$bMAC"
+
+ do_tests 1 "$standard, FDB sharing after MAC change"
+
+ log_info "Set fdb_local_vlan_0=0"
+ ip link set dev br type bridge fdb_local_vlan_0 0
+
+ do_tests 0 "$standard, No FDB sharing after toggle"
+}
+
+test_d_no_sharing()
+{
+ do_test_no_sharing 0
+}
+
+test_d_sharing()
+{
+ do_test_sharing 0
+}
+
+test_q_no_sharing()
+{
+ do_test_no_sharing 1
+}
+
+test_q_sharing()
+{
+ do_test_sharing 1
+}
+
+adf_addr_set_bridge_create()
+{
+ adf_ip_link_add br up type bridge vlan_filtering 0
+ adf_ip_link_set_addr br "$(mac_get br)"
+ adf_bridge_configure
+}
+
+test_addr_set()
+{
+ adf_addr_set_bridge_create
+ setup_wait
+
+ do_end_to_end_test "$(mac_get br)" "NET_ADDR_SET: end to end, br MAC"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+tests_run
diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index e6a3e04fd83f..d4e7dd659354 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,10 +1,24 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
- v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
- v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
- v3exc_timeout_test v3star_ex_auto_add_test"
+ALL_TESTS="
+ v2reportleave_test
+ v3include_test
+ v3inc_allow_test
+ v3inc_is_include_test
+ v3inc_is_exclude_test
+ v3inc_to_exclude_test
+ v3exc_allow_test
+ v3exc_is_include_test
+ v3exc_is_exclude_test
+ v3exc_to_exclude_test
+ v3inc_block_test
+ v3exc_block_test
+ v3exc_timeout_test
+ v3star_ex_auto_add_test
+ v2per_vlan_snooping_port_stp_test
+ v2per_vlan_snooping_vlan_stp_test
+"
NUM_NETIFS=4
CHECK_TC="yes"
TEST_GROUP="239.10.10.10"
@@ -554,6 +568,64 @@ v3star_ex_auto_add_test()
v3cleanup $swp2 $TEST_GROUP
}
+v2per_vlan_snooping_stp_test()
+{
+ local is_port=$1
+
+ local msg="port"
+ [[ $is_port -ne 1 ]] && msg="vlan"
+
+ ip link set br0 up type bridge vlan_filtering 1 \
+ mcast_igmp_version 2 \
+ mcast_snooping 1 \
+ mcast_vlan_snooping 1 \
+ mcast_querier 1 \
+ mcast_stats_enabled 1
+ bridge vlan global set vid 1 dev br0 \
+ mcast_snooping 1 \
+ mcast_querier 1 \
+ mcast_query_interval 100 \
+ mcast_startup_query_count 0
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4
+ sleep 5
+ local tx_s=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3
+ sleep 5
+ local tx_e=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+
+ RET=0
+ local tx=$(expr $tx_e - $tx_s)
+ test $tx -gt 0
+ check_err $? "No IGMP queries after STP state becomes forwarding"
+ log_test "per vlan snooping with $msg stp state change"
+
+ # restore settings
+ bridge vlan global set vid 1 dev br0 \
+ mcast_querier 0 \
+ mcast_query_interval 12500 \
+ mcast_startup_query_count 2
+ ip link set br0 up type bridge vlan_filtering 0 \
+ mcast_vlan_snooping 0 \
+ mcast_stats_enabled 0
+}
+
+v2per_vlan_snooping_port_stp_test()
+{
+ v2per_vlan_snooping_stp_test 1
+}
+
+v2per_vlan_snooping_vlan_stp_test()
+{
+ v2per_vlan_snooping_stp_test 0
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index d9d587454d20..e86d77946585 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -28,6 +28,7 @@ ALL_TESTS="
cfg_test
fwd_test
ctrl_test
+ disable_test
"
NUM_NETIFS=4
@@ -64,7 +65,10 @@ h2_destroy()
switch_create()
{
- ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ local vlan_filtering=$1; shift
+
+ ip link add name br0 type bridge \
+ vlan_filtering "$vlan_filtering" vlan_default_pvid 0 \
mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2
bridge vlan add vid 10 dev br0 self
bridge vlan add vid 20 dev br0 self
@@ -118,7 +122,7 @@ setup_prepare()
h1_create
h2_create
- switch_create
+ switch_create 1
}
cleanup()
@@ -149,7 +153,7 @@ cfg_test_host_common()
check_err $? "Failed to add $name host entry"
bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null
- check_fail $? "Managed to replace $name host entry"
+ check_err $? "Failed to replace $name host entry"
bridge mdb del dev br0 port br0 grp $grp $state vid 10
bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
@@ -1357,6 +1361,98 @@ ctrl_test()
ctrl_mldv2_is_in_test
}
+check_group()
+{
+ local group=$1; shift
+ local vid=$1; shift
+ local should_fail=$1; shift
+ local when=$1; shift
+ local -a vidkws
+
+ if ((vid)); then
+ vidkws=(vid "$vid")
+ fi
+
+ bridge mdb get dev br0 grp "$group" "${vidkws[@]}" 2>/dev/null |
+ grep -q "port $swp1"
+ check_err_fail "$should_fail" $? "$group seen $when snooping disable:"
+}
+
+__disable_test()
+{
+ local vid=$1; shift
+ local what=$1; shift
+ local -a vidkws
+
+ if ((vid)); then
+ vidkws=(vid "$vid")
+ fi
+
+ RET=0
+
+ bridge mdb add dev br0 port "$swp1" grp ff0e::1 permanent \
+ "${vidkws[@]}" filter_mode include source_list 2001:db8:1::1
+ bridge mdb add dev br0 port "$swp1" grp ff0e::2 permanent \
+ "${vidkws[@]}" filter_mode exclude
+
+ bridge mdb add dev br0 port "$swp1" grp ff0e::3 \
+ "${vidkws[@]}" filter_mode include source_list 2001:db8:1::2
+ bridge mdb add dev br0 port "$swp1" grp ff0e::4 \
+ "${vidkws[@]}" filter_mode exclude
+
+ bridge mdb add dev br0 port "$swp1" grp 239.1.1.1 permanent \
+ "${vidkws[@]}" filter_mode include source_list 192.0.2.1
+ bridge mdb add dev br0 port "$swp1" grp 239.1.1.2 permanent \
+ "${vidkws[@]}" filter_mode exclude
+
+ bridge mdb add dev br0 port "$swp1" grp 239.1.1.3 \
+ "${vidkws[@]}" filter_mode include source_list 192.0.2.2
+ bridge mdb add dev br0 port "$swp1" grp 239.1.1.4 \
+ "${vidkws[@]}" filter_mode exclude
+
+ check_group ff0e::1 "$vid" 0 "before"
+ check_group ff0e::2 "$vid" 0 "before"
+ check_group ff0e::3 "$vid" 0 "before"
+ check_group ff0e::4 "$vid" 0 "before"
+
+ check_group 239.1.1.1 "$vid" 0 "before"
+ check_group 239.1.1.2 "$vid" 0 "before"
+ check_group 239.1.1.3 "$vid" 0 "before"
+ check_group 239.1.1.4 "$vid" 0 "before"
+
+ ip link set dev br0 type bridge mcast_snooping 0
+
+ check_group ff0e::1 "$vid" 0 "after"
+ check_group ff0e::2 "$vid" 0 "after"
+ check_group ff0e::3 "$vid" 1 "after"
+ check_group ff0e::4 "$vid" 1 "after"
+
+ check_group 239.1.1.1 "$vid" 0 "after"
+ check_group 239.1.1.2 "$vid" 0 "after"
+ check_group 239.1.1.3 "$vid" 1 "after"
+ check_group 239.1.1.4 "$vid" 1 "after"
+
+ log_test "$what: Flush after disable"
+
+ ip link set dev br0 type bridge mcast_snooping 1
+ sleep 10
+}
+
+disable_test()
+{
+ __disable_test 10 802.1q
+
+ switch_destroy
+ switch_create 0
+ setup_wait
+
+ __disable_test 0 802.1d
+
+ switch_destroy
+ switch_create 1
+ setup_wait
+}
+
if ! bridge mdb help 2>&1 | grep -q "flush"; then
echo "SKIP: iproute2 too old, missing bridge mdb flush support"
exit $ksft_skip
diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index f84ab2e65754..4cacef5a813a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,10 +1,23 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
- mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
- mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
- mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test"
+ALL_TESTS="
+ mldv2include_test
+ mldv2inc_allow_test
+ mldv2inc_is_include_test
+ mldv2inc_is_exclude_test
+ mldv2inc_to_exclude_test
+ mldv2exc_allow_test
+ mldv2exc_is_include_test
+ mldv2exc_is_exclude_test
+ mldv2exc_to_exclude_test
+ mldv2inc_block_test
+ mldv2exc_block_test
+ mldv2exc_timeout_test
+ mldv2star_ex_auto_add_test
+ mldv2per_vlan_snooping_port_stp_test
+ mldv2per_vlan_snooping_vlan_stp_test
+"
NUM_NETIFS=4
CHECK_TC="yes"
TEST_GROUP="ff02::cc"
@@ -554,6 +567,66 @@ mldv2star_ex_auto_add_test()
mldv2cleanup $swp2
}
+mldv2per_vlan_snooping_stp_test()
+{
+ local is_port=$1
+
+ local msg="port"
+ [[ $is_port -ne 1 ]] && msg="vlan"
+
+ ip link set br0 up type bridge vlan_filtering 1 \
+ mcast_mld_version 2 \
+ mcast_snooping 1 \
+ mcast_vlan_snooping 1 \
+ mcast_querier 1 \
+ mcast_stats_enabled 1
+ bridge vlan global set vid 1 dev br0 \
+ mcast_mld_version 2 \
+ mcast_snooping 1 \
+ mcast_querier 1 \
+ mcast_query_interval 100 \
+ mcast_startup_query_count 0
+
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4
+ sleep 5
+ local tx_s=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3
+ sleep 5
+ local tx_e=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+
+ RET=0
+ local tx=$(expr $tx_e - $tx_s)
+ test $tx -gt 0
+ check_err $? "No MLD queries after STP state becomes forwarding"
+ log_test "per vlan snooping with $msg stp state change"
+
+ # restore settings
+ bridge vlan global set vid 1 dev br0 \
+ mcast_querier 0 \
+ mcast_query_interval 12500 \
+ mcast_startup_query_count 2 \
+ mcast_mld_version 1
+ ip link set br0 up type bridge vlan_filtering 0 \
+ mcast_vlan_snooping 0 \
+ mcast_stats_enabled 0
+}
+
+mldv2per_vlan_snooping_port_stp_test()
+{
+ mldv2per_vlan_snooping_stp_test 1
+}
+
+mldv2per_vlan_snooping_vlan_stp_test()
+{
+ mldv2per_vlan_snooping_stp_test 0
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 64bd00fe9a4f..e59fba366a0a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn"
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged"
NUM_NETIFS=4
CHECK_TC="yes"
source lib.sh
@@ -142,6 +142,152 @@ extern_learn()
bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null
}
+other_tpid()
+{
+ local mac=de:ad:be:ef:13:37
+
+ # Test that packets with TPID 802.1ad VID 3 + TPID 802.1Q VID 5 are
+ # classified as untagged by a bridge with vlan_protocol 802.1Q, and
+ # are processed in the PVID of the ingress port (here 1). Not VID 3,
+ # and not VID 5.
+ RET=0
+
+ tc qdisc add dev $h2 clsact
+ tc filter add dev $h2 ingress protocol all pref 1 handle 101 \
+ flower dst_mac $mac action drop
+ ip link set $h2 promisc on
+ ethtool -K $h2 rx-vlan-filter off rx-vlan-stag-filter off
+
+ $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa"
+ sleep 1
+
+ # Match on 'self' addresses as well, for those drivers which
+ # do not push their learned addresses to the bridge software
+ # database
+ bridge -j fdb show $swp1 | \
+ jq -e ".[] | select(.mac == \"$(mac_get $h1)\") | select(.vlan == 1)" &> /dev/null
+ check_err $? "FDB entry was not learned when it should"
+
+ log_test "FDB entry in PVID for VLAN-tagged with other TPID"
+
+ RET=0
+ tc -j -s filter show dev $h2 ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err $? "Packet was not forwarded when it should"
+ log_test "Reception of VLAN with other TPID as untagged"
+
+ bridge vlan del dev $swp1 vid 1
+
+ $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa"
+ sleep 1
+
+ RET=0
+ tc -j -s filter show dev $h2 ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err $? "Packet was forwarded when should not"
+ log_test "Reception of VLAN with other TPID as untagged (no PVID)"
+
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+ ip link set $h2 promisc off
+ tc qdisc del dev $h2 clsact
+}
+
+8021p_do()
+{
+ local should_fail=$1; shift
+ local mac=de:ad:be:ef:13:37
+
+ tc filter add dev $h2 ingress protocol all pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa"
+ sleep 1
+
+ tc -j -s filter show dev $h2 ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err_fail $should_fail $? "802.1p-tagged reception"
+
+ tc filter del dev $h2 ingress pref 1
+}
+
+8021p()
+{
+ RET=0
+
+ tc qdisc add dev $h2 clsact
+ ip link set $h2 promisc on
+
+ # Test that with the default_pvid, 1, packets tagged with VID 0 are
+ # accepted.
+ 8021p_do 0
+
+ # Test that packets tagged with VID 0 are still accepted after changing
+ # the default_pvid.
+ ip link set br0 type bridge vlan_default_pvid 10
+ 8021p_do 0
+
+ log_test "Reception of 802.1p-tagged traffic"
+
+ ip link set $h2 promisc off
+ tc qdisc del dev $h2 clsact
+}
+
+send_untagged_and_8021p()
+{
+ ping_do $h1 192.0.2.2
+ check_fail $?
+
+ 8021p_do 1
+}
+
+drop_untagged()
+{
+ RET=0
+
+ tc qdisc add dev $h2 clsact
+ ip link set $h2 promisc on
+
+ # Test that with no PVID, untagged and 802.1p-tagged traffic is
+ # dropped.
+ ip link set br0 type bridge vlan_default_pvid 1
+
+ # First we reconfigure the default_pvid, 1, as a non-PVID VLAN.
+ bridge vlan add dev $swp1 vid 1 untagged
+ send_untagged_and_8021p
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+
+ # Next we try to delete VID 1 altogether
+ bridge vlan del dev $swp1 vid 1
+ send_untagged_and_8021p
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+
+ # Set up the bridge without a default_pvid, then check that the 8021q
+ # module, when the bridge port goes down and then up again, does not
+ # accidentally re-enable untagged packet reception.
+ ip link set br0 type bridge vlan_default_pvid 0
+ ip link set $swp1 down
+ ip link set $swp1 up
+ setup_wait
+ send_untagged_and_8021p
+
+ # Remove swp1 as a bridge port and let it rejoin the bridge while it
+ # has no default_pvid.
+ ip link set $swp1 nomaster
+ ip link set $swp1 master br0
+ send_untagged_and_8021p
+
+ # Restore settings
+ ip link set br0 type bridge vlan_default_pvid 1
+
+ log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID"
+
+ ip link set $h2 promisc off
+ tc qdisc del dev $h2 clsact
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index 1c8a26046589..2b5700b61ffa 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding pvid_change"
NUM_NETIFS=4
source lib.sh
@@ -77,12 +77,16 @@ cleanup()
ping_ipv4()
{
- ping_test $h1 192.0.2.2
+ local msg=$1
+
+ ping_test $h1 192.0.2.2 "$msg"
}
ping_ipv6()
{
- ping6_test $h1 2001:db8:1::2
+ local msg=$1
+
+ ping6_test $h1 2001:db8:1::2 "$msg"
}
learning()
@@ -95,6 +99,21 @@ flooding()
flood_test $swp2 $h1 $h2
}
+pvid_change()
+{
+ # Test that the changing of the VLAN-aware PVID does not affect
+ # VLAN-unaware forwarding
+ bridge vlan add vid 3 dev $swp1 pvid untagged
+
+ ping_ipv4 " with bridge port $swp1 PVID changed"
+ ping_ipv6 " with bridge port $swp1 PVID changed"
+
+ bridge vlan del vid 3 dev $swp1
+
+ ping_ipv4 " with bridge port $swp1 PVID deleted"
+ ping_ipv6 " with bridge port $swp1 PVID deleted"
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index 8d7a1a004b7c..ce64518aaa11 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -1,23 +1,23 @@
+CONFIG_BPF_SYSCALL=y
CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_IGMP_SNOOPING=y
CONFIG_BRIDGE_VLAN_FILTERING=y
-CONFIG_NET_L3_MASTER_DEV=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_NET_VRF=m
-CONFIG_BPF_SYSCALL=y
CONFIG_CGROUP_BPF=y
CONFIG_DUMMY=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
CONFIG_IPV6=y
CONFIG_IPV6_GRE=m
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_PIMSM_V2=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
CONFIG_MACVLAN=m
+CONFIG_NAMESPACES=y
CONFIG_NET_ACT_CT=m
+CONFIG_NET_ACT_GACT=m
CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_MPLS=m
CONFIG_NET_ACT_PEDIT=m
@@ -26,29 +26,30 @@ CONFIG_NET_ACT_SAMPLE=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_CLS_MATCHALL=m
-CONFIG_NET_CLS_BASIC=m
CONFIG_NET_EMATCH=y
CONFIG_NET_EMATCH_META=m
+CONFIG_NETFILTER=y
CONFIG_NET_IPGRE=m
CONFIG_NET_IPGRE_DEMUX=m
CONFIG_NET_IPIP=m
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
CONFIG_NET_SCH_ETS=m
CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_ACT_GACT=m
CONFIG_NET_SCH_PRIO=m
CONFIG_NET_SCH_RED=m
CONFIG_NET_SCH_TBF=m
CONFIG_NET_TC_SKB_EXT=y
CONFIG_NET_TEAM=y
CONFIG_NET_TEAM_MODE_LOADBALANCE=y
-CONFIG_NETFILTER=y
+CONFIG_NET_VRF=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_FLOW_TABLE=m
CONFIG_NF_TABLES=m
CONFIG_VETH=m
-CONFIG_NAMESPACES=y
-CONFIG_NET_NS=y
+CONFIG_VLAN_8021Q=m
CONFIG_VXLAN=m
CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
index 1783c10215e5..5dbfab0e23e3 100755
--- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -224,10 +224,10 @@ send_dst_ipv6()
send_flowlabel()
{
# Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec v$h1 \
- $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1
- done
+ ip vrf exec v$h1 sh -c \
+ "for _ in {1..16384}; do \
+ $PING6 -F 0 -c 1 -q 2001:db8:4::2 >/dev/null 2>&1; \
+ done"
}
send_src_udp6()
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index f1de525cfa55..18afa89ebbcc 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -122,6 +122,8 @@ devlink_reload()
still_pending=$(devlink resource show "$DEVLINK_DEV" | \
grep -c "size_new")
check_err $still_pending "Failed reload - There are still unset sizes"
+
+ udevadm settle
}
declare -A DEVLINK_ORIG
@@ -499,7 +501,7 @@ devlink_trap_drop_cleanup()
local pref=$1; shift
local handle=$1; shift
- kill $mz_pid && wait $mz_pid &> /dev/null
+ kill_process $mz_pid
tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower
}
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
index 9788bd0f6e8b..b4f17a5bbc61 100755
--- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -319,10 +319,10 @@ send_dst_ipv6()
send_flowlabel()
{
# Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec v$h1 \
- $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
- done
+ ip vrf exec v$h1 sh -c \
+ "for _ in {1..16384}; do \
+ $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \
+ done"
}
send_src_udp6()
diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
index 49fa94b53a1c..25036e38043c 100755
--- a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
+++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
@@ -95,7 +95,7 @@ ipv6_in_too_big_err()
# Send too big packets
ip vrf exec $vrf_name \
- $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+ $PING6 -s 1300 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null
local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors)
test "$((t1 - t0))" -ne 0
@@ -131,7 +131,7 @@ ipv6_in_addr_err()
# Disable forwarding temporary while sending the packet
sysctl -qw net.ipv6.conf.all.forwarding=0
ip vrf exec $vrf_name \
- $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+ $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null
sysctl -qw net.ipv6.conf.all.forwarding=1
local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors)
@@ -150,7 +150,7 @@ ipv6_in_discard()
# Add a policy to discard
ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block
ip vrf exec $vrf_name \
- $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+ $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null
ip xfrm policy del dst 2001:1:2::2/128 dir fwd
local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards)
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
index 2ab9eaaa5532..b24acfa52a3a 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -321,10 +321,10 @@ send_dst_ipv6()
send_flowlabel()
{
# Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec v$h1 \
- $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
- done
+ ip vrf exec v$h1 sh -c \
+ "for _ in {1..16384}; do \
+ $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \
+ done"
}
send_src_udp6()
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh
index 96c97064f2d3..becc7c3fc809 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh
@@ -8,6 +8,7 @@
ALL_TESTS="
gre_flat
gre_mtu_change
+ gre_flat_remote_change
"
NUM_NETIFS=6
@@ -44,6 +45,19 @@ gre_mtu_change()
test_mtu_change
}
+gre_flat_remote_change()
+{
+ flat_remote_change
+
+ test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 (new remote)"
+ test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 (new remote)"
+
+ flat_remote_restore
+
+ test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 (old remote)"
+ test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 (old remote)"
+}
+
cleanup()
{
pre_cleanup
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh
index ff9fb0db9bd1..e5335116a2fd 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh
@@ -8,6 +8,7 @@
ALL_TESTS="
gre_flat
gre_mtu_change
+ gre_flat_remote_change
"
NUM_NETIFS=6
@@ -44,6 +45,19 @@ gre_mtu_change()
test_mtu_change
}
+gre_flat_remote_change()
+{
+ flat_remote_change
+
+ test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key (new remote)"
+ test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key (new remote)"
+
+ flat_remote_restore
+
+ test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key (old remote)"
+ test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key (old remote)"
+}
+
cleanup()
{
pre_cleanup
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh
index 12c138785242..7e0cbfdefab0 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh
@@ -8,6 +8,7 @@
ALL_TESTS="
gre_flat
gre_mtu_change
+ gre_flat_remote_change
"
NUM_NETIFS=6
@@ -44,6 +45,19 @@ gre_mtu_change()
test_mtu_change gre
}
+gre_flat_remote_change()
+{
+ flat_remote_change
+
+ test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey (new remote)"
+ test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey (new remote)"
+
+ flat_remote_restore
+
+ test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey (old remote)"
+ test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey (old remote)"
+}
+
cleanup()
{
pre_cleanup
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh
index 83b55c30a5c3..e0844495f3d1 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh
@@ -8,6 +8,7 @@
ALL_TESTS="
gre_hier
gre_mtu_change
+ gre_hier_remote_change
"
NUM_NETIFS=6
@@ -44,6 +45,19 @@ gre_mtu_change()
test_mtu_change gre
}
+gre_hier_remote_change()
+{
+ hier_remote_change
+
+ test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 (new remote)"
+ test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 (new remote)"
+
+ hier_remote_restore
+
+ test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 (old remote)"
+ test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 (old remote)"
+}
+
cleanup()
{
pre_cleanup
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh
index 256607916d92..741bc9c928eb 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh
@@ -8,6 +8,7 @@
ALL_TESTS="
gre_hier
gre_mtu_change
+ gre_hier_remote_change
"
NUM_NETIFS=6
@@ -44,6 +45,19 @@ gre_mtu_change()
test_mtu_change gre
}
+gre_hier_remote_change()
+{
+ hier_remote_change
+
+ test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key (new remote)"
+ test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key (new remote)"
+
+ hier_remote_restore
+
+ test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key (old remote)"
+ test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key (old remote)"
+}
+
cleanup()
{
pre_cleanup
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh
index ad1bcd6334a8..ad9eab4b1367 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh
@@ -8,6 +8,7 @@
ALL_TESTS="
gre_hier
gre_mtu_change
+ gre_hier_remote_change
"
NUM_NETIFS=6
@@ -44,6 +45,19 @@ gre_mtu_change()
test_mtu_change gre
}
+gre_hier_remote_change()
+{
+ hier_remote_change
+
+ test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey (new remote)"
+ test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey (new remote)"
+
+ hier_remote_restore
+
+ test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey (old remote)"
+ test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey (old remote)"
+}
+
cleanup()
{
pre_cleanup
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
index 24f4ab328bd2..2d91281dc5b7 100644
--- a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
@@ -436,3 +436,83 @@ test_mtu_change()
check_err $?
log_test "ping GRE IPv6, packet size 1800 after MTU change"
}
+
+topo_flat_remote_change()
+{
+ local old1=$1; shift
+ local new1=$1; shift
+ local old2=$1; shift
+ local new2=$1; shift
+
+ ip link set dev g1a type ip6gre local $new1 remote $new2
+ __addr_add_del g1a add "$new1/128"
+ __addr_add_del g1a del "$old1/128"
+ ip -6 route add $new2/128 via 2001:db8:10::2
+ ip -6 route del $old2/128
+
+ ip link set dev g2a type ip6gre local $new2 remote $new1
+ __addr_add_del g2a add "$new2/128"
+ __addr_add_del g2a del "$old2/128"
+ ip -6 route add vrf v$ol2 $new1/128 via 2001:db8:10::1
+ ip -6 route del vrf v$ol2 $old1/128
+}
+
+flat_remote_change()
+{
+ local old1=2001:db8:3::1
+ local new1=2001:db8:3::10
+ local old2=2001:db8:3::2
+ local new2=2001:db8:3::20
+
+ topo_flat_remote_change $old1 $new1 $old2 $new2
+}
+
+flat_remote_restore()
+{
+ local old1=2001:db8:3::10
+ local new1=2001:db8:3::1
+ local old2=2001:db8:3::20
+ local new2=2001:db8:3::2
+
+ topo_flat_remote_change $old1 $new1 $old2 $new2
+}
+
+topo_hier_remote_change()
+{
+ local old1=$1; shift
+ local new1=$1; shift
+ local old2=$1; shift
+ local new2=$1; shift
+
+ __addr_add_del dummy1 del "$old1/64"
+ __addr_add_del dummy1 add "$new1/64"
+ ip link set dev g1a type ip6gre local $new1 remote $new2
+ ip -6 route add vrf v$ul1 $new2/128 via 2001:db8:10::2
+ ip -6 route del vrf v$ul1 $old2/128
+
+ __addr_add_del dummy2 del "$old2/64"
+ __addr_add_del dummy2 add "$new2/64"
+ ip link set dev g2a type ip6gre local $new2 remote $new1
+ ip -6 route add vrf v$ul2 $new1/128 via 2001:db8:10::1
+ ip -6 route del vrf v$ul2 $old1/128
+}
+
+hier_remote_change()
+{
+ local old1=2001:db8:3::1
+ local new1=2001:db8:3::10
+ local old2=2001:db8:3::2
+ local new2=2001:db8:3::20
+
+ topo_hier_remote_change $old1 $new1 $old2 $new2
+}
+
+hier_remote_restore()
+{
+ local old1=2001:db8:3::10
+ local new1=2001:db8:3::1
+ local old2=2001:db8:3::20
+ local new2=2001:db8:3::2
+
+ topo_hier_remote_change $old1 $new1 $old2 $new2
+}
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index ff96bb7535ff..a9034f0bb58b 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -37,6 +37,7 @@ declare -A NETIFS=(
: "${TEAMD:=teamd}"
: "${MCD:=smcrouted}"
: "${MC_CLI:=smcroutectl}"
+: "${MCD_TABLE_NAME:=selftests}"
# Constants for netdevice bring-up:
# Default time in seconds to wait for an interface to come up before giving up
@@ -48,7 +49,6 @@ declare -A NETIFS=(
: "${WAIT_TIME:=5}"
# Whether to pause on, respectively, after a failure and before cleanup.
-: "${PAUSE_ON_FAIL:=no}"
: "${PAUSE_ON_CLEANUP:=no}"
# Whether to create virtual interfaces, and what netdevice type they should be.
@@ -69,6 +69,7 @@ declare -A NETIFS=(
: "${REQUIRE_JQ:=yes}"
: "${REQUIRE_MZ:=yes}"
: "${REQUIRE_MTOOLS:=no}"
+: "${REQUIRE_TEAMD:=no}"
# Whether to override MAC addresses on interfaces participating in the test.
: "${STABLE_MAC_ADDRS:=no}"
@@ -141,6 +142,20 @@ check_tc_version()
fi
}
+check_tc_erspan_support()
+{
+ local dev=$1; shift
+
+ tc filter add dev $dev ingress pref 1 handle 1 flower \
+ erspan_opts 1:0:0:0 &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing erspan support"
+ return $ksft_skip
+ fi
+ tc filter del dev $dev ingress pref 1 handle 1 flower \
+ erspan_opts 1:0:0:0 &> /dev/null
+}
+
# Old versions of tc don't understand "mpls_uc"
check_tc_mpls_support()
{
@@ -291,16 +306,6 @@ if [[ "$CHECK_TC" = "yes" ]]; then
check_tc_version
fi
-require_command()
-{
- local cmd=$1; shift
-
- if [[ ! -x "$(command -v "$cmd")" ]]; then
- echo "SKIP: $cmd not installed"
- exit $ksft_skip
- fi
-}
-
# IPv6 support was added in v3.0
check_mtools_version()
{
@@ -322,6 +327,9 @@ fi
if [[ "$REQUIRE_MZ" = "yes" ]]; then
require_command $MZ
fi
+if [[ "$REQUIRE_TEAMD" = "yes" ]]; then
+ require_command $TEAMD
+fi
if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
# https://github.com/troglobit/mtools
require_command msend
@@ -446,179 +454,6 @@ done
##############################################################################
# Helpers
-# Exit status to return at the end. Set in case one of the tests fails.
-EXIT_STATUS=0
-# Per-test return value. Clear at the beginning of each test.
-RET=0
-
-ret_set_ksft_status()
-{
- local ksft_status=$1; shift
- local msg=$1; shift
-
- RET=$(ksft_status_merge $RET $ksft_status)
- if (( $? )); then
- retmsg=$msg
- fi
-}
-
-# Whether FAILs should be interpreted as XFAILs. Internal.
-FAIL_TO_XFAIL=
-
-check_err()
-{
- local err=$1
- local msg=$2
-
- if ((err)); then
- if [[ $FAIL_TO_XFAIL = yes ]]; then
- ret_set_ksft_status $ksft_xfail "$msg"
- else
- ret_set_ksft_status $ksft_fail "$msg"
- fi
- fi
-}
-
-check_fail()
-{
- local err=$1
- local msg=$2
-
- check_err $((!err)) "$msg"
-}
-
-check_err_fail()
-{
- local should_fail=$1; shift
- local err=$1; shift
- local what=$1; shift
-
- if ((should_fail)); then
- check_fail $err "$what succeeded, but should have failed"
- else
- check_err $err "$what failed"
- fi
-}
-
-xfail_on_slow()
-{
- if [[ $KSFT_MACHINE_SLOW = yes ]]; then
- FAIL_TO_XFAIL=yes "$@"
- else
- "$@"
- fi
-}
-
-xfail_on_veth()
-{
- local dev=$1; shift
- local kind
-
- kind=$(ip -j -d link show dev $dev |
- jq -r '.[].linkinfo.info_kind')
- if [[ $kind = veth ]]; then
- FAIL_TO_XFAIL=yes "$@"
- else
- "$@"
- fi
-}
-
-log_test_result()
-{
- local test_name=$1; shift
- local opt_str=$1; shift
- local result=$1; shift
- local retmsg=$1; shift
-
- printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result"
- if [[ $retmsg ]]; then
- printf "\t%s\n" "$retmsg"
- fi
-}
-
-pause_on_fail()
-{
- if [[ $PAUSE_ON_FAIL == yes ]]; then
- echo "Hit enter to continue, 'q' to quit"
- read a
- [[ $a == q ]] && exit 1
- fi
-}
-
-handle_test_result_pass()
-{
- local test_name=$1; shift
- local opt_str=$1; shift
-
- log_test_result "$test_name" "$opt_str" " OK "
-}
-
-handle_test_result_fail()
-{
- local test_name=$1; shift
- local opt_str=$1; shift
-
- log_test_result "$test_name" "$opt_str" FAIL "$retmsg"
- pause_on_fail
-}
-
-handle_test_result_xfail()
-{
- local test_name=$1; shift
- local opt_str=$1; shift
-
- log_test_result "$test_name" "$opt_str" XFAIL "$retmsg"
- pause_on_fail
-}
-
-handle_test_result_skip()
-{
- local test_name=$1; shift
- local opt_str=$1; shift
-
- log_test_result "$test_name" "$opt_str" SKIP "$retmsg"
-}
-
-log_test()
-{
- local test_name=$1
- local opt_str=$2
-
- if [[ $# -eq 2 ]]; then
- opt_str="($opt_str)"
- fi
-
- if ((RET == ksft_pass)); then
- handle_test_result_pass "$test_name" "$opt_str"
- elif ((RET == ksft_xfail)); then
- handle_test_result_xfail "$test_name" "$opt_str"
- elif ((RET == ksft_skip)); then
- handle_test_result_skip "$test_name" "$opt_str"
- else
- handle_test_result_fail "$test_name" "$opt_str"
- fi
-
- EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET)
- return $RET
-}
-
-log_test_skip()
-{
- RET=$ksft_skip retmsg= log_test "$@"
-}
-
-log_test_xfail()
-{
- RET=$ksft_xfail retmsg= log_test "$@"
-}
-
-log_info()
-{
- local msg=$1
-
- echo "INFO: $msg"
-}
-
not()
{
"$@"
@@ -705,9 +540,9 @@ setup_wait_dev_with_timeout()
return 1
}
-setup_wait()
+setup_wait_n()
{
- local num_netifs=${1:-$NUM_NETIFS}
+ local num_netifs=$1; shift
local i
for ((i = 1; i <= num_netifs; ++i)); do
@@ -718,6 +553,11 @@ setup_wait()
sleep $WAIT_TIME
}
+setup_wait()
+{
+ setup_wait_n "$NUM_NETIFS"
+}
+
wait_for_dev()
{
local dev=$1; shift
@@ -731,30 +571,6 @@ wait_for_dev()
fi
}
-cmd_jq()
-{
- local cmd=$1
- local jq_exp=$2
- local jq_opts=$3
- local ret
- local output
-
- output="$($cmd)"
- # it the command fails, return error right away
- ret=$?
- if [[ $ret -ne 0 ]]; then
- return $ret
- fi
- output=$(echo $output | jq -r $jq_opts "$jq_exp")
- ret=$?
- if [[ $ret -ne 0 ]]; then
- return $ret
- fi
- echo $output
- # return success only in case of non-empty output
- [ ! -z "$output" ]
-}
-
pre_cleanup()
{
if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
@@ -783,6 +599,12 @@ vrf_cleanup()
ip -4 rule del pref 32765
}
+adf_vrf_prepare()
+{
+ vrf_prepare
+ defer vrf_cleanup
+}
+
__last_tb_id=0
declare -A __TB_IDS
@@ -895,6 +717,12 @@ simple_if_fini()
vrf_destroy $vrf_name
}
+adf_simple_if_init()
+{
+ simple_if_init "$@"
+ defer simple_if_fini "$@"
+}
+
tunnel_create()
{
local name=$1; shift
@@ -1106,11 +934,37 @@ packets_rate()
echo $(((t1 - t0) / interval))
}
-mac_get()
+ether_addr_to_u64()
{
- local if_name=$1
+ local addr="$1"
+ local order="$((1 << 40))"
+ local val=0
+ local byte
+
+ addr="${addr//:/ }"
+
+ for byte in $addr; do
+ byte="0x$byte"
+ val=$((val + order * byte))
+ order=$((order >> 8))
+ done
- ip -j link show dev $if_name | jq -r '.[]["address"]'
+ printf "0x%x" $val
+}
+
+u64_to_ether_addr()
+{
+ local val=$1
+ local byte
+ local i
+
+ for ((i = 40; i >= 0; i -= 8)); do
+ byte=$(((val & (0xff << i)) >> i))
+ printf "%02x" $byte
+ if [ $i -ne 0 ]; then
+ printf ":"
+ fi
+ done
}
ipv6_lladdr_get()
@@ -1169,6 +1023,12 @@ forwarding_restore()
sysctl_restore net.ipv4.conf.all.forwarding
}
+adf_forwarding_enable()
+{
+ forwarding_enable
+ defer forwarding_restore
+}
+
declare -A MTU_ORIG
mtu_set()
{
@@ -1353,13 +1213,10 @@ matchall_sink_create()
action drop
}
-tests_run()
+cleanup()
{
- local current_test
-
- for current_test in ${TESTS:-$ALL_TESTS}; do
- $current_test
- done
+ pre_cleanup
+ defer_scopes_cleanup
}
multipath_eval()
@@ -1428,8 +1285,8 @@ ping_do()
vrf_name=$(master_name_get $if_name)
ip vrf exec $vrf_name \
- $PING $args $dip -c $PING_COUNT -i 0.1 \
- -w $PING_TIMEOUT &> /dev/null
+ $PING $args -c $PING_COUNT -i 0.1 \
+ -w $PING_TIMEOUT $dip &> /dev/null
}
ping_test()
@@ -1459,8 +1316,8 @@ ping6_do()
vrf_name=$(master_name_get $if_name)
ip vrf exec $vrf_name \
- $PING6 $args $dip -c $PING_COUNT -i 0.1 \
- -w $PING_TIMEOUT &> /dev/null
+ $PING6 $args -c $PING_COUNT -i 0.1 \
+ -w $PING_TIMEOUT $dip &> /dev/null
}
ping6_test()
@@ -1716,8 +1573,9 @@ start_tcp_traffic()
stop_traffic()
{
- # Suppress noise from killing mausezahn.
- { kill %% && wait %%; } 2>/dev/null
+ local pid=${1-%%}; shift
+
+ kill_process "$pid"
}
declare -A cappid
@@ -1913,6 +1771,51 @@ mc_send()
msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
}
+adf_mcd_start()
+{
+ local ifs=("$@")
+
+ local table_name="$MCD_TABLE_NAME"
+ local smcroutedir
+ local pid
+ local if
+ local i
+
+ check_command "$MCD" || return 1
+ check_command "$MC_CLI" || return 1
+
+ smcroutedir=$(mktemp -d)
+ defer rm -rf "$smcroutedir"
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ echo "phyint ${NETIFS[p$i]} enable" >> \
+ "$smcroutedir/$table_name.conf"
+ done
+
+ for if in "${ifs[@]}"; do
+ if ! ip_link_has_flag "$if" MULTICAST; then
+ ip link set dev "$if" multicast on
+ defer ip link set dev "$if" multicast off
+ fi
+
+ echo "phyint $if enable" >> \
+ "$smcroutedir/$table_name.conf"
+ done
+
+ "$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \
+ -P "$smcroutedir/$table_name.pid"
+ busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid"
+ pid=$(cat "$smcroutedir/$table_name.pid")
+ defer kill_process "$pid"
+}
+
+mc_cli()
+{
+ local table_name="$MCD_TABLE_NAME"
+
+ "$MC_CLI" -I "$table_name" "$@"
+}
+
start_ip_monitor()
{
local mtype=$1; shift
@@ -2229,3 +2132,22 @@ absval()
echo $((v > 0 ? v : -v))
}
+
+has_unicast_flt()
+{
+ local dev=$1; shift
+ local mac_addr=$(mac_get $dev)
+ local tmp=$(ether_addr_to_u64 $mac_addr)
+ local promisc
+
+ ip link set $dev up
+ ip link add link $dev name macvlan-tmp type macvlan mode private
+ ip link set macvlan-tmp address $(u64_to_ether_addr $((tmp + 1)))
+ ip link set macvlan-tmp up
+
+ promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity')
+
+ ip link del macvlan-tmp
+
+ [[ $promisc == 1 ]] && echo "no" || echo "yes"
+}
diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
index ff2accccaf4d..b4eda6c6199e 100755
--- a/tools/testing/selftests/net/forwarding/lib_sh_test.sh
+++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
@@ -30,6 +30,11 @@ tfail()
do_test "tfail" false
}
+tfail2()
+{
+ do_test "tfail2" false
+}
+
txfail()
{
FAIL_TO_XFAIL=yes do_test "txfail" false
@@ -132,6 +137,8 @@ test_ret()
ret_subtest $ksft_fail "tfail" txfail tfail
ret_subtest $ksft_xfail "txfail" txfail txfail
+
+ ret_subtest $ksft_fail "tfail2" tfail2 tfail
}
exit_status_tests_run()
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index 4b364cdf3ef0..892895659c7e 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -1,11 +1,12 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="standalone bridge"
+ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \
+ vlan_over_vlan_unaware_bridged_port vlan_over_vlan_aware_bridged_port \
+ vlan_over_vlan_unaware_bridge vlan_over_vlan_aware_bridge"
NUM_NETIFS=2
PING_COUNT=1
REQUIRE_MTOOLS=yes
-REQUIRE_MZ=no
source lib.sh
@@ -37,9 +38,68 @@ UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05"
UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06"
UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07"
-NON_IP_MC="01:02:03:04:05:06"
-NON_IP_PKT="00:04 48:45:4c:4f"
-BC="ff:ff:ff:ff:ff:ff"
+PTP_1588_L2_SYNC=" \
+01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 00 02 \
+00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \
+00 00 00 00 00 00 00 00 00 00"
+PTP_1588_L2_FOLLOW_UP=" \
+01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 08 02 \
+00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \
+00 00 66 83 c5 f1 17 97 ed f0"
+PTP_1588_L2_PDELAY_REQ=" \
+01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:f7 02 02 \
+00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 06 05 7f \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 00 00"
+PTP_1588_IPV4_SYNC=" \
+01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \
+00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \
+01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \
+02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \
+63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \
+00 00 00 00 00 00"
+PTP_1588_IPV4_FOLLOW_UP="
+01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \
+00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \
+01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \
+63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \
+c6 0f 1d 9a 61 87"
+PTP_1588_IPV4_PDELAY_REQ=" \
+01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \
+00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \
+00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \
+63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00"
+PTP_1588_IPV6_SYNC=" \
+33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \
+7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \
+00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \
+00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \
+00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \
+00 00 00 00 00 00 00 00 00 00 00 00"
+PTP_1588_IPV6_FOLLOW_UP=" \
+33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \
+00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \
+00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \
+00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \
+00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \
+00 00 66 83 c6 2a 32 09 bd 74 00 00"
+PTP_1588_IPV6_PDELAY_REQ=" \
+33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \
+5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \
+63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \
+00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \
+00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 00 00 00 00"
# Disable promisc to ensure we don't receive unknown MAC DA packets
export TCPDUMP_EXTRA_FLAGS="-pl"
@@ -47,13 +107,15 @@ export TCPDUMP_EXTRA_FLAGS="-pl"
h1=${NETIFS[p1]}
h2=${NETIFS[p2]}
-send_non_ip()
+send_raw()
{
- local if_name=$1
- local smac=$2
- local dmac=$3
+ local if_name=$1; shift
+ local pkt="$1"; shift
+ local smac=$(mac_get $if_name)
+
+ pkt="${pkt/00:00:de:ad:be:ef/$smac}"
- $MZ -q $if_name "$dmac $smac $NON_IP_PKT"
+ $MZ -q $if_name "$pkt"
}
send_uc_ipv4()
@@ -68,10 +130,11 @@ send_uc_ipv4()
check_rcv()
{
- local if_name=$1
- local type=$2
- local pattern=$3
- local should_receive=$4
+ local if_name=$1; shift
+ local type=$1; shift
+ local pattern=$1; shift
+ local should_receive=$1; shift
+ local test_name="$1"; shift
local should_fail=
[ $should_receive = true ] && should_fail=0 || should_fail=1
@@ -81,7 +144,7 @@ check_rcv()
check_err_fail "$should_fail" "$?" "reception"
- log_test "$if_name: $type"
+ log_test "$test_name: $type"
}
mc_route_prepare()
@@ -104,44 +167,80 @@ mc_route_destroy()
run_test()
{
- local rcv_if_name=$1
- local smac=$(mac_get $h1)
+ local send_if_name=$1; shift
+ local rcv_if_name=$1; shift
+ local skip_ptp=$1; shift
+ local no_unicast_flt=$1; shift
+ local test_name="$1"; shift
+ local smac=$(mac_get $send_if_name)
local rcv_dmac=$(mac_get $rcv_if_name)
+ local should_receive
+
+ setup_wait
tcpdump_start $rcv_if_name
- mc_route_prepare $h1
+ mc_route_prepare $send_if_name
mc_route_prepare $rcv_if_name
- send_uc_ipv4 $h1 $rcv_dmac
- send_uc_ipv4 $h1 $MACVLAN_ADDR
- send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1
+ send_uc_ipv4 $send_if_name $rcv_dmac
+ send_uc_ipv4 $send_if_name $MACVLAN_ADDR
+ send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR1
ip link set dev $rcv_if_name promisc on
- send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2
- mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2
- mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2
+ send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR2
+ mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR2
+ mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR2
ip link set dev $rcv_if_name promisc off
mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR
- mc_send $h1 $JOINED_IPV4_MC_ADDR
+ mc_send $send_if_name $JOINED_IPV4_MC_ADDR
mc_leave
mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR
- mc_send $h1 $JOINED_IPV6_MC_ADDR
+ mc_send $send_if_name $JOINED_IPV6_MC_ADDR
mc_leave
- mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1
- mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1
+ mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR1
+ mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR1
ip link set dev $rcv_if_name allmulticast on
- send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3
- mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3
- mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3
+ send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR3
+ mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR3
+ mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR3
ip link set dev $rcv_if_name allmulticast off
mc_route_destroy $rcv_if_name
- mc_route_destroy $h1
+ mc_route_destroy $send_if_name
+
+ if [ $skip_ptp = false ]; then
+ ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name
+ send_raw $send_if_name "$PTP_1588_L2_SYNC"
+ send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP"
+ ip maddress del 01:1b:19:00:00:00 dev $rcv_if_name
+
+ ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name
+ send_raw $send_if_name "$PTP_1588_L2_PDELAY_REQ"
+ ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name
+
+ mc_join $rcv_if_name 224.0.1.129
+ send_raw $send_if_name "$PTP_1588_IPV4_SYNC"
+ send_raw $send_if_name "$PTP_1588_IPV4_FOLLOW_UP"
+ mc_leave
+
+ mc_join $rcv_if_name 224.0.0.107
+ send_raw $send_if_name "$PTP_1588_IPV4_PDELAY_REQ"
+ mc_leave
+
+ mc_join $rcv_if_name ff0e::181
+ send_raw $send_if_name "$PTP_1588_IPV6_SYNC"
+ send_raw $send_if_name "$PTP_1588_IPV6_FOLLOW_UP"
+ mc_leave
+
+ mc_join $rcv_if_name ff02::6b
+ send_raw $send_if_name "$PTP_1588_IPV6_PDELAY_REQ"
+ mc_leave
+ fi
sleep 1
@@ -149,61 +248,99 @@ run_test()
check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \
"$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \
"$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
- xfail_on_veth $h1 \
- check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
- "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
- false
+ [ $no_unicast_flt = true ] && should_receive=true || should_receive=false
+ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
+ "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
+ $should_receive "$test_name"
check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \
"$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
- xfail_on_veth $h1 \
- check_rcv $rcv_if_name \
- "Unicast IPv4 to unknown MAC address, allmulti" \
- "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
- false
+ [ $no_unicast_flt = true ] && should_receive=true || should_receive=false
+ check_rcv $rcv_if_name \
+ "Unicast IPv4 to unknown MAC address, allmulti" \
+ "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
+ $should_receive "$test_name"
check_rcv $rcv_if_name "Multicast IPv4 to joined group" \
"$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
- xfail_on_veth $h1 \
+ xfail \
check_rcv $rcv_if_name \
"Multicast IPv4 to unknown group" \
"$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
- false
+ false "$test_name"
check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \
"$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \
"$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
check_rcv $rcv_if_name "Multicast IPv6 to joined group" \
"$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \
- true
+ true "$test_name"
- xfail_on_veth $h1 \
+ xfail \
check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
"$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
- false
+ false "$test_name"
check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \
"$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \
- true
+ true "$test_name"
check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \
"$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \
- true
+ true "$test_name"
+
+ if [ $skip_ptp = false ]; then
+ check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \
+ true "$test_name"
+ fi
tcpdump_cleanup $rcv_if_name
}
@@ -228,62 +365,217 @@ h2_destroy()
simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
}
+h1_vlan_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 100 v$h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_vlan_destroy()
+{
+ vlan_destroy $h1 100
+ simple_if_fini $h1
+}
+
+h2_vlan_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 100 v$h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_vlan_destroy()
+{
+ vlan_destroy $h2 100
+ simple_if_fini $h2
+}
+
bridge_create()
{
- ip link add br0 type bridge
+ local vlan_filtering=$1
+
+ ip link add br0 type bridge vlan_filtering $vlan_filtering
ip link set br0 address $BRIDGE_ADDR
ip link set br0 up
ip link set $h2 master br0
ip link set $h2 up
-
- simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64
}
bridge_destroy()
{
- simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64
-
ip link del br0
}
-standalone()
+macvlan_create()
{
- h1_create
- h2_create
+ local lower=$1
- ip link add link $h2 name macvlan0 type macvlan mode private
+ ip link add link $lower name macvlan0 type macvlan mode private
ip link set macvlan0 address $MACVLAN_ADDR
ip link set macvlan0 up
+}
- run_test $h2
-
+macvlan_destroy()
+{
ip link del macvlan0
+}
+
+standalone()
+{
+ local no_unicast_flt=true
+ local skip_ptp=false
+ if [ $(has_unicast_flt $h2) = yes ]; then
+ no_unicast_flt=false
+ fi
+
+ h1_create
+ h2_create
+ macvlan_create $h2
+
+ run_test $h1 $h2 $skip_ptp $no_unicast_flt "$h2"
+
+ macvlan_destroy
h2_destroy
h1_destroy
}
-bridge()
+test_bridge()
{
+ local no_unicast_flt=true
+ local vlan_filtering=$1
+ local skip_ptp=true
+
h1_create
- bridge_create
+ bridge_create $vlan_filtering
+ simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64
+ macvlan_create br0
- ip link add link br0 name macvlan0 type macvlan mode private
- ip link set macvlan0 address $MACVLAN_ADDR
- ip link set macvlan0 up
+ run_test $h1 br0 $skip_ptp $no_unicast_flt \
+ "vlan_filtering=$vlan_filtering bridge"
- run_test br0
+ macvlan_destroy
+ simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64
+ bridge_destroy
+ h1_destroy
+}
- ip link del macvlan0
+vlan_unaware_bridge()
+{
+ test_bridge 0
+}
+
+vlan_aware_bridge()
+{
+ test_bridge 1
+}
+
+test_vlan()
+{
+ local no_unicast_flt=true
+ local skip_ptp=false
+
+ if [ $(has_unicast_flt $h2) = yes ]; then
+ no_unicast_flt=false
+ fi
+
+ h1_vlan_create
+ h2_vlan_create
+ macvlan_create $h2.100
+ run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt "VLAN upper"
+
+ macvlan_destroy
+ h2_vlan_destroy
+ h1_vlan_destroy
+}
+
+vlan_over_bridged_port()
+{
+ local no_unicast_flt=true
+ local vlan_filtering=$1
+ local skip_ptp=false
+
+ # br_manage_promisc() will not force a single vlan_filtering port to
+ # promiscuous mode, so we should still expect unicast filtering to take
+ # place if the device can do it.
+ if [ $(has_unicast_flt $h2) = yes ] && [ $vlan_filtering = 1 ]; then
+ no_unicast_flt=false
+ fi
+
+ h1_vlan_create
+ h2_vlan_create
+ bridge_create $vlan_filtering
+ macvlan_create $h2.100
+
+ run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt \
+ "VLAN over vlan_filtering=$vlan_filtering bridged port"
+
+ macvlan_destroy
bridge_destroy
- h1_destroy
+ h2_vlan_destroy
+ h1_vlan_destroy
+}
+
+vlan_over_vlan_unaware_bridged_port()
+{
+ vlan_over_bridged_port 0
+}
+
+vlan_over_vlan_aware_bridged_port()
+{
+ vlan_over_bridged_port 1
+}
+
+vlan_over_bridge()
+{
+ local no_unicast_flt=true
+ local vlan_filtering=$1
+ local skip_ptp=true
+
+ h1_vlan_create
+ bridge_create $vlan_filtering
+ simple_if_init br0
+ vlan_create br0 100 vbr0 $H2_IPV4/24 $H2_IPV6/64
+ macvlan_create br0.100
+
+ if [ $vlan_filtering = 1 ]; then
+ bridge vlan add dev $h2 vid 100 master
+ bridge vlan add dev br0 vid 100 self
+ fi
+
+ run_test $h1.100 br0.100 $skip_ptp $no_unicast_flt \
+ "VLAN over vlan_filtering=$vlan_filtering bridge"
+
+ if [ $vlan_filtering = 1 ]; then
+ bridge vlan del dev br0 vid 100 self
+ bridge vlan del dev $h2 vid 100 master
+ fi
+
+ macvlan_destroy
+ vlan_destroy br0 100
+ simple_if_fini br0
+ bridge_destroy
+ h1_vlan_destroy
+}
+
+vlan_over_vlan_unaware_bridge()
+{
+ vlan_over_bridge 0
+}
+
+vlan_over_vlan_aware_bridge()
+{
+ vlan_over_bridge 1
}
cleanup()
{
pre_cleanup
+
+ ip link set $h2 down
+ ip link set $h1 down
+
vrf_cleanup
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
index fe4d7c906a70..8d4ae6c952a1 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -49,6 +49,7 @@ ALL_TESTS="
test_mirror_gretap_second
"
+REQUIRE_TEAMD="yes"
NUM_NETIFS=6
source lib.sh
source mirror_lib.sh
@@ -237,7 +238,7 @@ test_lag_slave()
ip neigh flush dev br1
setup_wait_dev $up_dev
setup_wait_dev $host_dev
- $ARPING -I br1 192.0.2.130 -qfc 1
+ $ARPING -I br1 -qfc 1 192.0.2.130
sleep 2
mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10"
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
index 1261e6f46e34..ff7049582d35 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
@@ -53,6 +53,7 @@ ALL_TESTS="
test_mirror_gretap_second
"
+REQUIRE_TEAMD="yes"
NUM_NETIFS=6
source lib.sh
source mirror_lib.sh
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
index 1b902cc579f6..a21c771908b3 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -196,7 +196,7 @@ test_span_gre_forbidden_egress()
bridge vlan add dev $swp3 vid 555
# Re-prime FDB
- $ARPING -I br1.555 192.0.2.130 -fqc 1
+ $ARPING -I br1.555 -fqc 1 192.0.2.130
sleep 1
quick_test_span_gre_dir $tundev
@@ -290,7 +290,7 @@ test_span_gre_fdb_roaming()
bridge fdb del dev $swp2 $h3mac vlan 555 master 2>/dev/null
# Re-prime FDB
- $ARPING -I br1.555 192.0.2.130 -fqc 1
+ $ARPING -I br1.555 -fqc 1 192.0.2.130
sleep 1
quick_test_span_gre_dir $tundev
diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh
index af3b398d13f0..694ece9ba3a7 100755
--- a/tools/testing/selftests/net/forwarding/no_forwarding.sh
+++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh
@@ -202,7 +202,7 @@ one_bridge_two_pvids()
ip link set $swp2 master br0
bridge vlan add dev $swp1 vid 1 pvid untagged
- bridge vlan add dev $swp1 vid 2 pvid untagged
+ bridge vlan add dev $swp2 vid 2 pvid untagged
run_test "Switch ports in VLAN-aware bridge with different PVIDs"
@@ -233,6 +233,9 @@ cleanup()
{
pre_cleanup
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
h2_destroy
h1_destroy
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index b98ea9449b8b..dfb6646cb97b 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -18,6 +18,8 @@
# | 2001:db8:1::1/64 2001:db8:2::1/64 |
# | |
# +-----------------------------------------------------------------+
+#
+#shellcheck disable=SC2034 # SC doesn't see our uses of global variables
ALL_TESTS="
ping_ipv4
@@ -27,6 +29,7 @@ ALL_TESTS="
ipv4_sip_equal_dip
ipv6_sip_equal_dip
ipv4_dip_link_local
+ ipv4_sip_link_local
"
NUM_NETIFS=4
@@ -330,6 +333,32 @@ ipv4_dip_link_local()
tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
}
+ipv4_sip_link_local()
+{
+ local sip=169.254.1.1
+
+ RET=0
+
+ # Disable rpfilter to prevent packets to be dropped because of it.
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf."$rp1".rp_filter 0
+
+ tc filter add dev "$rp2" egress protocol ip pref 1 handle 101 \
+ flower src_ip "$sip" action pass
+
+ $MZ "$h1" -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b "$rp1mac" \
+ -A "$sip" -B 198.51.100.2 -q
+
+ tc_check_packets "dev $rp2 egress" 101 5
+ check_err $? "Packets were dropped"
+
+ log_test "IPv4 source IP is link-local"
+
+ tc filter del dev "$rp2" egress protocol ip pref 1 handle 101 flower
+ sysctl_restore net.ipv4.conf."$rp1".rp_filter
+ sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
index e064b946e821..16583a470ec3 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
@@ -109,6 +109,7 @@ ALL_TESTS="
ping_ipv4
ping_ipv6
"
+REQUIRE_TEAMD="yes"
NUM_NETIFS=8
source lib.sh
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
index f05ffe213c46..2a4cd1af1b85 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
@@ -76,6 +76,7 @@
ping_ipv4
ping_ipv6
"}
+REQUIRE_TEAMD="yes"
NUM_NETIFS=8
: ${lib_dir:=.}
source $lib_dir/lib.sh
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 2ba44247c60a..a7d8399c8d4f 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -40,6 +40,7 @@ ALL_TESTS="
ping_ipv4
ping_ipv6
multipath_test
+ multipath16_test
ping_ipv4_blackhole
ping_ipv6_blackhole
nh_stats_test_v4
@@ -226,9 +227,11 @@ routing_nh_obj()
multipath4_test()
{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
+ local desc=$1; shift
+ local weight_rp12=$1; shift
+ local weight_rp13=$1; shift
+ local ports=${1-sp=1024,dp=0-32768}; shift
+
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
@@ -242,7 +245,8 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "$ports"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -258,9 +262,11 @@ multipath4_test()
multipath6_test()
{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
+ local desc=$1; shift
+ local weight_rp12=$1; shift
+ local weight_rp13=$1; shift
+ local ports=${1-sp=1024,dp=0-32768}; shift
+
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
@@ -275,7 +281,8 @@ multipath6_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "$ports"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -313,6 +320,23 @@ multipath_test()
multipath6_test "Weighted MP 11:45" 11 45
}
+multipath16_test()
+{
+ check_nhgw16 104 || return
+
+ log_info "Running 16-bit IPv4 multipath tests"
+ multipath4_test "65535:65535" 65535 65535
+ multipath4_test "128:512" 128 512
+ omit_on_slow \
+ multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+
+ log_info "Running 16-bit IPv6 multipath tests"
+ multipath6_test "65535:65535" 65535 65535
+ multipath6_test "128:512" 128 512
+ omit_on_slow \
+ multipath6_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+}
+
ping_ipv4_blackhole()
{
RET=0
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
index 2903294d8bca..507b2852dabe 100644
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
@@ -117,3 +117,16 @@ __nh_stats_test_v6()
$MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2
sysctl_restore net.ipv6.fib_multipath_hash_policy
}
+
+check_nhgw16()
+{
+ local nhid=$1; shift
+
+ ip nexthop replace id 9999 group "$nhid,65535" &>/dev/null
+ if (( $? )); then
+ log_test_skip "16-bit multipath tests" \
+ "iproute2 or the kernel do not support 16-bit next hop weights"
+ return 1
+ fi
+ ip nexthop del id 9999 ||:
+}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index cd9e346436fc..88ddae05b39d 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -40,6 +40,7 @@ ALL_TESTS="
ping_ipv4
ping_ipv6
multipath_test
+ multipath16_test
nh_stats_test_v4
nh_stats_test_v6
"
@@ -228,9 +229,11 @@ routing_nh_obj()
multipath4_test()
{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
+ local desc=$1; shift
+ local weight_rp12=$1; shift
+ local weight_rp13=$1; shift
+ local ports=${1-sp=1024,dp=0-32768}; shift
+
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
@@ -243,7 +246,8 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "$ports"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -258,9 +262,11 @@ multipath4_test()
multipath6_l4_test()
{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
+ local desc=$1; shift
+ local weight_rp12=$1; shift
+ local weight_rp13=$1; shift
+ local ports=${1-sp=1024,dp=0-32768}; shift
+
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
@@ -273,7 +279,8 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "$ports"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -371,6 +378,41 @@ multipath_test()
ip nexthop replace id 106 group 104,1/105,1 type resilient
}
+multipath16_test()
+{
+ check_nhgw16 104 || return
+
+ log_info "Running 16-bit IPv4 multipath tests"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 0
+
+ ip nexthop replace id 103 group 101,65535/102,65535 type resilient
+ multipath4_test "65535:65535" 65535 65535
+
+ ip nexthop replace id 103 group 101,128/102,512 type resilient
+ multipath4_test "128:512" 128 512
+
+ ip nexthop replace id 103 group 101,255/102,65535 type resilient
+ omit_on_slow \
+ multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running 16-bit IPv6 L4 hash multipath tests"
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 0
+
+ ip nexthop replace id 106 group 104,65535/105,65535 type resilient
+ multipath6_l4_test "65535:65535" 65535 65535
+
+ ip nexthop replace id 106 group 104,128/105,512 type resilient
+ multipath6_l4_test "128:512" 128 512
+
+ ip nexthop replace id 106 group 104,255/105,65535 type resilient
+ omit_on_slow \
+ multipath6_l4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+}
+
nh_stats_test_v4()
{
__nh_stats_test_v4 resilient
diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh
index 5a58b1ec8aef..83e52abdbc2e 100755
--- a/tools/testing/selftests/net/forwarding/router_multicast.sh
+++ b/tools/testing/selftests/net/forwarding/router_multicast.sh
@@ -33,10 +33,6 @@ NUM_NETIFS=6
source lib.sh
source tc_common.sh
-require_command $MCD
-require_command $MC_CLI
-table_name=selftests
-
h1_create()
{
simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64
@@ -149,25 +145,6 @@ router_destroy()
ip link set dev $rp1 down
}
-start_mcd()
-{
- SMCROUTEDIR="$(mktemp -d)"
-
- for ((i = 1; i <= $NUM_NETIFS; ++i)); do
- echo "phyint ${NETIFS[p$i]} enable" >> \
- $SMCROUTEDIR/$table_name.conf
- done
-
- $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \
- -P $SMCROUTEDIR/$table_name.pid
-}
-
-kill_mcd()
-{
- pkill $MCD
- rm -rf $SMCROUTEDIR
-}
-
setup_prepare()
{
h1=${NETIFS[p1]}
@@ -179,7 +156,7 @@ setup_prepare()
rp3=${NETIFS[p5]}
h3=${NETIFS[p6]}
- start_mcd
+ adf_mcd_start || exit "$EXIT_STATUS"
vrf_prepare
@@ -206,7 +183,7 @@ cleanup()
vrf_cleanup
- kill_mcd
+ defer_scopes_cleanup
}
create_mcast_sg()
@@ -214,9 +191,9 @@ create_mcast_sg()
local if_name=$1; shift
local s_addr=$1; shift
local mcast=$1; shift
- local dest_ifs=${@}
+ local dest_ifs=("${@}")
- $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs
+ mc_cli add "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
}
delete_mcast_sg()
@@ -224,9 +201,9 @@ delete_mcast_sg()
local if_name=$1; shift
local s_addr=$1; shift
local mcast=$1; shift
- local dest_ifs=${@}
+ local dest_ifs=("${@}")
- $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs
+ mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
}
mcast_v4()
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index e2be354167a1..46f365b557b7 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -180,6 +180,7 @@ multipath4_test()
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -217,6 +218,7 @@ multipath6_test()
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh
index e60c8b4818cc..6269d5e23487 100755
--- a/tools/testing/selftests/net/forwarding/sch_ets.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets.sh
@@ -11,6 +11,7 @@ ALL_TESTS="
ets_test_strict
ets_test_mixed
ets_test_dwrr
+ ets_test_plug
classifier_mode
ets_test_strict
ets_test_mixed
@@ -24,15 +25,10 @@ switch_create()
# Create a bottleneck so that the DWRR process can kick in.
tc qdisc add dev $swp2 root handle 1: tbf \
rate 1Gbit burst 1Mbit latency 100ms
+ defer tc qdisc del dev $swp2 root
PARENT="parent 1:"
}
-switch_destroy()
-{
- ets_switch_destroy
- tc qdisc del dev $swp2 root
-}
-
# Callback from sch_ets_tests.sh
collect_stats()
{
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh
index f906fcc66572..0453210271dc 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_core.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh
@@ -165,45 +165,31 @@ h1_create()
{
local i;
- simple_if_init $h1
+ adf_simple_if_init $h1
+
mtu_set $h1 9900
+ defer mtu_restore $h1
+
for i in {0..2}; do
vlan_create $h1 1$i v$h1 $(sip $i)/28
+ defer vlan_destroy $h1 1$i
ip link set dev $h1.1$i type vlan egress 0:$i
done
}
-h1_destroy()
-{
- local i
-
- for i in {0..2}; do
- vlan_destroy $h1 1$i
- done
- mtu_restore $h1
- simple_if_fini $h1
-}
-
h2_create()
{
local i
- simple_if_init $h2
- mtu_set $h2 9900
- for i in {0..2}; do
- vlan_create $h2 1$i v$h2 $(dip $i)/28
- done
-}
+ adf_simple_if_init $h2
-h2_destroy()
-{
- local i
+ mtu_set $h2 9900
+ defer mtu_restore $h2
for i in {0..2}; do
- vlan_destroy $h2 1$i
+ vlan_create $h2 1$i v$h2 $(dip $i)/28
+ defer vlan_destroy $h2 1$i
done
- mtu_restore $h2
- simple_if_fini $h2
}
ets_switch_create()
@@ -211,44 +197,45 @@ ets_switch_create()
local i
ip link set dev $swp1 up
+ defer ip link set dev $swp1 down
+
mtu_set $swp1 9900
+ defer mtu_restore $swp1
ip link set dev $swp2 up
+ defer ip link set dev $swp2 down
+
mtu_set $swp2 9900
+ defer mtu_restore $swp2
for i in {0..2}; do
vlan_create $swp1 1$i
+ defer vlan_destroy $swp1 1$i
ip link set dev $swp1.1$i type vlan ingress 0:0 1:1 2:2
vlan_create $swp2 1$i
+ defer vlan_destroy $swp2 1$i
ip link add dev br1$i type bridge
+ defer ip link del dev br1$i
+
ip link set dev $swp1.1$i master br1$i
+ defer ip link set dev $swp1.1$i nomaster
+
ip link set dev $swp2.1$i master br1$i
+ defer ip link set dev $swp2.1$i nomaster
ip link set dev br1$i up
- ip link set dev $swp1.1$i up
- ip link set dev $swp2.1$i up
- done
-}
+ defer ip link set dev br1$i down
-ets_switch_destroy()
-{
- local i
-
- ets_delete_qdisc
+ ip link set dev $swp1.1$i up
+ defer ip link set dev $swp1.1$i down
- for i in {0..2}; do
- ip link del dev br1$i
- vlan_destroy $swp2 1$i
- vlan_destroy $swp1 1$i
+ ip link set dev $swp2.1$i up
+ defer ip link set dev $swp2.1$i down
done
- mtu_restore $swp2
- ip link set dev $swp2 down
-
- mtu_restore $swp1
- ip link set dev $swp1 down
+ defer ets_delete_qdisc
}
setup_prepare()
@@ -262,24 +249,13 @@ setup_prepare()
put=$swp2
hut=$h2
- vrf_prepare
+ adf_vrf_prepare
h1_create
h2_create
switch_create
}
-cleanup()
-{
- pre_cleanup
-
- switch_destroy
- h2_destroy
- h1_destroy
-
- vrf_cleanup
-}
-
ping_ipv4()
{
ping_test $h1.10 $(dip 0) " vlan 10"
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
index f9d26a7911bb..79d837a2868a 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -90,6 +90,7 @@ __ets_dwrr_test()
for stream in ${streams[@]}; do
ets_start_traffic $stream
+ defer stop_traffic $!
done
sleep 10
@@ -120,25 +121,24 @@ __ets_dwrr_test()
${d[0]} ${d[$i]}
fi
done
-
- for stream in ${streams[@]}; do
- stop_traffic
- done
}
ets_dwrr_test_012()
{
- __ets_dwrr_test 0 1 2
+ in_defer_scope \
+ __ets_dwrr_test 0 1 2
}
ets_dwrr_test_01()
{
- __ets_dwrr_test 0 1
+ in_defer_scope \
+ __ets_dwrr_test 0 1
}
ets_dwrr_test_12()
{
- __ets_dwrr_test 1 2
+ in_defer_scope \
+ __ets_dwrr_test 1 2
}
ets_qdisc_setup()
@@ -224,3 +224,11 @@ ets_test_dwrr()
ets_set_dwrr_two_bands
xfail_on_slow ets_dwrr_test_01
}
+
+ets_test_plug()
+{
+ ets_change_qdisc $put 2 "3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3" "1514 1514"
+ tc qdisc add dev $put handle 20: parent 10:4 plug
+ start_traffic_pktsize 100 $h1.10 192.0.2.1 192.0.2.2 00:c1:a0:c1:a0:00 "-c 1"
+ ets_qdisc_setup $put 2
+}
diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
index 17f28644568e..f2a3d9254642 100755
--- a/tools/testing/selftests/net/forwarding/sch_red.sh
+++ b/tools/testing/selftests/net/forwarding/sch_red.sh
@@ -52,72 +52,61 @@ PKTSZ=1400
h1_create()
{
- simple_if_init $h1 192.0.2.1/28
+ adf_simple_if_init $h1 192.0.2.1/28
+
mtu_set $h1 10000
+ defer mtu_restore $h1
+
tc qdisc replace dev $h1 root handle 1: tbf \
rate 10Mbit burst 10K limit 1M
-}
-
-h1_destroy()
-{
- tc qdisc del dev $h1 root
- mtu_restore $h1
- simple_if_fini $h1 192.0.2.1/28
+ defer tc qdisc del dev $h1 root
}
h2_create()
{
- simple_if_init $h2 192.0.2.2/28
- mtu_set $h2 10000
-}
+ adf_simple_if_init $h2 192.0.2.2/28
-h2_destroy()
-{
- mtu_restore $h2
- simple_if_fini $h2 192.0.2.2/28
+ mtu_set $h2 10000
+ defer mtu_restore $h2
}
h3_create()
{
- simple_if_init $h3 192.0.2.3/28
- mtu_set $h3 10000
-}
+ adf_simple_if_init $h3 192.0.2.3/28
-h3_destroy()
-{
- mtu_restore $h3
- simple_if_fini $h3 192.0.2.3/28
+ mtu_set $h3 10000
+ defer mtu_restore $h3
}
switch_create()
{
ip link add dev br up type bridge
+ defer ip link del dev br
+
ip link set dev $swp1 up master br
+ defer ip link set dev $swp1 down nomaster
+
ip link set dev $swp2 up master br
+ defer ip link set dev $swp2 down nomaster
+
ip link set dev $swp3 up master br
+ defer ip link set dev $swp3 down nomaster
mtu_set $swp1 10000
+ defer mtu_restore $h1
+
mtu_set $swp2 10000
+ defer mtu_restore $h2
+
mtu_set $swp3 10000
+ defer mtu_restore $h3
tc qdisc replace dev $swp3 root handle 1: tbf \
rate 10Mbit burst 10K limit 1M
- ip link add name _drop_test up type dummy
-}
+ defer tc qdisc del dev $swp3 root
-switch_destroy()
-{
- ip link del dev _drop_test
- tc qdisc del dev $swp3 root
-
- mtu_restore $h3
- mtu_restore $h2
- mtu_restore $h1
-
- ip link set dev $swp3 down nomaster
- ip link set dev $swp2 down nomaster
- ip link set dev $swp1 down nomaster
- ip link del dev br
+ ip link add name _drop_test up type dummy
+ defer ip link del dev _drop_test
}
setup_prepare()
@@ -133,7 +122,7 @@ setup_prepare()
h3_mac=$(mac_get $h3)
- vrf_prepare
+ adf_vrf_prepare
h1_create
h2_create
@@ -141,18 +130,6 @@ setup_prepare()
switch_create
}
-cleanup()
-{
- pre_cleanup
-
- switch_destroy
- h3_destroy
- h2_destroy
- h1_destroy
-
- vrf_cleanup
-}
-
ping_ipv4()
{
ping_test $h1 192.0.2.3 " from host 1"
@@ -287,6 +264,7 @@ do_ecn_test()
$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
-a own -b $h3_mac -t tcp -q tos=0x01 &
+ defer stop_traffic $!
sleep 1
ecn_test_common "$name" $limit
@@ -298,9 +276,6 @@ do_ecn_test()
build_backlog $((2 * limit)) udp >/dev/null
check_fail $? "UDP traffic went into backlog instead of being early-dropped"
log_test "$name backlog > limit: UDP early-dropped"
-
- stop_traffic
- sleep 1
}
do_ecn_nodrop_test()
@@ -310,6 +285,7 @@ do_ecn_nodrop_test()
$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
-a own -b $h3_mac -t tcp -q tos=0x01 &
+ defer stop_traffic $!
sleep 1
ecn_test_common "$name" $limit
@@ -321,9 +297,6 @@ do_ecn_nodrop_test()
build_backlog $((2 * limit)) udp >/dev/null
check_err $? "UDP traffic was early-dropped instead of getting into backlog"
log_test "$name backlog > limit: UDP not dropped"
-
- stop_traffic
- sleep 1
}
do_red_test()
@@ -336,6 +309,7 @@ do_red_test()
# is above limit.
$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
-a own -b $h3_mac -t tcp -q tos=0x01 &
+ defer stop_traffic $!
# Pushing below the queue limit should work.
RET=0
@@ -352,9 +326,6 @@ do_red_test()
pct=$(check_marking "== 0")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
log_test "RED backlog > limit"
-
- stop_traffic
- sleep 1
}
do_red_qevent_test()
@@ -369,6 +340,7 @@ do_red_qevent_test()
$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
-a own -b $h3_mac -t udp -q &
+ defer stop_traffic $!
sleep 1
tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
@@ -396,9 +368,6 @@ do_red_qevent_test()
check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen"
log_test "RED early_dropped packets mirrored"
-
- stop_traffic
- sleep 1
}
do_ecn_qevent_test()
@@ -410,6 +379,7 @@ do_ecn_qevent_test()
$MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
-a own -b $h3_mac -t tcp -q tos=0x01 &
+ defer stop_traffic $!
sleep 1
tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
@@ -428,9 +398,6 @@ do_ecn_qevent_test()
tc filter del block 10 pref 1234 handle 102 matchall
log_test "ECN marked packets mirrored"
-
- stop_traffic
- sleep 1
}
install_qdisc()
@@ -451,36 +418,36 @@ uninstall_qdisc()
ecn_test()
{
install_qdisc ecn
+ defer uninstall_qdisc
xfail_on_slow do_ecn_test $BACKLOG
- uninstall_qdisc
}
ecn_nodrop_test()
{
install_qdisc ecn nodrop
+ defer uninstall_qdisc
xfail_on_slow do_ecn_nodrop_test $BACKLOG
- uninstall_qdisc
}
red_test()
{
install_qdisc
+ defer uninstall_qdisc
xfail_on_slow do_red_test $BACKLOG
- uninstall_qdisc
}
red_qevent_test()
{
install_qdisc qevent early_drop block 10
+ defer uninstall_qdisc
xfail_on_slow do_red_qevent_test $BACKLOG
- uninstall_qdisc
}
ecn_qevent_test()
{
install_qdisc ecn qevent mark block 10
+ defer uninstall_qdisc
xfail_on_slow do_ecn_qevent_test $BACKLOG
- uninstall_qdisc
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
index 9cd884d4a5de..070c17faa9e4 100644
--- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
@@ -59,69 +59,65 @@ host_create()
local dev=$1; shift
local host=$1; shift
- simple_if_init $dev
+ adf_simple_if_init $dev
+
mtu_set $dev 10000
+ defer mtu_restore $dev
vlan_create $dev 10 v$dev $(ipaddr $host 10)/28
+ defer vlan_destroy $dev 10
ip link set dev $dev.10 type vlan egress 0:0
vlan_create $dev 11 v$dev $(ipaddr $host 11)/28
+ defer vlan_destroy $dev 11
ip link set dev $dev.11 type vlan egress 0:1
}
-host_destroy()
-{
- local dev=$1; shift
-
- vlan_destroy $dev 11
- vlan_destroy $dev 10
- mtu_restore $dev
- simple_if_fini $dev
-}
-
h1_create()
{
host_create $h1 1
}
-h1_destroy()
-{
- host_destroy $h1
-}
-
h2_create()
{
host_create $h2 2
tc qdisc add dev $h2 clsact
+ defer tc qdisc del dev $h2 clsact
+
tc filter add dev $h2 ingress pref 1010 prot 802.1q \
flower $TCFLAGS vlan_id 10 action pass
tc filter add dev $h2 ingress pref 1011 prot 802.1q \
flower $TCFLAGS vlan_id 11 action pass
}
-h2_destroy()
-{
- tc qdisc del dev $h2 clsact
- host_destroy $h2
-}
-
switch_create()
{
local intf
local vlan
ip link add dev br10 type bridge
+ defer ip link del dev br10
+
ip link add dev br11 type bridge
+ defer ip link del dev br11
for intf in $swp1 $swp2; do
ip link set dev $intf up
+ defer ip link set dev $intf down
+
mtu_set $intf 10000
+ defer mtu_restore $intf
for vlan in 10 11; do
vlan_create $intf $vlan
+ defer vlan_destroy $intf $vlan
+
ip link set dev $intf.$vlan master br$vlan
+ defer ip link set dev $intf.$vlan nomaster
+
ip link set dev $intf.$vlan up
+ defer ip link set dev $intf.$vlan down
done
done
@@ -130,34 +126,10 @@ switch_create()
done
ip link set dev br10 up
- ip link set dev br11 up
-}
-
-switch_destroy()
-{
- local intf
- local vlan
-
- # A test may have been interrupted mid-run, with Qdisc installed. Delete
- # it here.
- tc qdisc del dev $swp2 root 2>/dev/null
-
- ip link set dev br11 down
- ip link set dev br10 down
+ defer ip link set dev br10 down
- for intf in $swp2 $swp1; do
- for vlan in 11 10; do
- ip link set dev $intf.$vlan down
- ip link set dev $intf.$vlan nomaster
- vlan_destroy $intf $vlan
- done
-
- mtu_restore $intf
- ip link set dev $intf down
- done
-
- ip link del dev br11
- ip link del dev br10
+ ip link set dev br11 up
+ defer ip link set dev br11 down
}
setup_prepare()
@@ -176,24 +148,13 @@ setup_prepare()
h2_mac=$(mac_get $h2)
- vrf_prepare
+ adf_vrf_prepare
h1_create
h2_create
switch_create
}
-cleanup()
-{
- pre_cleanup
-
- switch_destroy
- h2_destroy
- h1_destroy
-
- vrf_cleanup
-}
-
ping_ipv4()
{
ping_test $h1.10 $(ipaddr 2 10) " vlan 10"
@@ -207,18 +168,18 @@ tbf_get_counter()
tc_rule_stats_get $h2 10$vlan ingress .bytes
}
-do_tbf_test()
+__tbf_test()
{
local vlan=$1; shift
local mbit=$1; shift
start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 2 $vlan) $h2_mac
+ defer stop_traffic $!
sleep 5 # Wait for the burst to dwindle
local t2=$(busywait_for_counter 1000 +1 tbf_get_counter $vlan)
sleep 10
local t3=$(tbf_get_counter $vlan)
- stop_traffic
RET=0
@@ -231,3 +192,9 @@ do_tbf_test()
log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit"
}
+
+do_tbf_test()
+{
+ in_defer_scope \
+ __tbf_test "$@"
+}
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
index df9bcd6a811a..c182a04282bc 100644
--- a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
@@ -30,8 +30,9 @@ tbf_test()
# This test is used for both ETS and PRIO. Even though we only need two
# bands, PRIO demands a minimum of three.
tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0
+ defer tc qdisc del dev $swp2 root
+
tbf_test_one 128K
- tc qdisc del dev $swp2 root
}
tbf_root_test()
@@ -42,6 +43,8 @@ tbf_root_test()
tc qdisc replace dev $swp2 root handle 1: \
tbf rate 400Mbit burst $bs limit 1M
+ defer tc qdisc del dev $swp2 root
+
tc qdisc replace dev $swp2 parent 1:1 handle 10: \
$QDISC 3 priomap 2 1 0
tc qdisc replace dev $swp2 parent 10:3 handle 103: \
@@ -53,8 +56,6 @@ tbf_root_test()
do_tbf_test 10 400 $bs
do_tbf_test 11 400 $bs
-
- tc qdisc del dev $swp2 root
}
if type -t sch_tbf_pre_hook >/dev/null; then
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
index 96c997be0d03..9f20320f8d84 100755
--- a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
@@ -14,13 +14,14 @@ tbf_test_one()
tc qdisc replace dev $swp2 root handle 108: tbf \
rate 400Mbit burst $bs limit 1M
+ defer tc qdisc del dev $swp2 root
+
do_tbf_test 10 400 $bs
}
tbf_test()
{
tbf_test_one 128K
- tc qdisc del dev $swp2 root
}
if type -t sch_tbf_pre_hook >/dev/null; then
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 589629636502..ea89e558672d 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -4,7 +4,8 @@
ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
gact_trap_test mirred_egress_to_ingress_test \
- mirred_egress_to_ingress_tcp_test"
+ mirred_egress_to_ingress_tcp_test \
+ ingress_2nd_vlan_push egress_2nd_vlan_push"
NUM_NETIFS=4
source tc_common.sh
source lib.sh
@@ -244,6 +245,49 @@ mirred_egress_to_ingress_tcp_test()
log_test "mirred_egress_to_ingress_tcp ($tcflags)"
}
+ingress_2nd_vlan_push()
+{
+ tc filter add dev $swp1 ingress pref 20 chain 0 handle 20 flower \
+ $tcflags num_of_vlans 1 \
+ action vlan push id 100 protocol 0x8100 action goto chain 5
+ tc filter add dev $swp1 ingress pref 30 chain 5 handle 30 flower \
+ $tcflags num_of_vlans 2 \
+ cvlan_ethtype 0x800 action pass
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -Q 10 -q
+
+ tc_check_packets "dev $swp1 ingress" 30 1
+ check_err $? "No double-vlan packets received"
+
+ tc filter del dev $swp1 ingress pref 20 chain 0 handle 20 flower
+ tc filter del dev $swp1 ingress pref 30 chain 5 handle 30 flower
+
+ log_test "ingress_2nd_vlan_push ($tcflags)"
+}
+
+egress_2nd_vlan_push()
+{
+ tc filter add dev $h1 egress pref 20 chain 0 handle 20 flower \
+ $tcflags num_of_vlans 0 \
+ action vlan push id 10 protocol 0x8100 \
+ pipe action vlan push id 100 protocol 0x8100 action goto chain 5
+ tc filter add dev $h1 egress pref 30 chain 5 handle 30 flower \
+ $tcflags num_of_vlans 2 \
+ cvlan_ethtype 0x800 action pass
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h1 egress" 30 1
+ check_err $? "No double-vlan packets received"
+
+ tc filter del dev $h1 egress pref 20 chain 0 handle 20 flower
+ tc filter del dev $h1 egress pref 30 chain 5 handle 30 flower
+
+ log_test "egress_2nd_vlan_push ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index b1daad19b01e..b58909a93112 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -6,7 +6,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
match_ip_tos_test match_indev_test match_ip_ttl_test
match_mpls_label_test \
match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \
- match_mpls_lse_test"
+ match_mpls_lse_test match_erspan_opts_test"
NUM_NETIFS=2
source tc_common.sh
source lib.sh
@@ -676,6 +676,56 @@ match_mpls_lse_test()
log_test "mpls lse match ($tcflags)"
}
+match_erspan_opts_test()
+{
+ RET=0
+
+ check_tc_erspan_support $h2 || return 0
+
+ # h1 erspan setup
+ tunnel_create erspan1 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1001 \
+ tos C ttl 64 erspan_ver 1 erspan 6789 # ERSPAN Type II
+ tunnel_create erspan2 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1002 \
+ tos C ttl 64 erspan_ver 2 erspan_dir egress erspan_hwid 63 \
+ # ERSPAN Type III
+ ip link set dev erspan1 master v$h1
+ ip link set dev erspan2 master v$h1
+ # h2 erspan setup
+ ip link add ep-ex type erspan ttl 64 external # To collect tunnel info
+ ip link set ep-ex up
+ ip link set dev ep-ex master v$h2
+ tc qdisc add dev ep-ex clsact
+
+ # ERSPAN Type II [decap direction]
+ tc filter add dev ep-ex ingress protocol ip handle 101 flower \
+ $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+ enc_key_id 1001 erspan_opts 1:6789:0:0 \
+ action drop
+ # ERSPAN Type III [decap direction]
+ tc filter add dev ep-ex ingress protocol ip handle 102 flower \
+ $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+ enc_key_id 1002 erspan_opts 2:0:1:63 action drop
+
+ ep1mac=$(mac_get erspan1)
+ $MZ erspan1 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q
+ tc_check_packets "dev ep-ex ingress" 101 1
+ check_err $? "ERSPAN Type II"
+
+ ep2mac=$(mac_get erspan2)
+ $MZ erspan2 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q
+ tc_check_packets "dev ep-ex ingress" 102 1
+ check_err $? "ERSPAN Type III"
+
+ # h2 erspan cleanup
+ tc qdisc del dev ep-ex clsact
+ tunnel_destroy ep-ex
+ # h1 erspan cleanup
+ tunnel_destroy erspan2 # ERSPAN Type III
+ tunnel_destroy erspan1 # ERSPAN Type II
+
+ log_test "erspan_opts match ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
index 3885a2a91f7d..baed5e380dae 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
@@ -20,6 +20,7 @@ ALL_TESTS="
test_port_range_ipv4_tcp
test_port_range_ipv6_udp
test_port_range_ipv6_tcp
+ test_port_range_ipv4_udp_drop
"
NUM_NETIFS=4
@@ -194,6 +195,51 @@ test_port_range_ipv6_tcp()
__test_port_range $proto $ip_proto $sip $dip $mode "$name"
}
+test_port_range_ipv4_udp_drop()
+{
+ local proto=ipv4
+ local ip_proto=udp
+ local sip=192.0.2.1
+ local dip=192.0.2.2
+ local mode="-4"
+ local name="IPv4 UDP Drop"
+ local dmac=$(mac_get $h2)
+ local smac=$(mac_get $h1)
+ local sport_min=2000
+ local sport_max=3000
+ local sport_mid=$((sport_min + (sport_max - sport_min) / 2))
+ local dport=5000
+
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \
+ flower src_ip $sip dst_ip $dip ip_proto $ip_proto \
+ src_port $sport_min-$sport_max \
+ dst_port $dport \
+ action drop
+
+ # Test ports outside range - should pass
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_min - 1)),dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_max + 1)),dp=$dport"
+
+ # Test ports inside range - should be dropped
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_min,dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_mid,dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_max,dp=$dport"
+
+ tc_check_packets "dev $swp1 ingress" 101 3
+ check_err $? "Filter did not drop the expected number of packets"
+
+ tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower
+
+ log_test "Port range matching - $name"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
index 5103f64a71d6..509fdedfcfa1 100755
--- a/tools/testing/selftests/net/forwarding/tc_police.sh
+++ b/tools/testing/selftests/net/forwarding/tc_police.sh
@@ -148,7 +148,7 @@ police_common_test()
log_test "$test_name"
- { kill %% && wait %%; } 2>/dev/null
+ kill_process %%
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
}
@@ -198,7 +198,7 @@ police_shared_common_test()
log_test "$test_name"
- { kill %% && wait %%; } 2>/dev/null
+ kill_process %%
}
police_shared_test()
@@ -278,7 +278,7 @@ police_mirror_common_test()
log_test "$test_name"
- { kill %% && wait %%; } 2>/dev/null
+ kill_process %%
tc filter del dev $pol_if $dir protocol ip pref 1 handle 101 flower
tc filter del dev $h3 ingress protocol ip pref 1 handle 101 flower
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
@@ -320,7 +320,7 @@ police_pps_common_test()
log_test "$test_name"
- { kill %% && wait %%; } 2>/dev/null
+ kill_process %%
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
}
diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh
new file mode 100755
index 000000000000..8992aeabfe0b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_taprio.sh
@@ -0,0 +1,421 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS=" \
+ test_clock_jump_backward \
+ test_taprio_after_ptp \
+ test_max_sdu \
+ test_clock_jump_backward_forward \
+"
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+source tsn_lib.sh
+
+require_command python3
+
+# The test assumes the usual topology from the README, where h1 is connected to
+# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge.
+# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2.
+# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to
+# swp1 (and both to CLOCK_REALTIME).
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+UDS_ADDRESS_H1="/var/run/ptp4l_h1"
+UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1"
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# Tunables
+NUM_PKTS=100
+STREAM_VID=10
+STREAM_PRIO_1=6
+STREAM_PRIO_2=5
+STREAM_PRIO_3=4
+# PTP uses TC 0
+ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2))
+# Use a conservative cycle of 10 ms to allow the test to still pass when the
+# kernel has some extra overhead like lockdep etc
+CYCLE_TIME_NS=10000000
+# Create two Gate Control List entries, one OPEN and one CLOSE, of equal
+# durations
+GATE_DURATION_NS=$((CYCLE_TIME_NS / 2))
+# Give 2/3 of the cycle time to user space and 1/3 to the kernel
+FUDGE_FACTOR=$((CYCLE_TIME_NS / 3))
+# Shift the isochron base time by half the gate time, so that packets are
+# always received by swp1 close to the middle of the time slot, to minimize
+# inaccuracies due to network sync
+SHIFT_TIME_NS=$((GATE_DURATION_NS / 2))
+
+path_delay=
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+switch_create()
+{
+ local h2_mac_addr=$(mac_get $h2)
+
+ ip link set $swp1 up
+ ip link set $swp2 up
+
+ ip link add br0 type bridge vlan_filtering 1
+ ip link set $swp1 master br0
+ ip link set $swp2 master br0
+ ip link set br0 up
+
+ bridge vlan add dev $swp2 vid $STREAM_VID
+ bridge vlan add dev $swp1 vid $STREAM_VID
+ bridge fdb add dev $swp2 \
+ $h2_mac_addr vlan $STREAM_VID static master
+}
+
+switch_destroy()
+{
+ ip link del br0
+}
+
+ptp_setup()
+{
+ # Set up swp1 as a master PHC for h1, synchronized to the local
+ # CLOCK_REALTIME.
+ phc2sys_start $UDS_ADDRESS_SWP1
+ ptp4l_start $h1 true $UDS_ADDRESS_H1
+ ptp4l_start $swp1 false $UDS_ADDRESS_SWP1
+}
+
+ptp_cleanup()
+{
+ ptp4l_stop $swp1
+ ptp4l_stop $h1
+ phc2sys_stop
+}
+
+txtime_setup()
+{
+ local if_name=$1
+
+ tc qdisc add dev $if_name clsact
+ # Classify PTP on TC 7 and isochron on TC 6
+ tc filter add dev $if_name egress protocol 0x88f7 \
+ flower action skbedit priority 7
+ tc filter add dev $if_name egress protocol 802.1Q \
+ flower vlan_ethtype 0xdead action skbedit priority 6
+ tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+ map 0 1 2 3 4 5 6 7 \
+ hw 1
+ # Set up TC 5, 6, 7 for SO_TXTIME. tc-mqprio queues count from 1.
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+}
+
+txtime_cleanup()
+{
+ local if_name=$1
+
+ tc qdisc del dev $if_name clsact
+ tc qdisc del dev $if_name root
+}
+
+taprio_replace()
+{
+ local if_name="$1"; shift
+ local extra_args="$1"; shift
+
+ # STREAM_PRIO_1 always has an open gate.
+ # STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time)
+ # STREAM_PRIO_3 always has a closed gate.
+ tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+ map 0 1 2 3 4 5 6 7 \
+ sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \
+ sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \
+ base-time 0 flags 0x2 $extra_args
+ taprio_wait_for_admin $if_name
+}
+
+taprio_cleanup()
+{
+ local if_name=$1
+
+ tc qdisc del dev $if_name root
+}
+
+probe_path_delay()
+{
+ local isochron_dat="$(mktemp)"
+ local received
+
+ log_info "Probing path delay"
+
+ isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \
+ "$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \
+ "$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat"
+
+ received=$(isochron_report_num_received "$isochron_dat")
+ if [ "$received" != "$NUM_PKTS" ]; then
+ echo "Cannot establish basic data path between $h1 and $h2"
+ exit $ksft_fail
+ fi
+
+ printf "pdelay = {}\n" > isochron_data.py
+ isochron report --input-file "$isochron_dat" \
+ --printf-format "pdelay[%u] = %d - %d\n" \
+ --printf-args "qRT" \
+ >> isochron_data.py
+ cat <<-'EOF' > isochron_postprocess.py
+ #!/usr/bin/env python3
+
+ from isochron_data import pdelay
+ import numpy as np
+
+ w = np.array(list(pdelay.values()))
+ print("{}".format(np.max(w)))
+ EOF
+ path_delay=$(python3 ./isochron_postprocess.py)
+
+ log_info "Path delay from $h1 to $h2 estimated at $path_delay ns"
+
+ if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then
+ echo "Path delay larger than gate duration, aborting"
+ exit $ksft_fail
+ fi
+
+ rm -f ./isochron_data.py 2> /dev/null
+ rm -f ./isochron_postprocess.py 2> /dev/null
+ rm -f "$isochron_dat" 2> /dev/null
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+
+ txtime_setup $h1
+
+ # Temporarily set up PTP just to probe the end-to-end path delay.
+ ptp_setup
+ probe_path_delay
+ ptp_cleanup
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ isochron_recv_stop
+ txtime_cleanup $h1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+run_test()
+{
+ local base_time=$1; shift
+ local stream_prio=$1; shift
+ local expected_delay=$1; shift
+ local should_fail=$1; shift
+ local test_name=$1; shift
+ local isochron_dat="$(mktemp)"
+ local received
+ local median_delay
+
+ RET=0
+
+ # Set the shift time equal to the cycle time, which effectively
+ # cancels the default advance time. Packets won't be sent early in
+ # software, which ensures that they won't prematurely enter through
+ # the open gate in __test_out_of_band(). Also, the gate is open for
+ # long enough that this won't cause a problem in __test_in_band().
+ isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \
+ "$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \
+ "$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat"
+
+ received=$(isochron_report_num_received "$isochron_dat")
+ [ "$received" = "$NUM_PKTS" ]
+ check_err_fail $should_fail $? "Reception of $NUM_PKTS packets"
+
+ if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then
+ printf "pdelay = {}\n" > isochron_data.py
+ isochron report --input-file "$isochron_dat" \
+ --printf-format "pdelay[%u] = %d - %d\n" \
+ --printf-args "qRT" \
+ >> isochron_data.py
+ cat <<-'EOF' > isochron_postprocess.py
+ #!/usr/bin/env python3
+
+ from isochron_data import pdelay
+ import numpy as np
+
+ w = np.array(list(pdelay.values()))
+ print("{}".format(int(np.median(w))))
+ EOF
+ median_delay=$(python3 ./isochron_postprocess.py)
+
+ # If the condition below is true, packets were delayed by a closed gate
+ [ "$median_delay" -gt $((path_delay + expected_delay)) ]
+ check_fail $? "Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay"
+
+ # If the condition below is true, packets were sent expecting them to
+ # hit a closed gate in the switch, but were not delayed
+ [ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ]
+ check_fail $? "Median delay $median_delay is less than expected delay $expected_delay"
+ fi
+
+ log_test "$test_name"
+
+ rm -f ./isochron_data.py 2> /dev/null
+ rm -f ./isochron_postprocess.py 2> /dev/null
+ rm -f "$isochron_dat" 2> /dev/null
+}
+
+__test_always_open()
+{
+ run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open"
+}
+
+__test_always_closed()
+{
+ run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed"
+}
+
+__test_in_band()
+{
+ # Send packets in-band with the OPEN gate entry
+ run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate"
+}
+
+__test_out_of_band()
+{
+ # Send packets in-band with the CLOSE gate entry
+ run_test 0.005000000 $STREAM_PRIO_2 \
+ $((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \
+ "Out of band with gate"
+}
+
+run_subtests()
+{
+ __test_always_open
+ __test_always_closed
+ __test_in_band
+ __test_out_of_band
+}
+
+test_taprio_after_ptp()
+{
+ log_info "Setting up taprio after PTP"
+ ptp_setup
+ taprio_replace $swp2
+ run_subtests
+ taprio_cleanup $swp2
+ ptp_cleanup
+}
+
+__test_under_max_sdu()
+{
+ # Limit max-sdu for STREAM_PRIO_1
+ taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0"
+ run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU"
+}
+
+__test_over_max_sdu()
+{
+ # Limit max-sdu for STREAM_PRIO_1
+ taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0"
+ run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU"
+}
+
+test_max_sdu()
+{
+ ptp_setup
+ __test_under_max_sdu
+ __test_over_max_sdu
+ taprio_cleanup $swp2
+ ptp_cleanup
+}
+
+# Perform a clock jump in the past without synchronization running, so that the
+# time base remains where it was set by phc_ctl.
+test_clock_jump_backward()
+{
+ # This is a more complex schedule specifically crafted in a way that
+ # has been problematic on NXP LS1028A. Not much to test with it other
+ # than the fact that it passes traffic.
+ tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \
+ base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \
+ sched-entry S 20 300000 sched-entry S 48 200000 \
+ sched-entry S 20 300000 sched-entry S 83 200000 \
+ sched-entry S 40 300000 sched-entry S 00 200000 flags 2
+
+ log_info "Forcing a backward clock jump"
+ phc_ctl $swp1 set 0
+
+ ping_test $h1 192.0.2.2
+ taprio_cleanup $swp2
+}
+
+# Test that taprio tolerates clock jumps.
+# Since ptp4l and phc2sys are running, it is expected for the time to
+# eventually recover (through yet another clock jump). Isochron waits
+# until that is the case.
+test_clock_jump_backward_forward()
+{
+ log_info "Forcing a backward and a forward clock jump"
+ taprio_replace $swp2
+ phc_ctl $swp1 set 0
+ ptp_setup
+ ping_test $h1 192.0.2.2
+ run_subtests
+ ptp_cleanup
+ taprio_cleanup $swp2
+}
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_test_skip "Could not test offloaded functionality"
+ exit $EXIT_STATUS
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
index b91bcd8008a9..08c044ff6689 100644
--- a/tools/testing/selftests/net/forwarding/tsn_lib.sh
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -2,6 +2,8 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright 2021-2022 NXP
+tc_testing_scripts_dir=$(dirname $0)/../../tc-testing/scripts
+
REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes}
REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes}
@@ -18,6 +20,7 @@ fi
if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then
require_command phc2sys
require_command ptp4l
+ require_command phc_ctl
fi
phc2sys_start()
@@ -182,6 +185,7 @@ isochron_do()
local base_time=$1; shift
local cycle_time=$1; shift
local shift_time=$1; shift
+ local window_size=$1; shift
local num_pkts=$1; shift
local vid=$1; shift
local priority=$1; shift
@@ -212,6 +216,10 @@ isochron_do()
extra_args="${extra_args} --shift-time=${shift_time}"
fi
+ if ! [ -z "${window_size}" ]; then
+ extra_args="${extra_args} --window-size=${window_size}"
+ fi
+
if [ "${use_l2}" = "true" ]; then
extra_args="${extra_args} --l2 --etype=0xdead ${vid}"
receiver_extra_args="--l2 --etype=0xdead"
@@ -247,3 +255,21 @@ isochron_do()
cpufreq_restore ${ISOCHRON_CPU}
}
+
+isochron_report_num_received()
+{
+ local isochron_dat=$1; shift
+
+ # Count all received packets by looking at the non-zero RX timestamps
+ isochron report \
+ --input-file "${isochron_dat}" \
+ --printf-format "%u\n" --printf-args "R" | \
+ grep -w -v '0' | wc -l
+}
+
+taprio_wait_for_admin()
+{
+ local if_name="$1"; shift
+
+ "$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name"
+}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index 3f9d50f1ef9e..b43816dd998c 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -428,6 +428,14 @@ __test_flood()
test_flood()
{
__test_flood de:ad:be:ef:13:37 192.0.2.100 "flood"
+
+ # Add an entry with arbitrary destination IP. Verify that packets are
+ # not duplicated (this can happen if hardware floods the packets, and
+ # then traps them due to misconfiguration, so software data path repeats
+ # flooding and resends packets).
+ bridge fdb append dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self
+ __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood, unresolved FDB entry"
+ bridge fdb del dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self
}
vxlan_fdb_add_del()
@@ -740,6 +748,8 @@ test_learning()
vxlan_flood_test $mac $dst 0 10 0
+ # The entry should age out when it only forwards traffic
+ $MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q &
sleep 60
bridge fdb show brport vx1 | grep $mac | grep -q self
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
index fb9a34cb50c6..afc65647f673 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
@@ -539,6 +539,21 @@ test_flood()
10 10 0 10 0
__test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \
10 0 10 0 10
+
+ # Add entries with arbitrary destination IP. Verify that packets are
+ # not duplicated (this can happen if hardware floods the packets, and
+ # then traps them due to misconfiguration, so software data path repeats
+ # flooding and resends packets).
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self
+
+ __test_flood de:ad:be:ef:13:37 192.0.2.100 10 \
+ "flood vlan 10, unresolved FDB entry" 10 10 0 10 0
+ __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 \
+ "flood vlan 20, unresolved FDB entry" 10 0 10 0 10
+
+ bridge fdb del dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self
+ bridge fdb del dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self
}
vxlan_fdb_add_del()
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
new file mode 100755
index 000000000000..6a570d256e07
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
@@ -0,0 +1,766 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------------------------+
+# | + $h1.10 + $h1.20 |
+# | | 192.0.2.1/28 | 2001:db8:1::1/64 |
+# | \________ ________/ |
+# | \ / |
+# | + $h1 H1 (vrf) |
+# +-----------|-----------------------------+
+# |
+# +-----------|----------------------------------------------------------------+
+# | +---------|--------------------------------------+ SWITCH (main vrf) |
+# | | + $swp1 BR1 (802.1q) | |
+# | | vid 10 20 | |
+# | | | |
+# | | + vx10 (vxlan) + vx20 (vxlan) | + lo10 (dummy) |
+# | | local 192.0.2.100 local 2001:db8:4::1 | 192.0.2.100/28 |
+# | | group 233.252.0.1 group ff0e::1:2:3 | 2001:db8:4::1/64 |
+# | | id 1000 id 2000 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | +------------------------------------------------+ |
+# | |
+# | + $swp2 $swp3 + |
+# | | 192.0.2.33/28 192.0.2.65/28 | |
+# | | 2001:db8:2::1/64 2001:db8:3::1/64 | |
+# | | | |
+# +---|--------------------------------------------------------------------|---+
+# | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | | H2 (vrf) | | H3 (vrf) | |
+# | +-|----------------------------+ | | +-----------------------------|-+ |
+# | | + $h2 BR2 (802.1d) | | | | BR3 (802.1d) $h3 + | |
+# | | | | | | | |
+# | | + v1$h2 (veth) | | | | v1$h3 (veth) + | |
+# | +-|----------------------------+ | | +-----------------------------|-+ |
+# | | | | | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | + v2$h2 (veth) NS2 (netns) | | NS3 (netns) v2$h3 (veth) + |
+# | 192.0.2.34/28 | | 192.0.2.66/28 |
+# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 |
+# | | | |
+# | +--------------------------------+ | | +--------------------------------+ |
+# | | BR1 (802.1q) | | | | BR1 (802.1q) | |
+# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | |
+# | | local 192.0.2.34 | | | | local 192.0.2.50 | |
+# | | group 233.252.0.1 dev v2$h2 | | | | group 233.252.0.1 dev v2$h3 | |
+# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | |
+# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | |
+# | | | | | | | |
+# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | |
+# | | local 2001:db8:2::2 | | | | local 2001:db8:3::2 | |
+# | | group ff0e::1:2:3 dev v2$h2 | | | | group ff0e::1:2:3 dev v2$h3 | |
+# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | |
+# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | |
+# | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 10 20 | | | | | vid 10 20 | |
+# | +--|-----------------------------+ | | +--|-----------------------------+ |
+# | | | | | |
+# | +--|-----------------------------+ | | +--|-----------------------------+ |
+# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | |
+# | | |\ | | | | |\ | |
+# | | | + w2.10 | | | | | + w2.10 | |
+# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | |
+# | | | | | | | | | |
+# | | + w2.20 | | | | + w2.20 | |
+# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | |
+# | +--------------------------------+ | | +--------------------------------+ |
+# +------------------------------------+ +------------------------------------+
+#
+#shellcheck disable=SC2317 # SC doesn't see our uses of functions.
+
+: "${VXPORT:=4789}"
+export VXPORT
+
+: "${GROUP4:=233.252.0.1}"
+export GROUP4
+
+: "${GROUP6:=ff0e::1:2:3}"
+export GROUP6
+
+: "${IPMR:=lo10}"
+
+ALL_TESTS="
+ ipv4_nomcroute
+ ipv4_mcroute
+ ipv4_mcroute_changelink
+ ipv4_mcroute_starg
+ ipv4_mcroute_noroute
+ ipv4_mcroute_fdb
+ ipv4_mcroute_fdb_oif0
+ ipv4_mcroute_fdb_oif0_sep
+
+ ipv6_nomcroute
+ ipv6_mcroute
+ ipv6_mcroute_changelink
+ ipv6_mcroute_starg
+ ipv6_mcroute_noroute
+ ipv6_mcroute_fdb
+ ipv6_mcroute_fdb_oif0
+
+ ipv4_nomcroute_rx
+ ipv4_mcroute_rx
+ ipv4_mcroute_starg_rx
+ ipv4_mcroute_fdb_oif0_sep_rx
+ ipv4_mcroute_fdb_sep_rx
+
+ ipv6_nomcroute_rx
+ ipv6_mcroute_rx
+ ipv6_mcroute_starg_rx
+ ipv6_mcroute_fdb_sep_rx
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ adf_simple_if_init "$h1"
+
+ adf_ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10
+ adf_ip_link_set_up "$h1.10"
+ adf_ip_addr_add "$h1.10" 192.0.2.1/28
+
+ adf_ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20
+ adf_ip_link_set_up "$h1.20"
+ adf_ip_addr_add "$h1.20" 2001:db8:1::1/64
+}
+
+install_capture()
+{
+ local dev=$1; shift
+
+ tc qdisc add dev "$dev" clsact
+ defer tc qdisc del dev "$dev" clsact
+
+ tc filter add dev "$dev" ingress proto ip pref 104 \
+ flower skip_hw ip_proto udp dst_port "$VXPORT" \
+ action pass
+ defer tc filter del dev "$dev" ingress proto ip pref 104
+
+ tc filter add dev "$dev" ingress proto ipv6 pref 106 \
+ flower skip_hw ip_proto udp dst_port "$VXPORT" \
+ action pass
+ defer tc filter del dev "$dev" ingress proto ipv6 pref 106
+}
+
+h2_create()
+{
+ # $h2
+ adf_ip_link_set_up "$h2"
+
+ # H2
+ vrf_create "v$h2"
+ defer vrf_destroy "v$h2"
+
+ adf_ip_link_set_up "v$h2"
+
+ # br2
+ adf_ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0
+ adf_ip_link_set_master br2 "v$h2"
+ adf_ip_link_set_up br2
+
+ # $h2
+ adf_ip_link_set_master "$h2" br2
+ install_capture "$h2"
+
+ # v1$h2
+ adf_ip_link_set_up "v1$h2"
+ adf_ip_link_set_master "v1$h2" br2
+}
+
+h3_create()
+{
+ # $h3
+ adf_ip_link_set_up "$h3"
+
+ # H3
+ vrf_create "v$h3"
+ defer vrf_destroy "v$h3"
+
+ adf_ip_link_set_up "v$h3"
+
+ # br3
+ adf_ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0
+ adf_ip_link_set_master br3 "v$h3"
+ adf_ip_link_set_up br3
+
+ # $h3
+ adf_ip_link_set_master "$h3" br3
+ install_capture "$h3"
+
+ # v1$h3
+ adf_ip_link_set_up "v1$h3"
+ adf_ip_link_set_master "v1$h3" br3
+}
+
+switch_create()
+{
+ local swp1_mac
+
+ # br1
+ swp1_mac=$(mac_get "$swp1")
+ adf_ip_link_add br1 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+ adf_ip_link_set_addr br1 "$swp1_mac"
+ adf_ip_link_set_up br1
+
+ # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on
+ # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test.
+ adf_ip_link_add "X$IPMR" up type dummy
+
+ # IPMR
+ adf_ip_link_add "$IPMR" up type dummy
+ adf_ip_addr_add "$IPMR" 192.0.2.100/28
+ adf_ip_addr_add "$IPMR" 2001:db8:4::1/64
+
+ # $swp1
+ adf_ip_link_set_up "$swp1"
+ adf_ip_link_set_master "$swp1" br1
+ adf_bridge_vlan_add vid 10 dev "$swp1"
+ adf_bridge_vlan_add vid 20 dev "$swp1"
+
+ # $swp2
+ adf_ip_link_set_up "$swp2"
+ adf_ip_addr_add "$swp2" 192.0.2.33/28
+ adf_ip_addr_add "$swp2" 2001:db8:2::1/64
+
+ # $swp3
+ adf_ip_link_set_up "$swp3"
+ adf_ip_addr_add "$swp3" 192.0.2.65/28
+ adf_ip_addr_add "$swp3" 2001:db8:3::1/64
+}
+
+vx_create()
+{
+ local name=$1; shift
+ local vid=$1; shift
+
+ adf_ip_link_add "$name" up type vxlan dstport "$VXPORT" \
+ nolearning noudpcsum tos inherit ttl 16 \
+ "$@"
+ adf_ip_link_set_master "$name" br1
+ adf_bridge_vlan_add vid "$vid" dev "$name" pvid untagged
+}
+export -f vx_create
+
+vx_wait()
+{
+ # Wait for all the ARP, IGMP etc. noise to settle down so that the
+ # tunnel is clear for measurements.
+ sleep 10
+}
+
+vx10_create()
+{
+ vx_create vx10 10 id 1000 "$@"
+}
+export -f vx10_create
+
+vx20_create()
+{
+ vx_create vx20 20 id 2000 "$@"
+}
+export -f vx20_create
+
+vx10_create_wait()
+{
+ vx10_create "$@"
+ vx_wait
+}
+
+vx20_create_wait()
+{
+ vx20_create "$@"
+ vx_wait
+}
+
+ns_init_common()
+{
+ local ns=$1; shift
+ local if_in=$1; shift
+ local ipv4_in=$1; shift
+ local ipv6_in=$1; shift
+ local ipv4_host=$1; shift
+ local ipv6_host=$1; shift
+
+ # v2$h2 / v2$h3
+ adf_ip_link_set_up "$if_in"
+ adf_ip_addr_add "$if_in" "$ipv4_in"
+ adf_ip_addr_add "$if_in" "$ipv6_in"
+
+ # br1
+ adf_ip_link_add br1 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+ adf_ip_link_set_up br1
+
+ # vx10, vx20
+ vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in"
+ vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in"
+
+ # w1
+ adf_ip_link_add w1 type veth peer name w2
+ adf_ip_link_set_master w1 br1
+ adf_ip_link_set_up w1
+ adf_bridge_vlan_add vid 10 dev w1
+ adf_bridge_vlan_add vid 20 dev w1
+
+ # w2
+ adf_simple_if_init w2
+
+ # w2.10
+ adf_ip_link_add w2.10 master vw2 link w2 type vlan id 10
+ adf_ip_link_set_up w2.10
+ adf_ip_addr_add w2.10 "$ipv4_host"
+
+ # w2.20
+ adf_ip_link_add w2.20 master vw2 link w2 type vlan id 20
+ adf_ip_link_set_up w2.20
+ adf_ip_addr_add w2.20 "$ipv6_host"
+}
+export -f ns_init_common
+
+ns2_create()
+{
+ # NS2
+ ip netns add ns2
+ defer ip netns del ns2
+
+ # v2$h2
+ ip link set dev "v2$h2" netns ns2
+ defer ip -n ns2 link set dev "v2$h2" netns 1
+
+ in_ns ns2 \
+ ns_init_common ns2 "v2$h2" \
+ 192.0.2.34/28 2001:db8:2::2/64 \
+ 192.0.2.3/28 2001:db8:1::3/64
+}
+
+ns3_create()
+{
+ # NS3
+ ip netns add ns3
+ defer ip netns del ns3
+
+ # v2$h3
+ ip link set dev "v2$h3" netns ns3
+ defer ip -n ns3 link set dev "v2$h3" netns 1
+
+ ip -n ns3 link set dev "v2$h3" up
+
+ in_ns ns3 \
+ ns_init_common ns3 "v2$h3" \
+ 192.0.2.66/28 2001:db8:3::2/64 \
+ 192.0.2.4/28 2001:db8:1::4/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ adf_vrf_prepare
+ adf_forwarding_enable
+
+ adf_ip_link_add "v1$h2" type veth peer name "v2$h2"
+ adf_ip_link_add "v1$h3" type veth peer name "v2$h3"
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+ ns2_create
+ ns3_create
+}
+
+adf_install_broken_sg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+ defer mc_cli remove "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+
+ mc_cli add "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+ defer mc_cli remove "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+}
+
+adf_install_rx()
+{
+ mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR"
+ defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" lo10
+
+ mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR"
+ defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" lo10
+
+ mc_cli add "$swp2" :: "$GROUP6" "$IPMR"
+ defer mc_cli remove "$swp2" :: "$GROUP6" lo10
+
+ mc_cli add "$swp3" :: "$GROUP6" "$IPMR"
+ defer mc_cli remove "$swp3" :: "$GROUP6" lo10
+}
+
+adf_install_sg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 192.0.2.33 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+adf_install_sg_sep()
+{
+ adf_mcd_start lo || exit "$EXIT_STATUS"
+
+ mc_cli add lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+}
+
+adf_install_sg_sep_rx()
+{
+ local lo=$1; shift
+
+ adf_mcd_start "$IPMR" "$lo" || exit "$EXIT_STATUS"
+
+ mc_cli add "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+adf_install_starg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+do_packets_v4()
+{
+ local mac
+
+ mac=$(mac_get "$h2")
+ "$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+ -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q
+}
+
+do_packets_v6()
+{
+ local mac
+
+ mac=$(mac_get "$h2")
+ "$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q
+}
+
+do_test()
+{
+ local ipv=$1; shift
+ local expect_h2=$1; shift
+ local expect_h3=$1; shift
+ local what=$1; shift
+
+ local pref=$((100 + ipv))
+ local t0_h2
+ local t0_h3
+ local t1_h2
+ local t1_h3
+ local d_h2
+ local d_h3
+
+ RET=0
+
+ t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+ t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+ "do_packets_v$ipv"
+ sleep 1
+
+ t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+ t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+ d_h2=$((t1_h2 - t0_h2))
+ d_h3=$((t1_h3 - t0_h3))
+
+ ((d_h2 == expect_h2))
+ check_err $? "Expected $expect_h2 packets on H2, got $d_h2"
+
+ ((d_h3 == expect_h3))
+ check_err $? "Expected $expect_h3 packets on H3, got $d_h3"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv4_do_test_rx()
+{
+ local h3_should_fail=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ping_do "$h1.10" 192.0.2.3
+ check_err $? "H2 should respond"
+
+ ping_do "$h1.10" 192.0.2.4
+ check_err_fail "$h3_should_fail" $? "H3 responds"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv6_do_test_rx()
+{
+ local h3_should_fail=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ping6_do "$h1.20" 2001:db8:1::3
+ check_err $? "H2 should respond"
+
+ ping6_do "$h1.20" 2001:db8:1::4
+ check_err_fail "$h3_should_fail" $? "H3 responds"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv4_nomcroute()
+{
+ # Install a misleading (S,G) rule to attempt to trick the system into
+ # pushing the packets elsewhere.
+ adf_install_broken_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2"
+ do_test 4 10 0 "IPv4 nomcroute"
+}
+
+ipv6_nomcroute()
+{
+ # Like for IPv4, install a misleading (S,G).
+ adf_install_broken_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+ do_test 6 10 0 "IPv6 nomcroute"
+}
+
+ipv4_nomcroute_rx()
+{
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2"
+ ipv4_do_test_rx 1 "IPv4 nomcroute ping"
+}
+
+ipv6_nomcroute_rx()
+{
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+ ipv6_do_test_rx 1 "IPv6 nomcroute ping"
+}
+
+ipv4_mcroute()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 10 10 "IPv4 mcroute"
+}
+
+ipv6_mcroute()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 10 10 "IPv6 mcroute"
+}
+
+ipv4_mcroute_rx()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ ipv4_do_test_rx 0 "IPv4 mcroute ping"
+}
+
+ipv6_mcroute_rx()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ipv6_do_test_rx 0 "IPv6 mcroute ping"
+}
+
+ipv4_mcroute_changelink()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR"
+ ip link set dev vx10 type vxlan mcroute
+ sleep 1
+ do_test 4 10 10 "IPv4 mcroute changelink"
+}
+
+ipv6_mcroute_changelink()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ip link set dev vx20 type vxlan mcroute
+ sleep 1
+ do_test 6 10 10 "IPv6 mcroute changelink"
+}
+
+ipv4_mcroute_starg()
+{
+ adf_install_starg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 10 10 "IPv4 mcroute (*,G)"
+}
+
+ipv6_mcroute_starg()
+{
+ adf_install_starg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 10 10 "IPv6 mcroute (*,G)"
+}
+
+ipv4_mcroute_starg_rx()
+{
+ adf_install_starg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping"
+}
+
+ipv6_mcroute_starg_rx()
+{
+ adf_install_starg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping"
+}
+
+ipv4_mcroute_noroute()
+{
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 0 0 "IPv4 mcroute, no route"
+}
+
+ipv6_mcroute_noroute()
+{
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 0 0 "IPv6 mcroute, no route"
+}
+
+ipv4_mcroute_fdb()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute
+ bridge fdb add dev vx10 \
+ 00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR"
+ do_test 4 10 10 "IPv4 mcroute FDB"
+}
+
+ipv6_mcroute_fdb()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute
+ bridge -6 fdb add dev vx20 \
+ 00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR"
+ do_test 6 10 10 "IPv6 mcroute FDB"
+}
+
+# Use FDB to configure VXLAN in a way where oif=0 for purposes of FIB lookup.
+ipv4_mcroute_fdb_oif0()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ do_test 4 10 10 "IPv4 mcroute oif=0"
+}
+
+ipv6_mcroute_fdb_oif0()
+{
+ # The IPv6 tunnel lookup does not fall back to selection by source
+ # address. Instead it just does a FIB match, and that would find one of
+ # the several ff00::/8 multicast routes -- each device has one. In order
+ # to reliably force the $IPMR device, add a /128 route for the
+ # destination group address.
+ ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR"
+ defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR"
+
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ bridge -6 fdb del dev vx20 00:00:00:00:00:00
+ bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6"
+ do_test 6 10 10 "IPv6 mcroute oif=0"
+}
+
+# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR.
+# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT.
+ipv4_mcroute_fdb_oif0_sep()
+{
+ adf_install_sg_sep
+
+ adf_ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0"
+}
+
+ipv4_mcroute_fdb_oif0_sep_rx()
+{
+ adf_install_sg_sep_rx lo
+
+ adf_ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping"
+}
+
+ipv4_mcroute_fdb_sep_rx()
+{
+ adf_install_sg_sep_rx lo
+
+ adf_ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add \
+ dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo
+ ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping"
+}
+
+ipv6_mcroute_fdb_sep_rx()
+{
+ adf_install_sg_sep_rx "X$IPMR"
+
+ adf_ip_addr_add "X$IPMR" 2001:db8:5::1/64
+ vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute
+ bridge -6 fdb del dev vx20 00:00:00:00:00:00
+ bridge -6 fdb add dev vx20 00:00:00:00:00:00 \
+ self static dst "$GROUP6" via "X$IPMR"
+ ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh
new file mode 100755
index 000000000000..709845123727
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh
@@ -0,0 +1,347 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/28 |
+# +----|---------------+
+# |
+# +----|--------------------------------+
+# | SW | |
+# | +--|------------------------------+ |
+# | | + $swp1 BR1 (802.1d) | |
+# | | | |
+# | | + vx1 (vxlan) | |
+# | | local 192.0.2.17 | |
+# | | id 1000 dstport $VXPORT | |
+# | +---------------------------------+ |
+# | |
+# | 192.0.2.32/28 via 192.0.2.18 |
+# | |
+# | + $rp1 |
+# | | 192.0.2.17/28 |
+# +--|----------------------------------+
+# |
+# +--|----------------------------------+
+# | | |
+# | + $rp2 |
+# | 192.0.2.18/28 |
+# | |
+# | VRP2 (vrf) |
+# +-------------------------------------+
+
+: ${VXPORT:=4789}
+: ${ALL_TESTS:="
+ default_test
+ plain_test
+ reserved_0_test
+ reserved_10_test
+ reserved_31_test
+ reserved_56_test
+ reserved_63_test
+ "}
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ adf_simple_if_init $h1 192.0.2.1/28
+
+ tc qdisc add dev $h1 clsact
+ defer tc qdisc del dev $h1 clsact
+
+ tc filter add dev $h1 ingress pref 77 \
+ prot ip flower skip_hw ip_proto icmp action drop
+ defer tc filter del dev $h1 ingress pref 77
+}
+
+switch_create()
+{
+ adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ adf_ip_link_set_addr br1 $(mac_get $swp1)
+ adf_ip_link_set_up br1
+
+ adf_ip_link_set_up $rp1
+ adf_ip_addr_add $rp1 192.0.2.17/28
+ adf_ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18
+
+ adf_ip_link_set_master $swp1 br1
+ adf_ip_link_set_up $swp1
+}
+
+vrp2_create()
+{
+ adf_simple_if_init $rp2 192.0.2.18/28
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ adf_vrf_prepare
+ adf_forwarding_enable
+
+ h1_create
+ switch_create
+
+ vrp2_create
+}
+
+vxlan_header_bytes()
+{
+ local vni=$1; shift
+ local -a extra_bits=("$@")
+ local -a bits
+ local i
+
+ for ((i=0; i < 64; i++)); do
+ bits[i]=0
+ done
+
+ # Bit 4 is the I flag and is always on.
+ bits[4]=1
+
+ for i in ${extra_bits[@]}; do
+ bits[i]=1
+ done
+
+ # Bits 32..55 carry the VNI
+ local mask=0x800000
+ for ((i=0; i < 24; i++)); do
+ bits[$((i + 32))]=$(((vni & mask) != 0))
+ ((mask >>= 1))
+ done
+
+ local bytes
+ for ((i=0; i < 8; i++)); do
+ local byte=0
+ local j
+ for ((j=0; j < 8; j++)); do
+ local bit=${bits[8 * i + j]}
+ ((byte += bit << (7 - j)))
+ done
+ bytes+=$(printf %02x $byte):
+ done
+
+ echo ${bytes%:}
+}
+
+neg_bytes()
+{
+ local bytes=$1; shift
+
+ local -A neg=([0]=f [1]=e [2]=d [3]=c [4]=b [5]=a [6]=9 [7]=8
+ [8]=7 [9]=6 [a]=5 [b]=4 [c]=3 [d]=2 [e]=1 [f]=0 [:]=:)
+ local out
+ local i
+
+ for ((i=0; i < ${#bytes}; i++)); do
+ local c=${bytes:$i:1}
+ out+=${neg[$c]}
+ done
+ echo $out
+}
+
+vxlan_ping_do()
+{
+ local count=$1; shift
+ local dev=$1; shift
+ local next_hop_mac=$1; shift
+ local dest_ip=$1; shift
+ local dest_mac=$1; shift
+ local vni=$1; shift
+ local reserved_bits=$1; shift
+
+ local vxlan_header=$(vxlan_header_bytes $vni $reserved_bits)
+
+ $MZ $dev -c $count -d 100msec -q \
+ -b $next_hop_mac -B $dest_ip \
+ -t udp sp=23456,dp=$VXPORT,p=$(:
+ )"$vxlan_header:"$( : VXLAN
+ )"$dest_mac:"$( : ETH daddr
+ )"00:11:22:33:44:55:"$( : ETH saddr
+ )"08:00:"$( : ETH type
+ )"45:"$( : IP version + IHL
+ )"00:"$( : IP TOS
+ )"00:54:"$( : IP total length
+ )"99:83:"$( : IP identification
+ )"40:00:"$( : IP flags + frag off
+ )"40:"$( : IP TTL
+ )"01:"$( : IP proto
+ )"00:00:"$( : IP header csum
+ )"$(ipv4_to_bytes 192.0.2.3):"$( : IP saddr
+ )"$(ipv4_to_bytes 192.0.2.1):"$( : IP daddr
+ )"08:"$( : ICMP type
+ )"00:"$( : ICMP code
+ )"8b:f2:"$( : ICMP csum
+ )"1f:6a:"$( : ICMP request identifier
+ )"00:01:"$( : ICMP request seq. number
+ )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload
+ )"6d:74:0b:00:00:00:00:00:"$( :
+ )"10:11:12:13:14:15:16:17:"$( :
+ )"18:19:1a:1b:1c:1d:1e:1f:"$( :
+ )"20:21:22:23:24:25:26:27:"$( :
+ )"28:29:2a:2b:2c:2d:2e:2f:"$( :
+ )"30:31:32:33:34:35:36:37"
+}
+
+vxlan_device_add()
+{
+ adf_ip_link_add vx1 up type vxlan id 1000 \
+ local 192.0.2.17 dstport "$VXPORT" \
+ nolearning noudpcsum tos inherit ttl 100 "$@"
+ adf_ip_link_set_master vx1 br1
+}
+
+vxlan_all_reserved_bits()
+{
+ local i
+
+ for ((i=0; i < 64; i++)); do
+ if ((i == 4 || i >= 32 && i < 56)); then
+ continue
+ fi
+ echo $i
+ done
+}
+
+vxlan_ping_vanilla()
+{
+ vxlan_ping_do 10 $rp2 $(mac_get $rp1) 192.0.2.17 $(mac_get $h1) 1000
+}
+
+vxlan_ping_reserved()
+{
+ for bit in $(vxlan_all_reserved_bits); do
+ vxlan_ping_do 1 $rp2 $(mac_get $rp1) \
+ 192.0.2.17 $(mac_get $h1) 1000 "$bit"
+ ((n++))
+ done
+}
+
+vxlan_ping_test()
+{
+ local what=$1; shift
+ local get_stat=$1; shift
+ local expect=$1; shift
+
+ RET=0
+
+ local t0=$($get_stat)
+
+ "$@"
+ check_err $? "Failure when running $@"
+
+ local t1=$($get_stat)
+ local delta=$((t1 - t0))
+
+ ((expect == delta))
+ check_err $? "Expected to capture $expect packets, got $delta."
+
+ log_test "$what"
+}
+
+__default_test_do()
+{
+ local n_allowed_bits=$1; shift
+ local what=$1; shift
+
+ vxlan_ping_test "$what: clean packets" \
+ "tc_rule_stats_get $h1 77 ingress" \
+ 10 vxlan_ping_vanilla
+
+ local t0=$(link_stats_get vx1 rx errors)
+ vxlan_ping_test "$what: mangled packets" \
+ "tc_rule_stats_get $h1 77 ingress" \
+ $n_allowed_bits vxlan_ping_reserved
+ local t1=$(link_stats_get vx1 rx errors)
+
+ RET=0
+ local expect=$((39 - n_allowed_bits))
+ local delta=$((t1 - t0))
+ ((expect == delta))
+ check_err $? "Expected $expect error packets, got $delta."
+ log_test "$what: drops reported"
+}
+
+default_test_do()
+{
+ vxlan_device_add
+ __default_test_do 0 "Default"
+}
+
+default_test()
+{
+ in_defer_scope \
+ default_test_do
+}
+
+plain_test_do()
+{
+ vxlan_device_add reserved_bits 0xf7ffffff000000ff
+ __default_test_do 0 "reserved_bits 0xf7ffffff000000ff"
+}
+
+plain_test()
+{
+ in_defer_scope \
+ plain_test_do
+}
+
+reserved_test()
+{
+ local bit=$1; shift
+
+ local allowed_bytes=$(vxlan_header_bytes 0xffffff $bit)
+ local reserved_bytes=$(neg_bytes $allowed_bytes)
+ local reserved_bits=${reserved_bytes//:/}
+
+ vxlan_device_add reserved_bits 0x$reserved_bits
+ __default_test_do 1 "reserved_bits 0x$reserved_bits"
+}
+
+reserved_0_test()
+{
+ in_defer_scope \
+ reserved_test 0
+}
+
+reserved_10_test()
+{
+ in_defer_scope \
+ reserved_test 10
+}
+
+reserved_31_test()
+{
+ in_defer_scope \
+ reserved_test 31
+}
+
+reserved_56_test()
+{
+ in_defer_scope \
+ reserved_test 56
+}
+
+reserved_63_test()
+{
+ in_defer_scope \
+ reserved_test 63
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
new file mode 100755
index 000000000000..48eb999a3120
--- /dev/null
+++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./lib.sh
+
+PAUSE_ON_FAIL="no"
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+ cleanup_all_ns
+
+ exit "${EXIT_STATUS}"
+}
+
+# Add fake IPv4 and IPv6 networks on the loopback device, to be used as
+# underlay by future GRE devices.
+#
+setup_basenet()
+{
+ ip -netns "${NS0}" link set dev lo up
+ ip -netns "${NS0}" address add dev lo 192.0.2.10/24
+ ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad
+}
+
+# Check the IPv6 configuration of a network device.
+#
+# We currently check the generation of the link-local IPv6 address and the
+# creation of the ff00::/8 multicast route.
+#
+# Parameters:
+#
+# * $1: The network device to test
+# * $2: An extra regular expression that should be matched (to verify the
+# presence of extra attributes)
+# * $3: The expected return code from grep (to allow checking the absence of
+# a link-local address)
+# * $4: The user visible name for the scenario being tested
+#
+check_ipv6_device_config()
+{
+ local DEV="$1"
+ local EXTRA_MATCH="$2"
+ local XRET="$3"
+ local MSG="$4"
+
+ RET=0
+ set +e
+ ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}"
+ check_err_fail "${XRET}" $? "IPv6 link-local address generation"
+
+ ip -netns "${NS0}" -6 route show table local type multicast ff00::/8 proto kernel | grep -q "${DEV}"
+ check_err_fail 0 $? "IPv6 multicast route creation"
+
+ log_test "${MSG}"
+ set -e
+}
+
+# Create a GRE device and verify that it gets an IPv6 link-local address as
+# expected.
+#
+# Parameters:
+#
+# * $1: The device type (gre, ip6gre, gretap or ip6gretap)
+# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any")
+# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any")
+# * $4: The IPv6 interface identifier generation mode to use for the GRE
+# device (eui64, none, stable-privacy or random).
+#
+test_gre_device()
+{
+ local GRE_TYPE="$1"
+ local LOCAL_IP="$2"
+ local REMOTE_IP="$3"
+ local MODE="$4"
+ local ADDR_GEN_MODE
+ local MATCH_REGEXP
+ local MSG
+
+ ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}"
+
+ case "${MODE}" in
+ "eui64")
+ ADDR_GEN_MODE=0
+ MATCH_REGEXP=""
+ MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ ;;
+ "none")
+ ADDR_GEN_MODE=1
+ MATCH_REGEXP=""
+ MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=1 # No link-local address should be generated
+ ;;
+ "stable-privacy")
+ ADDR_GEN_MODE=2
+ MATCH_REGEXP="stable-privacy"
+ MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ # Initialise stable_secret (required for stable-privacy mode)
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd"
+ ;;
+ "random")
+ ADDR_GEN_MODE=3
+ MATCH_REGEXP="stable-privacy"
+ MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ ;;
+ esac
+
+ # Check the IPv6 device configuration when it goes up
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
+ ip -netns "${NS0}" link set dev gretest up
+ check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}"
+
+ # Now disable link-local address generation
+ ip -netns "${NS0}" link set dev gretest down
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1
+ ip -netns "${NS0}" link set dev gretest up
+
+ # Check the IPv6 device configuration when link-local address
+ # generation is re-enabled while the device is already up
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
+ check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}"
+
+ ip -netns "${NS0}" link del dev gretest
+}
+
+test_gre4()
+{
+ local GRE_TYPE
+ local MODE
+
+ for GRE_TYPE in "gre" "gretap"; do
+ printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n"
+
+ for MODE in "eui64" "none" "stable-privacy" "random"; do
+ test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}"
+ done
+ done
+}
+
+test_gre6()
+{
+ local GRE_TYPE
+ local MODE
+
+ for GRE_TYPE in "ip6gre" "ip6gretap"; do
+ printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n"
+
+ for MODE in "eui64" "none" "stable-privacy" "random"; do
+ test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}"
+ done
+ done
+}
+
+usage()
+{
+ echo "Usage: $0 [-p]"
+ exit 1
+}
+
+while getopts :p o
+do
+ case $o in
+ p) PAUSE_ON_FAIL="yes";;
+ *) usage;;
+ esac
+done
+
+setup_ns NS0
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_basenet
+
+test_gre4
+test_gre6
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
deleted file mode 100644
index b2184847e388..000000000000
--- a/tools/testing/selftests/net/gro.c
+++ /dev/null
@@ -1,1328 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This testsuite provides conformance testing for GRO coalescing.
- *
- * Test cases:
- * 1.data
- * Data packets of the same size and same header setup with correct
- * sequence numbers coalesce. The one exception being the last data
- * packet coalesced: it can be smaller than the rest and coalesced
- * as long as it is in the same flow.
- * 2.ack
- * Pure ACK does not coalesce.
- * 3.flags
- * Specific test cases: no packets with PSH, SYN, URG, RST set will
- * be coalesced.
- * 4.tcp
- * Packets with incorrect checksum, non-consecutive seqno and
- * different TCP header options shouldn't coalesce. Nit: given that
- * some extension headers have paddings, such as timestamp, headers
- * that are padding differently would not be coalesced.
- * 5.ip:
- * Packets with different (ECN, TTL, TOS) header, ip options or
- * ip fragments (ipv6) shouldn't coalesce.
- * 6.large:
- * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
- *
- * MSS is defined as 4096 - header because if it is too small
- * (i.e. 1500 MTU - header), it will result in many packets,
- * increasing the "large" test case's flakiness. This is because
- * due to time sensitivity in the coalescing window, the receiver
- * may not coalesce all of the packets.
- *
- * Note the timing issue applies to all of the test cases, so some
- * flakiness is to be expected.
- *
- */
-
-#define _GNU_SOURCE
-
-#include <arpa/inet.h>
-#include <errno.h>
-#include <error.h>
-#include <getopt.h>
-#include <linux/filter.h>
-#include <linux/if_packet.h>
-#include <linux/ipv6.h>
-#include <net/ethernet.h>
-#include <net/if.h>
-#include <netinet/in.h>
-#include <netinet/ip.h>
-#include <netinet/ip6.h>
-#include <netinet/tcp.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "../kselftest.h"
-
-#define DPORT 8000
-#define SPORT 1500
-#define PAYLOAD_LEN 100
-#define NUM_PACKETS 4
-#define START_SEQ 100
-#define START_ACK 100
-#define ETH_P_NONE 0
-#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
-#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
-#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
-#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
-#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
-#define MIN_EXTHDR_SIZE 8
-#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00"
-#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11"
-
-#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-
-static const char *addr6_src = "fdaa::2";
-static const char *addr6_dst = "fdaa::1";
-static const char *addr4_src = "192.168.1.200";
-static const char *addr4_dst = "192.168.1.100";
-static int proto = -1;
-static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
-static char *testname = "data";
-static char *ifname = "eth0";
-static char *smac = "aa:00:00:00:00:02";
-static char *dmac = "aa:00:00:00:00:01";
-static bool verbose;
-static bool tx_socket = true;
-static int tcp_offset = -1;
-static int total_hdr_len = -1;
-static int ethhdr_proto = -1;
-static const int num_flush_id_cases = 6;
-
-static void vlog(const char *fmt, ...)
-{
- va_list args;
-
- if (verbose) {
- va_start(args, fmt);
- vfprintf(stderr, fmt, args);
- va_end(args);
- }
-}
-
-static void setup_sock_filter(int fd)
-{
- const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
- const int ethproto_off = offsetof(struct ethhdr, h_proto);
- int optlen = 0;
- int ipproto_off, opt_ipproto_off;
- int next_off;
-
- if (proto == PF_INET)
- next_off = offsetof(struct iphdr, protocol);
- else
- next_off = offsetof(struct ipv6hdr, nexthdr);
- ipproto_off = ETH_HLEN + next_off;
-
- /* Overridden later if exthdrs are used: */
- opt_ipproto_off = ipproto_off;
-
- if (strcmp(testname, "ip") == 0) {
- if (proto == PF_INET)
- optlen = sizeof(struct ip_timestamp);
- else {
- BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
- BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
- BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
-
- /* same size for HBH and Fragment extension header types */
- optlen = MIN_EXTHDR_SIZE;
- opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
- + offsetof(struct ip6_ext, ip6e_nxt);
- }
- }
-
- /* this filter validates the following:
- * - packet is IPv4/IPv6 according to the running test.
- * - packet is TCP. Also handles the case of one extension header and then TCP.
- * - checks the packet tcp dport equals to DPORT. Also handles the case of one
- * extension header and then TCP.
- */
- struct sock_filter filter[] = {
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9),
- BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0),
- BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
- BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
- BPF_STMT(BPF_RET + BPF_K, 0),
- };
-
- struct sock_fprog bpf = {
- .len = ARRAY_SIZE(filter),
- .filter = filter,
- };
-
- if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
- error(1, errno, "error setting filter");
-}
-
-static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
-{
- uint16_t *words = data;
- int i;
-
- for (i = 0; i < len / 2; i++)
- sum += words[i];
- if (len & 1)
- sum += ((char *)data)[len - 1];
- return sum;
-}
-
-static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
-{
- sum = checksum_nofold(data, len, sum);
- while (sum > 0xFFFF)
- sum = (sum & 0xFFFF) + (sum >> 16);
- return ~sum;
-}
-
-static uint16_t tcp_checksum(void *buf, int payload_len)
-{
- struct pseudo_header6 {
- struct in6_addr saddr;
- struct in6_addr daddr;
- uint16_t protocol;
- uint16_t payload_len;
- } ph6;
- struct pseudo_header4 {
- struct in_addr saddr;
- struct in_addr daddr;
- uint16_t protocol;
- uint16_t payload_len;
- } ph4;
- uint32_t sum = 0;
-
- if (proto == PF_INET6) {
- if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1)
- error(1, errno, "inet_pton6 source ip pseudo");
- if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1)
- error(1, errno, "inet_pton6 dest ip pseudo");
- ph6.protocol = htons(IPPROTO_TCP);
- ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
-
- sum = checksum_nofold(&ph6, sizeof(ph6), 0);
- } else if (proto == PF_INET) {
- if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1)
- error(1, errno, "inet_pton source ip pseudo");
- if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1)
- error(1, errno, "inet_pton dest ip pseudo");
- ph4.protocol = htons(IPPROTO_TCP);
- ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
-
- sum = checksum_nofold(&ph4, sizeof(ph4), 0);
- }
-
- return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
-}
-
-static void read_MAC(uint8_t *mac_addr, char *mac)
-{
- if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
- &mac_addr[0], &mac_addr[1], &mac_addr[2],
- &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
- error(1, 0, "sscanf");
-}
-
-static void fill_datalinklayer(void *buf)
-{
- struct ethhdr *eth = buf;
-
- memcpy(eth->h_dest, dst_mac, ETH_ALEN);
- memcpy(eth->h_source, src_mac, ETH_ALEN);
- eth->h_proto = ethhdr_proto;
-}
-
-static void fill_networklayer(void *buf, int payload_len)
-{
- struct ipv6hdr *ip6h = buf;
- struct iphdr *iph = buf;
-
- if (proto == PF_INET6) {
- memset(ip6h, 0, sizeof(*ip6h));
-
- ip6h->version = 6;
- ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
- ip6h->nexthdr = IPPROTO_TCP;
- ip6h->hop_limit = 8;
- if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1)
- error(1, errno, "inet_pton source ip6");
- if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1)
- error(1, errno, "inet_pton dest ip6");
- } else if (proto == PF_INET) {
- memset(iph, 0, sizeof(*iph));
-
- iph->version = 4;
- iph->ihl = 5;
- iph->ttl = 8;
- iph->protocol = IPPROTO_TCP;
- iph->tot_len = htons(sizeof(struct tcphdr) +
- payload_len + sizeof(struct iphdr));
- iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
- if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1)
- error(1, errno, "inet_pton source ip");
- if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1)
- error(1, errno, "inet_pton dest ip");
- iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
- }
-}
-
-static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
- int payload_len, int fin)
-{
- struct tcphdr *tcph = buf;
-
- memset(tcph, 0, sizeof(*tcph));
-
- tcph->source = htons(SPORT);
- tcph->dest = htons(DPORT);
- tcph->seq = ntohl(START_SEQ + seq_offset);
- tcph->ack_seq = ntohl(START_ACK + ack_offset);
- tcph->ack = 1;
- tcph->fin = fin;
- tcph->doff = 5;
- tcph->window = htons(TCP_MAXWIN);
- tcph->urg_ptr = 0;
- tcph->check = tcp_checksum(tcph, payload_len);
-}
-
-static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
-{
- int ret = -1;
-
- ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
- if (ret == -1)
- error(1, errno, "sendto failure");
- if (ret != len)
- error(1, errno, "sendto wrong length");
-}
-
-static void create_packet(void *buf, int seq_offset, int ack_offset,
- int payload_len, int fin)
-{
- memset(buf, 0, total_hdr_len);
- memset(buf + total_hdr_len, 'a', payload_len);
- fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
- payload_len, fin);
- fill_networklayer(buf + ETH_HLEN, payload_len);
- fill_datalinklayer(buf);
-}
-
-/* send one extra flag, not first and not last pkt */
-static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
- int rst, int urg)
-{
- static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- int payload_len, pkt_size, flag, i;
- struct tcphdr *tcph;
-
- payload_len = PAYLOAD_LEN * psh;
- pkt_size = total_hdr_len + payload_len;
- flag = NUM_PACKETS / 2;
-
- create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
-
- tcph = (struct tcphdr *)(flag_buf + tcp_offset);
- tcph->psh = psh;
- tcph->syn = syn;
- tcph->rst = rst;
- tcph->urg = urg;
- tcph->check = 0;
- tcph->check = tcp_checksum(tcph, payload_len);
-
- for (i = 0; i < NUM_PACKETS + 1; i++) {
- if (i == flag) {
- write_packet(fd, flag_buf, pkt_size, daddr);
- continue;
- }
- create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
- }
-}
-
-/* Test for data of same length, smaller than previous
- * and of different lengths
- */
-static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
- int payload_len1, int payload_len2)
-{
- static char buf[ETH_HLEN + IP_MAXPACKET];
-
- create_packet(buf, 0, 0, payload_len1, 0);
- write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
- create_packet(buf, payload_len1, 0, payload_len2, 0);
- write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
-}
-
-/* If incoming segments make tracked segment length exceed
- * legal IP datagram length, do not coalesce
- */
-static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
-{
- static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
- static char last[TOTAL_HDR_LEN + MSS];
- static char new_seg[TOTAL_HDR_LEN + MSS];
- int i;
-
- for (i = 0; i < NUM_LARGE_PKT; i++)
- create_packet(pkts[i], i * MSS, 0, MSS, 0);
- create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
- create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
-
- for (i = 0; i < NUM_LARGE_PKT; i++)
- write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
- write_packet(fd, last, total_hdr_len + remainder, daddr);
- write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
-}
-
-/* Pure acks and dup acks don't coalesce */
-static void send_ack(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN];
-
- create_packet(buf, 0, 0, 0, 0);
- write_packet(fd, buf, total_hdr_len, daddr);
- write_packet(fd, buf, total_hdr_len, daddr);
- create_packet(buf, 0, 1, 0, 0);
- write_packet(fd, buf, total_hdr_len, daddr);
-}
-
-static void recompute_packet(char *buf, char *no_ext, int extlen)
-{
- struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
- struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
-
- memmove(buf, no_ext, total_hdr_len);
- memmove(buf + total_hdr_len + extlen,
- no_ext + total_hdr_len, PAYLOAD_LEN);
-
- tcphdr->doff = tcphdr->doff + (extlen / 4);
- tcphdr->check = 0;
- tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
- if (proto == PF_INET) {
- iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
- } else {
- ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
- }
-}
-
-static void tcp_write_options(char *buf, int kind, int ts)
-{
- struct tcp_option_ts {
- uint8_t kind;
- uint8_t len;
- uint32_t tsval;
- uint32_t tsecr;
- } *opt_ts = (void *)buf;
- struct tcp_option_window {
- uint8_t kind;
- uint8_t len;
- uint8_t shift;
- } *opt_window = (void *)buf;
-
- switch (kind) {
- case TCPOPT_NOP:
- buf[0] = TCPOPT_NOP;
- break;
- case TCPOPT_WINDOW:
- memset(opt_window, 0, sizeof(struct tcp_option_window));
- opt_window->kind = TCPOPT_WINDOW;
- opt_window->len = TCPOLEN_WINDOW;
- opt_window->shift = 0;
- break;
- case TCPOPT_TIMESTAMP:
- memset(opt_ts, 0, sizeof(struct tcp_option_ts));
- opt_ts->kind = TCPOPT_TIMESTAMP;
- opt_ts->len = TCPOLEN_TIMESTAMP;
- opt_ts->tsval = ts;
- opt_ts->tsecr = 0;
- break;
- default:
- error(1, 0, "unimplemented TCP option");
- break;
- }
-}
-
-/* TCP with options is always a permutation of {TS, NOP, NOP}.
- * Implement different orders to verify coalescing stops.
- */
-static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
-{
- switch (order) {
- case 0:
- tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
- tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
- tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
- TCPOPT_TIMESTAMP, ts);
- break;
- case 1:
- tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
- tcp_write_options(buf + total_hdr_len + 1,
- TCPOPT_TIMESTAMP, ts);
- tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
- TCPOPT_NOP, 0);
- break;
- case 2:
- tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
- tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
- TCPOPT_NOP, 0);
- tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
- TCPOPT_NOP, 0);
- break;
- default:
- error(1, 0, "unknown order");
- break;
- }
- recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
-}
-
-/* Packets with invalid checksum don't coalesce. */
-static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
- int pkt_size = total_hdr_len + PAYLOAD_LEN;
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- tcph->check = tcph->check - 1;
- write_packet(fd, buf, pkt_size, daddr);
-}
-
- /* Packets with non-consecutive sequence number don't coalesce.*/
-static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
- int pkt_size = total_hdr_len + PAYLOAD_LEN;
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- tcph->seq = ntohl(htonl(tcph->seq) + 1);
- tcph->check = 0;
- tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
- write_packet(fd, buf, pkt_size, daddr);
-}
-
- /* Packet with different timestamp option or different timestamps
- * don't coalesce.
- */
-static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
- int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt, buf, 0, 0);
- write_packet(fd, extpkt, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt, buf, 0, 0);
- write_packet(fd, extpkt, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt, buf, 100, 0);
- write_packet(fd, extpkt, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt, buf, 100, 1);
- write_packet(fd, extpkt, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt, buf, 100, 2);
- write_packet(fd, extpkt, pkt_size, daddr);
-}
-
-/* Packet with different tcp options don't coalesce. */
-static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
- static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
- int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
- int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt1, buf, 0, 0);
- write_packet(fd, extpkt1, extpkt1_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt1, buf, 0, 0);
- write_packet(fd, extpkt1, extpkt1_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
- tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
- tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
- recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
- write_packet(fd, extpkt2, extpkt2_size, daddr);
-}
-
-static void add_ipv4_ts_option(void *buf, void *optpkt)
-{
- struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
- int optlen = sizeof(struct ip_timestamp);
- struct iphdr *iph;
-
- if (optlen % 4)
- error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
-
- ts->ipt_code = IPOPT_TS;
- ts->ipt_len = optlen;
- ts->ipt_ptr = 5;
- ts->ipt_flg = IPOPT_TS_TSONLY;
-
- memcpy(optpkt, buf, tcp_offset);
- memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
- sizeof(struct tcphdr) + PAYLOAD_LEN);
-
- iph = (struct iphdr *)(optpkt + ETH_HLEN);
- iph->ihl = 5 + (optlen / 4);
- iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
-}
-
-static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload)
-{
- struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset);
- struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN);
- char *exthdr_payload_start = (char *)(exthdr + 1);
-
- exthdr->hdrlen = 0;
- exthdr->nexthdr = IPPROTO_TCP;
-
- memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr));
-
- memcpy(optpkt, buf, tcp_offset);
- memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset,
- sizeof(struct tcphdr) + PAYLOAD_LEN);
-
- iph->nexthdr = exthdr_type;
- iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
-}
-
-static void fix_ip4_checksum(struct iphdr *iph)
-{
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
-}
-
-static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
-{
- static char buf1[MAX_HDR_LEN + PAYLOAD_LEN];
- static char buf2[MAX_HDR_LEN + PAYLOAD_LEN];
- static char buf3[MAX_HDR_LEN + PAYLOAD_LEN];
- bool send_three = false;
- struct iphdr *iph1;
- struct iphdr *iph2;
- struct iphdr *iph3;
-
- iph1 = (struct iphdr *)(buf1 + ETH_HLEN);
- iph2 = (struct iphdr *)(buf2 + ETH_HLEN);
- iph3 = (struct iphdr *)(buf3 + ETH_HLEN);
-
- create_packet(buf1, 0, 0, PAYLOAD_LEN, 0);
- create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
-
- switch (tcase) {
- case 0: /* DF=1, Incrementing - should coalesce */
- iph1->frag_off |= htons(IP_DF);
- iph1->id = htons(8);
-
- iph2->frag_off |= htons(IP_DF);
- iph2->id = htons(9);
- break;
-
- case 1: /* DF=1, Fixed - should coalesce */
- iph1->frag_off |= htons(IP_DF);
- iph1->id = htons(8);
-
- iph2->frag_off |= htons(IP_DF);
- iph2->id = htons(8);
- break;
-
- case 2: /* DF=0, Incrementing - should coalesce */
- iph1->frag_off &= ~htons(IP_DF);
- iph1->id = htons(8);
-
- iph2->frag_off &= ~htons(IP_DF);
- iph2->id = htons(9);
- break;
-
- case 3: /* DF=0, Fixed - should not coalesce */
- iph1->frag_off &= ~htons(IP_DF);
- iph1->id = htons(8);
-
- iph2->frag_off &= ~htons(IP_DF);
- iph2->id = htons(8);
- break;
-
- case 4: /* DF=1, two packets incrementing, and one fixed - should
- * coalesce only the first two packets
- */
- iph1->frag_off |= htons(IP_DF);
- iph1->id = htons(8);
-
- iph2->frag_off |= htons(IP_DF);
- iph2->id = htons(9);
-
- iph3->frag_off |= htons(IP_DF);
- iph3->id = htons(9);
- send_three = true;
- break;
-
- case 5: /* DF=1, two packets fixed, and one incrementing - should
- * coalesce only the first two packets
- */
- iph1->frag_off |= htons(IP_DF);
- iph1->id = htons(8);
-
- iph2->frag_off |= htons(IP_DF);
- iph2->id = htons(8);
-
- iph3->frag_off |= htons(IP_DF);
- iph3->id = htons(9);
- send_three = true;
- break;
- }
-
- fix_ip4_checksum(iph1);
- fix_ip4_checksum(iph2);
- write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr);
- write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr);
-
- if (send_three) {
- fix_ip4_checksum(iph3);
- write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr);
- }
-}
-
-static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt)
-{
- for (int i = 0; i < num_flush_id_cases; i++) {
- sleep(1);
- send_flush_id_case(fd, daddr, i);
- sleep(1);
- write_packet(fd, fin_pkt, total_hdr_len, daddr);
- }
-}
-
-static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1);
- write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
- add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2);
- write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
-}
-
-/* IPv4 options shouldn't coalesce */
-static void send_ip_options(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
- int optlen = sizeof(struct ip_timestamp);
- int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
- add_ipv4_ts_option(buf, optpkt);
- write_packet(fd, optpkt, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
-}
-
-/* IPv4 fragments shouldn't coalesce */
-static void send_fragment4(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[IP_MAXPACKET];
- struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
- int pkt_size = total_hdr_len + PAYLOAD_LEN;
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, pkt_size, daddr);
-
- /* Once fragmented, packet would retain the total_len.
- * Tcp header is prepared as if rest of data is in follow-up frags,
- * but follow up frags aren't actually sent.
- */
- memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
- fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
- fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
- fill_datalinklayer(buf);
-
- iph->frag_off = htons(0x6000); // DF = 1, MF = 1
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
- write_packet(fd, buf, pkt_size, daddr);
-}
-
-/* IPv4 packets with different ttl don't coalesce.*/
-static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
-{
- int pkt_size = total_hdr_len + PAYLOAD_LEN;
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- iph->ttl = 7;
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
- write_packet(fd, buf, pkt_size, daddr);
-}
-
-/* Packets with different tos don't coalesce.*/
-static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
-{
- int pkt_size = total_hdr_len + PAYLOAD_LEN;
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- if (proto == PF_INET) {
- iph->tos = 1;
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
- } else if (proto == PF_INET6) {
- ip6h->priority = 0xf;
- }
- write_packet(fd, buf, pkt_size, daddr);
-}
-
-/* Packets with different ECN don't coalesce.*/
-static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
-{
- int pkt_size = total_hdr_len + PAYLOAD_LEN;
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- if (proto == PF_INET) {
- buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
- } else {
- buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
- }
- write_packet(fd, buf, pkt_size, daddr);
-}
-
-/* IPv6 fragments and packets with extensions don't coalesce.*/
-static void send_fragment6(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
- sizeof(struct ip6_frag)];
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
- struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
- int extlen = sizeof(struct ip6_frag);
- int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
- int extpkt_len = bufpkt_len + extlen;
- int i;
-
- for (i = 0; i < 2; i++) {
- create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, bufpkt_len, daddr);
- }
- sleep(1);
- create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
- memset(extpkt, 0, extpkt_len);
-
- ip6h->nexthdr = IPPROTO_FRAGMENT;
- ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
- frag->ip6f_nxt = IPPROTO_TCP;
-
- memcpy(extpkt, buf, tcp_offset);
- memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
- sizeof(struct tcphdr) + PAYLOAD_LEN);
- write_packet(fd, extpkt, extpkt_len, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, bufpkt_len, daddr);
-}
-
-static void bind_packetsocket(int fd)
-{
- struct sockaddr_ll daddr = {};
-
- daddr.sll_family = AF_PACKET;
- daddr.sll_protocol = ethhdr_proto;
- daddr.sll_ifindex = if_nametoindex(ifname);
- if (daddr.sll_ifindex == 0)
- error(1, errno, "if_nametoindex");
-
- if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
- error(1, errno, "could not bind socket");
-}
-
-static void set_timeout(int fd)
-{
- struct timeval timeout;
-
- timeout.tv_sec = 3;
- timeout.tv_usec = 0;
- if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
- sizeof(timeout)) < 0)
- error(1, errno, "cannot set timeout, setsockopt failed");
-}
-
-static void check_recv_pkts(int fd, int *correct_payload,
- int correct_num_pkts)
-{
- static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
- struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
- struct tcphdr *tcph;
- bool bad_packet = false;
- int tcp_ext_len = 0;
- int ip_ext_len = 0;
- int pkt_size = -1;
- int data_len = 0;
- int num_pkt = 0;
- int i;
-
- vlog("Expected {");
- for (i = 0; i < correct_num_pkts; i++)
- vlog("%d ", correct_payload[i]);
- vlog("}, Total %d packets\nReceived {", correct_num_pkts);
-
- while (1) {
- ip_ext_len = 0;
- pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
- if (pkt_size < 0)
- error(1, errno, "could not receive");
-
- if (iph->version == 4)
- ip_ext_len = (iph->ihl - 5) * 4;
- else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
- ip_ext_len = MIN_EXTHDR_SIZE;
-
- tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
-
- if (tcph->fin)
- break;
-
- tcp_ext_len = (tcph->doff - 5) * 4;
- data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
- /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
- * Ipv4/tcp packets without at least 6 bytes of data will be padded.
- * Packet sockets are protocol agnostic, and will not trim the padding.
- */
- if (pkt_size == ETH_ZLEN && iph->version == 4) {
- data_len = ntohs(iph->tot_len)
- - sizeof(struct tcphdr) - sizeof(struct iphdr);
- }
- vlog("%d ", data_len);
- if (data_len != correct_payload[num_pkt]) {
- vlog("[!=%d]", correct_payload[num_pkt]);
- bad_packet = true;
- }
- num_pkt++;
- }
- vlog("}, Total %d packets.\n", num_pkt);
- if (num_pkt != correct_num_pkts)
- error(1, 0, "incorrect number of packets");
- if (bad_packet)
- error(1, 0, "incorrect packet geometry");
-
- printf("Test succeeded\n\n");
-}
-
-static void gro_sender(void)
-{
- static char fin_pkt[MAX_HDR_LEN];
- struct sockaddr_ll daddr = {};
- int txfd = -1;
-
- txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
- if (txfd < 0)
- error(1, errno, "socket creation");
-
- memset(&daddr, 0, sizeof(daddr));
- daddr.sll_ifindex = if_nametoindex(ifname);
- if (daddr.sll_ifindex == 0)
- error(1, errno, "if_nametoindex");
- daddr.sll_family = AF_PACKET;
- memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
- daddr.sll_halen = ETH_ALEN;
- create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
-
- if (strcmp(testname, "data") == 0) {
- send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "ack") == 0) {
- send_ack(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "flags") == 0) {
- send_flags(txfd, &daddr, 1, 0, 0, 0);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_flags(txfd, &daddr, 0, 1, 0, 0);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_flags(txfd, &daddr, 0, 0, 1, 0);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_flags(txfd, &daddr, 0, 0, 0, 1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "tcp") == 0) {
- send_changed_checksum(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_changed_seq(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_changed_ts(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_diff_opt(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "ip") == 0) {
- send_changed_ECN(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_changed_tos(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- if (proto == PF_INET) {
- /* Modified packets may be received out of order.
- * Sleep function added to enforce test boundaries
- * so that fin pkts are not received prior to other pkts.
- */
- sleep(1);
- send_changed_ttl(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- send_ip_options(txfd, &daddr);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- send_fragment4(txfd, &daddr);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- test_flush_id(txfd, &daddr, fin_pkt);
- } else if (proto == PF_INET6) {
- sleep(1);
- send_fragment6(txfd, &daddr);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- /* send IPv6 packets with ext header with same payload */
- send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- /* send IPv6 packets with ext header with different payload */
- send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- }
- } else if (strcmp(testname, "large") == 0) {
- /* 20 is the difference between min iphdr size
- * and min ipv6hdr size. Like MAX_HDR_SIZE,
- * MAX_PAYLOAD is defined with the larger header of the two.
- */
- int offset = proto == PF_INET ? 20 : 0;
- int remainder = (MAX_PAYLOAD + offset) % MSS;
-
- send_large(txfd, &daddr, remainder);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_large(txfd, &daddr, remainder + 1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else {
- error(1, 0, "Unknown testcase");
- }
-
- if (close(txfd))
- error(1, errno, "socket close");
-}
-
-static void gro_receiver(void)
-{
- static int correct_payload[NUM_PACKETS];
- int rxfd = -1;
-
- rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
- if (rxfd < 0)
- error(1, 0, "socket creation");
- setup_sock_filter(rxfd);
- set_timeout(rxfd);
- bind_packetsocket(rxfd);
-
- memset(correct_payload, 0, sizeof(correct_payload));
-
- if (strcmp(testname, "data") == 0) {
- printf("pure data packet of same size: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("large data packets followed by a smaller one: ");
- correct_payload[0] = PAYLOAD_LEN * 1.5;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("small data packets followed by a larger one: ");
- correct_payload[0] = PAYLOAD_LEN / 2;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
- } else if (strcmp(testname, "ack") == 0) {
- printf("duplicate ack and pure ack: ");
- check_recv_pkts(rxfd, correct_payload, 3);
- } else if (strcmp(testname, "flags") == 0) {
- correct_payload[0] = PAYLOAD_LEN * 3;
- correct_payload[1] = PAYLOAD_LEN * 2;
-
- printf("psh flag ends coalescing: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = 0;
- correct_payload[2] = PAYLOAD_LEN * 2;
- printf("syn flag ends coalescing: ");
- check_recv_pkts(rxfd, correct_payload, 3);
-
- printf("rst flag ends coalescing: ");
- check_recv_pkts(rxfd, correct_payload, 3);
-
- printf("urg flag ends coalescing: ");
- check_recv_pkts(rxfd, correct_payload, 3);
- } else if (strcmp(testname, "tcp") == 0) {
- correct_payload[0] = PAYLOAD_LEN;
- correct_payload[1] = PAYLOAD_LEN;
- correct_payload[2] = PAYLOAD_LEN;
- correct_payload[3] = PAYLOAD_LEN;
-
- printf("changed checksum does not coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("Wrong Seq number doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("Different timestamp doesn't coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 4);
-
- printf("Different options doesn't coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 2);
- } else if (strcmp(testname, "ip") == 0) {
- correct_payload[0] = PAYLOAD_LEN;
- correct_payload[1] = PAYLOAD_LEN;
-
- printf("different ECN doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("different tos doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- if (proto == PF_INET) {
- printf("different ttl doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("ip options doesn't coalesce: ");
- correct_payload[2] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 3);
-
- printf("fragmented ip4 doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- /* is_atomic checks */
- printf("DF=1, Incrementing - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=1, Fixed - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=0, Incrementing - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=0, Fixed - should not coalesce: ");
- correct_payload[0] = PAYLOAD_LEN;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
- } else if (proto == PF_INET6) {
- /* GRO doesn't check for ipv6 hop limit when flushing.
- * Hence no corresponding test to the ipv4 case.
- */
- printf("fragmented ip6 doesn't coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = PAYLOAD_LEN;
- correct_payload[2] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 3);
-
- printf("ipv6 with ext header does coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("ipv6 with ext header with different payloads doesn't coalesce: ");
- correct_payload[0] = PAYLOAD_LEN;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
- }
- } else if (strcmp(testname, "large") == 0) {
- int offset = proto == PF_INET ? 20 : 0;
- int remainder = (MAX_PAYLOAD + offset) % MSS;
-
- correct_payload[0] = (MAX_PAYLOAD + offset);
- correct_payload[1] = remainder;
- printf("Shouldn't coalesce if exceed IP max pkt size: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- /* last segment sent individually, doesn't start new segment */
- correct_payload[0] = correct_payload[0] - remainder;
- correct_payload[1] = remainder + 1;
- correct_payload[2] = remainder + 1;
- check_recv_pkts(rxfd, correct_payload, 3);
- } else {
- error(1, 0, "Test case error, should never trigger");
- }
-
- if (close(rxfd))
- error(1, 0, "socket close");
-}
-
-static void parse_args(int argc, char **argv)
-{
- static const struct option opts[] = {
- { "daddr", required_argument, NULL, 'd' },
- { "dmac", required_argument, NULL, 'D' },
- { "iface", required_argument, NULL, 'i' },
- { "ipv4", no_argument, NULL, '4' },
- { "ipv6", no_argument, NULL, '6' },
- { "rx", no_argument, NULL, 'r' },
- { "saddr", required_argument, NULL, 's' },
- { "smac", required_argument, NULL, 'S' },
- { "test", required_argument, NULL, 't' },
- { "verbose", no_argument, NULL, 'v' },
- { 0, 0, 0, 0 }
- };
- int c;
-
- while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) {
- switch (c) {
- case '4':
- proto = PF_INET;
- ethhdr_proto = htons(ETH_P_IP);
- break;
- case '6':
- proto = PF_INET6;
- ethhdr_proto = htons(ETH_P_IPV6);
- break;
- case 'd':
- addr4_dst = addr6_dst = optarg;
- break;
- case 'D':
- dmac = optarg;
- break;
- case 'i':
- ifname = optarg;
- break;
- case 'r':
- tx_socket = false;
- break;
- case 's':
- addr4_src = addr6_src = optarg;
- break;
- case 'S':
- smac = optarg;
- break;
- case 't':
- testname = optarg;
- break;
- case 'v':
- verbose = true;
- break;
- default:
- error(1, 0, "%s invalid option %c\n", __func__, c);
- break;
- }
- }
-}
-
-int main(int argc, char **argv)
-{
- parse_args(argc, argv);
-
- if (proto == PF_INET) {
- tcp_offset = ETH_HLEN + sizeof(struct iphdr);
- total_hdr_len = tcp_offset + sizeof(struct tcphdr);
- } else if (proto == PF_INET6) {
- tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
- total_hdr_len = MAX_HDR_LEN;
- } else {
- error(1, 0, "Protocol family is not ipv4 or ipv6");
- }
-
- read_MAC(src_mac, smac);
- read_MAC(dst_mac, dmac);
-
- if (tx_socket)
- gro_sender();
- else
- gro_receiver();
-
- fprintf(stderr, "Gro::%s test passed.\n", testname);
- return 0;
-}
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
deleted file mode 100755
index 02c21ff4ca81..000000000000
--- a/tools/testing/selftests/net/gro.sh
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly SERVER_MAC="aa:00:00:00:00:02"
-readonly CLIENT_MAC="aa:00:00:00:00:01"
-readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
-readonly PROTOS=("ipv4" "ipv6")
-dev=""
-test="all"
-proto="ipv4"
-
-run_test() {
- local server_pid=0
- local exit_code=0
- local protocol=$1
- local test=$2
- local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
- "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
-
- setup_ns
- # Each test is run 3 times to deflake, because given the receive timing,
- # not all packets that should coalesce will be considered in the same flow
- # on every try.
- for tries in {1..3}; do
- # Actual test starts here
- ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
- 1>>log.txt &
- server_pid=$!
- sleep 0.5 # to allow for socket init
- ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \
- 1>>log.txt
- wait "${server_pid}"
- exit_code=$?
- if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \
- ${exit_code} -ne 0 ]]; then
- echo "Ignoring errors due to slow environment" 1>&2
- exit_code=0
- fi
- if [[ "${exit_code}" -eq 0 ]]; then
- break;
- fi
- done
- cleanup_ns
- echo ${exit_code}
-}
-
-run_all_tests() {
- local failed_tests=()
- for proto in "${PROTOS[@]}"; do
- for test in "${TESTS[@]}"; do
- echo "running test ${proto} ${test}" >&2
- exit_code=$(run_test $proto $test)
- if [[ "${exit_code}" -ne 0 ]]; then
- failed_tests+=("${proto}_${test}")
- fi;
- done;
- done
- if [[ ${#failed_tests[@]} -ne 0 ]]; then
- echo "failed tests: ${failed_tests[*]}. \
- Please see log.txt for more logs"
- exit 1
- else
- echo "All Tests Succeeded!"
- fi;
-}
-
-usage() {
- echo "Usage: $0 \
- [-i <DEV>] \
- [-t data|ack|flags|tcp|ip|large] \
- [-p <ipv4|ipv6>]" 1>&2;
- exit 1;
-}
-
-while getopts "i:t:p:" opt; do
- case "${opt}" in
- i)
- dev="${OPTARG}"
- ;;
- t)
- test="${OPTARG}"
- ;;
- p)
- proto="${OPTARG}"
- ;;
- *)
- usage
- ;;
- esac
-done
-
-if [ -n "$dev" ]; then
- source setup_loopback.sh
-else
- source setup_veth.sh
-fi
-
-setup
-trap cleanup EXIT
-if [[ "${test}" == "all" ]]; then
- run_all_tests
-else
- run_test "${proto}" "${test}"
-fi;
diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile
index 884cd2cc0681..4b6afc0fe9f8 100644
--- a/tools/testing/selftests/net/hsr/Makefile
+++ b/tools/testing/selftests/net/hsr/Makefile
@@ -2,7 +2,11 @@
top_srcdir = ../../../../..
-TEST_PROGS := hsr_ping.sh hsr_redbox.sh
+TEST_PROGS := \
+ hsr_ping.sh \
+ hsr_redbox.sh \
+# end of TEST_PROGS
+
TEST_FILES += hsr_common.sh
include ../../lib.mk
diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config
index 241542441c51..205cc4d3d64b 100644
--- a/tools/testing/selftests/net/hsr/config
+++ b/tools/testing/selftests/net/hsr/config
@@ -1,5 +1,6 @@
+CONFIG_BRIDGE=y
+CONFIG_HSR=y
CONFIG_IPV6=y
CONFIG_NET_SCH_NETEM=m
-CONFIG_HSR=y
CONFIG_VETH=y
-CONFIG_BRIDGE=y
+CONFIG_VLAN_8021Q=m
diff --git a/tools/testing/selftests/net/hsr/hsr_common.sh b/tools/testing/selftests/net/hsr/hsr_common.sh
index 8e97b1f2e7e5..1dc882ac1c74 100644
--- a/tools/testing/selftests/net/hsr/hsr_common.sh
+++ b/tools/testing/selftests/net/hsr/hsr_common.sh
@@ -15,7 +15,7 @@ do_ping()
{
local netns="$1"
local connect_addr="$2"
- local ping_args="-q -c 2"
+ local ping_args="-q -c 2 -i 0.1"
if is_v6 "${connect_addr}"; then
$ipv6 || return 0
@@ -36,7 +36,7 @@ do_ping_long()
{
local netns="$1"
local connect_addr="$2"
- local ping_args="-q -c 10"
+ local ping_args="-q -c 10 -i 0.1"
if is_v6 "${connect_addr}"; then
$ipv6 || return 0
diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index f5d207fc770a..5a65f4f836be 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -175,6 +175,100 @@ setup_hsr_interfaces()
ip -net "$ns3" link set hsr3 up
}
+setup_vlan_interfaces() {
+ ip -net "$ns1" link add link hsr1 name hsr1.2 type vlan id 2
+ ip -net "$ns1" link add link hsr1 name hsr1.3 type vlan id 3
+ ip -net "$ns1" link add link hsr1 name hsr1.4 type vlan id 4
+ ip -net "$ns1" link add link hsr1 name hsr1.5 type vlan id 5
+
+ ip -net "$ns2" link add link hsr2 name hsr2.2 type vlan id 2
+ ip -net "$ns2" link add link hsr2 name hsr2.3 type vlan id 3
+ ip -net "$ns2" link add link hsr2 name hsr2.4 type vlan id 4
+ ip -net "$ns2" link add link hsr2 name hsr2.5 type vlan id 5
+
+ ip -net "$ns3" link add link hsr3 name hsr3.2 type vlan id 2
+ ip -net "$ns3" link add link hsr3 name hsr3.3 type vlan id 3
+ ip -net "$ns3" link add link hsr3 name hsr3.4 type vlan id 4
+ ip -net "$ns3" link add link hsr3 name hsr3.5 type vlan id 5
+
+ ip -net "$ns1" addr add 100.64.2.1/24 dev hsr1.2
+ ip -net "$ns1" addr add 100.64.3.1/24 dev hsr1.3
+ ip -net "$ns1" addr add 100.64.4.1/24 dev hsr1.4
+ ip -net "$ns1" addr add 100.64.5.1/24 dev hsr1.5
+
+ ip -net "$ns2" addr add 100.64.2.2/24 dev hsr2.2
+ ip -net "$ns2" addr add 100.64.3.2/24 dev hsr2.3
+ ip -net "$ns2" addr add 100.64.4.2/24 dev hsr2.4
+ ip -net "$ns2" addr add 100.64.5.2/24 dev hsr2.5
+
+ ip -net "$ns3" addr add 100.64.2.3/24 dev hsr3.2
+ ip -net "$ns3" addr add 100.64.3.3/24 dev hsr3.3
+ ip -net "$ns3" addr add 100.64.4.3/24 dev hsr3.4
+ ip -net "$ns3" addr add 100.64.5.3/24 dev hsr3.5
+
+ ip -net "$ns1" link set dev hsr1.2 up
+ ip -net "$ns1" link set dev hsr1.3 up
+ ip -net "$ns1" link set dev hsr1.4 up
+ ip -net "$ns1" link set dev hsr1.5 up
+
+ ip -net "$ns2" link set dev hsr2.2 up
+ ip -net "$ns2" link set dev hsr2.3 up
+ ip -net "$ns2" link set dev hsr2.4 up
+ ip -net "$ns2" link set dev hsr2.5 up
+
+ ip -net "$ns3" link set dev hsr3.2 up
+ ip -net "$ns3" link set dev hsr3.3 up
+ ip -net "$ns3" link set dev hsr3.4 up
+ ip -net "$ns3" link set dev hsr3.5 up
+
+}
+
+hsr_vlan_ping() {
+ do_ping "$ns1" 100.64.2.2
+ do_ping "$ns1" 100.64.3.2
+ do_ping "$ns1" 100.64.4.2
+ do_ping "$ns1" 100.64.5.2
+
+ do_ping "$ns1" 100.64.2.3
+ do_ping "$ns1" 100.64.3.3
+ do_ping "$ns1" 100.64.4.3
+ do_ping "$ns1" 100.64.5.3
+
+ do_ping "$ns2" 100.64.2.1
+ do_ping "$ns2" 100.64.3.1
+ do_ping "$ns2" 100.64.4.1
+ do_ping "$ns2" 100.64.5.1
+
+ do_ping "$ns2" 100.64.2.3
+ do_ping "$ns2" 100.64.3.3
+ do_ping "$ns2" 100.64.4.3
+ do_ping "$ns2" 100.64.5.3
+
+ do_ping "$ns3" 100.64.2.1
+ do_ping "$ns3" 100.64.3.1
+ do_ping "$ns3" 100.64.4.1
+ do_ping "$ns3" 100.64.5.1
+
+ do_ping "$ns3" 100.64.2.2
+ do_ping "$ns3" 100.64.3.2
+ do_ping "$ns3" 100.64.4.2
+ do_ping "$ns3" 100.64.5.2
+}
+
+run_vlan_tests() {
+ vlan_challenged_hsr1=$(ip net exec "$ns1" ethtool -k hsr1 | grep "vlan-challenged" | awk '{print $2}')
+ vlan_challenged_hsr2=$(ip net exec "$ns2" ethtool -k hsr2 | grep "vlan-challenged" | awk '{print $2}')
+ vlan_challenged_hsr3=$(ip net exec "$ns3" ethtool -k hsr3 | grep "vlan-challenged" | awk '{print $2}')
+
+ if [[ "$vlan_challenged_hsr1" = "off" || "$vlan_challenged_hsr2" = "off" || "$vlan_challenged_hsr3" = "off" ]]; then
+ echo "INFO: Running VLAN tests"
+ setup_vlan_interfaces
+ hsr_vlan_ping
+ else
+ echo "INFO: Not Running VLAN tests as the device does not support VLAN"
+ fi
+}
+
check_prerequisites
setup_ns ns1 ns2 ns3
@@ -183,9 +277,13 @@ trap cleanup_all_ns EXIT
setup_hsr_interfaces 0
do_complete_ping_test
+run_vlan_tests
+
setup_ns ns1 ns2 ns3
setup_hsr_interfaces 1
do_complete_ping_test
+run_vlan_tests
+
exit $ret
diff --git a/tools/testing/selftests/net/hsr/settings b/tools/testing/selftests/net/hsr/settings
new file mode 100644
index 000000000000..0fbc037f2aa8
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/settings
@@ -0,0 +1 @@
+timeout=50
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index d6f0e449c029..b13c89a99ecb 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -178,8 +178,6 @@ setup()
else
ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
- ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
- ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
index 76e604e4810e..7bfeeb133705 100644
--- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
@@ -106,14 +106,14 @@ static void do_tx(int domain, int type, int protocol)
ret = io_uring_queue_init(512, &ring, 0);
if (ret)
- error(1, ret, "io_uring: queue init");
+ error(1, -ret, "io_uring: queue init");
iov.iov_base = payload;
iov.iov_len = cfg_payload_len;
ret = io_uring_register_buffers(&ring, &iov, 1);
if (ret)
- error(1, ret, "io_uring: buffer registration");
+ error(1, -ret, "io_uring: buffer registration");
tstop = gettimeofday_ms() + cfg_runtime_ms;
do {
@@ -149,24 +149,24 @@ static void do_tx(int domain, int type, int protocol)
ret = io_uring_submit(&ring);
if (ret != cfg_nr_reqs)
- error(1, ret, "submit");
+ error(1, -ret, "submit");
if (cfg_cork)
do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
for (i = 0; i < cfg_nr_reqs; i++) {
ret = io_uring_wait_cqe(&ring, &cqe);
if (ret)
- error(1, ret, "wait cqe");
+ error(1, -ret, "wait cqe");
if (cqe->user_data != NONZC_TAG &&
cqe->user_data != ZC_TAG)
- error(1, -EINVAL, "invalid cqe->user_data");
+ error(1, EINVAL, "invalid cqe->user_data");
if (cqe->flags & IORING_CQE_F_NOTIF) {
if (cqe->flags & IORING_CQE_F_MORE)
- error(1, -EINVAL, "invalid notif flags");
+ error(1, EINVAL, "invalid notif flags");
if (compl_cqes <= 0)
- error(1, -EINVAL, "notification mismatch");
+ error(1, EINVAL, "notification mismatch");
compl_cqes--;
i--;
io_uring_cqe_seen(&ring);
@@ -174,14 +174,14 @@ static void do_tx(int domain, int type, int protocol)
}
if (cqe->flags & IORING_CQE_F_MORE) {
if (cqe->user_data != ZC_TAG)
- error(1, cqe->res, "unexpected F_MORE");
+ error(1, -cqe->res, "unexpected F_MORE");
compl_cqes++;
}
if (cqe->res >= 0) {
packets++;
bytes += cqe->res;
} else if (cqe->res != -EAGAIN) {
- error(1, cqe->res, "send failed");
+ error(1, -cqe->res, "send failed");
}
io_uring_cqe_seen(&ring);
}
@@ -190,11 +190,11 @@ static void do_tx(int domain, int type, int protocol)
while (compl_cqes) {
ret = io_uring_wait_cqe(&ring, &cqe);
if (ret)
- error(1, ret, "wait cqe");
+ error(1, -ret, "wait cqe");
if (cqe->flags & IORING_CQE_F_MORE)
- error(1, -EINVAL, "invalid notif flags");
+ error(1, EINVAL, "invalid notif flags");
if (!(cqe->flags & IORING_CQE_F_NOTIF))
- error(1, -EINVAL, "missing notif flag");
+ error(1, EINVAL, "missing notif flag");
io_uring_cqe_seen(&ring);
compl_cqes--;
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
index 12491850ae98..845c26dd01a9 100755
--- a/tools/testing/selftests/net/ioam6.sh
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -3,119 +3,106 @@
#
# Author: Justin Iurman <justin.iurman@uliege.be>
#
-# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data
-# consistency directly inside packets on the receiver side. Tests are divided
-# into three categories: OUTPUT (evaluates the IOAM processing by the sender),
-# INPUT (evaluates the IOAM processing by a receiver) and GLOBAL (evaluates
-# wider use cases that do not fall into the other two categories). Both OUTPUT
-# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL
-# tests use the entire three-node topology (alpha, beta, gamma). Each test is
-# documented inside its own handler in the code below.
+# This script evaluates IOAM for IPv6 by checking local IOAM configurations and
+# IOAM data inside packets. There are three categories of tests: LOCAL, OUTPUT,
+# and INPUT. The former (LOCAL) checks all IOAM related configurations locally
+# without sending packets. OUTPUT tests verify the processing of an IOAM
+# encapsulating node, while INPUT tests verify the processing of an IOAM transit
+# node. Both OUTPUT and INPUT tests send packets. Each test is documented inside
+# its own handler.
#
-# An IOAM domain is configured from Alpha to Gamma but not on the reverse path.
-# When either Beta or Gamma is the destination (depending on the test category),
-# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop.
+# The topology used for OUTPUT and INPUT tests is made of three nodes:
+# - Alpha (the IOAM encapsulating node)
+# - Beta (the IOAM transit node)
+# - Gamma (the receiver) **
#
+# An IOAM domain is configured from Alpha to Beta, but not on the reverse path.
+# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop.
#
-# +-------------------+ +-------------------+
-# | | | |
-# | Alpha netns | | Gamma netns |
-# | | | |
-# | +-------------+ | | +-------------+ |
-# | | veth0 | | | | veth0 | |
-# | | db01::2/64 | | | | db02::2/64 | |
-# | +-------------+ | | +-------------+ |
-# | . | | . |
-# +-------------------+ +-------------------+
-# . .
-# . .
-# . .
-# +----------------------------------------------------+
-# | . . |
-# | +-------------+ +-------------+ |
-# | | veth0 | | veth1 | |
-# | | db01::1/64 | ................ | db02::1/64 | |
-# | +-------------+ +-------------+ |
-# | |
-# | Beta netns |
-# | |
-# +----------------------------------------------------+
+# ** Gamma is required because ioam6_parser.c uses a packet socket and we need
+# to see IOAM data inserted by the very last node (Beta), which would happen
+# _after_ we get a copy of the packet on Beta. Note that using an
+# IPv6 raw socket with IPV6_RECVHOPOPTS on Beta would not be enough: we also
+# need to access the IPv6 header to check some fields (e.g., source and
+# destination addresses), which is not possible in that case. As a
+# consequence, we need Gamma as a receiver to run ioam6_parser.c which uses a
+# packet socket.
#
#
+# +-----------------------+ +-----------------------+
+# | | | |
+# | Alpha netns | | Gamma netns |
+# | | | |
+# | +-------------------+ | | +-------------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 2001:db8:1::2/64 | | | | 2001:db8:2::2/64 | |
+# | +-------------------+ | | +-------------------+ |
+# | . | | . |
+# +-----------.-----------+ +-----------.-----------+
+# . .
+# . .
+# . .
+# +-----------.----------------------------------.-----------+
+# | . . |
+# | +-------------------+ +-------------------+ |
+# | | veth0 | | veth1 | |
+# | | 2001:db8:1::1/64 | ............ | 2001:db8:2::1/64 | |
+# | +-------------------+ +-------------------+ |
+# | |
+# | Beta netns |
+# | |
+# +----------------------------------------------------------+
#
-# =============================================================
-# | Alpha - IOAM configuration |
-# +===========================================================+
-# | Node ID | 1 |
-# +-----------------------------------------------------------+
-# | Node Wide ID | 11111111 |
-# +-----------------------------------------------------------+
-# | Ingress ID | 0xffff (default value) |
-# +-----------------------------------------------------------+
-# | Ingress Wide ID | 0xffffffff (default value) |
-# +-----------------------------------------------------------+
-# | Egress ID | 101 |
-# +-----------------------------------------------------------+
-# | Egress Wide ID | 101101 |
-# +-----------------------------------------------------------+
-# | Namespace Data | 0xdeadbee0 |
-# +-----------------------------------------------------------+
-# | Namespace Wide Data | 0xcafec0caf00dc0de |
-# +-----------------------------------------------------------+
-# | Schema ID | 777 |
-# +-----------------------------------------------------------+
-# | Schema Data | something that will be 4n-aligned |
-# +-----------------------------------------------------------+
#
#
-# =============================================================
-# | Beta - IOAM configuration |
-# +===========================================================+
-# | Node ID | 2 |
-# +-----------------------------------------------------------+
-# | Node Wide ID | 22222222 |
-# +-----------------------------------------------------------+
-# | Ingress ID | 201 |
-# +-----------------------------------------------------------+
-# | Ingress Wide ID | 201201 |
-# +-----------------------------------------------------------+
-# | Egress ID | 202 |
-# +-----------------------------------------------------------+
-# | Egress Wide ID | 202202 |
-# +-----------------------------------------------------------+
-# | Namespace Data | 0xdeadbee1 |
-# +-----------------------------------------------------------+
-# | Namespace Wide Data | 0xcafec0caf11dc0de |
-# +-----------------------------------------------------------+
-# | Schema ID | 666 |
-# +-----------------------------------------------------------+
-# | Schema Data | Hello there -Obi |
-# +-----------------------------------------------------------+
+# +==========================================================+
+# | Alpha - IOAM configuration |
+# +=====================+====================================+
+# | Node ID | 1 |
+# +---------------------+------------------------------------+
+# | Node Wide ID | 11111111 |
+# +---------------------+------------------------------------+
+# | Ingress ID | 0xffff (default value) |
+# +---------------------+------------------------------------+
+# | Ingress Wide ID | 0xffffffff (default value) |
+# +---------------------+------------------------------------+
+# | Egress ID | 101 |
+# +---------------------+------------------------------------+
+# | Egress Wide ID | 101101 |
+# +---------------------+------------------------------------+
+# | Namespace Data | 0xdeadbeef |
+# +---------------------+------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf00dc0de |
+# +---------------------+------------------------------------+
+# | Schema ID | 777 |
+# +---------------------+------------------------------------+
+# | Schema Data | something that will be 4n-aligned |
+# +---------------------+------------------------------------+
#
#
-# =============================================================
-# | Gamma - IOAM configuration |
-# +===========================================================+
-# | Node ID | 3 |
-# +-----------------------------------------------------------+
-# | Node Wide ID | 33333333 |
-# +-----------------------------------------------------------+
-# | Ingress ID | 301 |
-# +-----------------------------------------------------------+
-# | Ingress Wide ID | 301301 |
-# +-----------------------------------------------------------+
-# | Egress ID | 0xffff (default value) |
-# +-----------------------------------------------------------+
-# | Egress Wide ID | 0xffffffff (default value) |
-# +-----------------------------------------------------------+
-# | Namespace Data | 0xdeadbee2 |
-# +-----------------------------------------------------------+
-# | Namespace Wide Data | 0xcafec0caf22dc0de |
-# +-----------------------------------------------------------+
-# | Schema ID | 0xffffff (= None) |
-# +-----------------------------------------------------------+
-# | Schema Data | |
-# +-----------------------------------------------------------+
+# +==========================================================+
+# | Beta - IOAM configuration |
+# +=====================+====================================+
+# | Node ID | 2 |
+# +---------------------+------------------------------------+
+# | Node Wide ID | 22222222 |
+# +---------------------+------------------------------------+
+# | Ingress ID | 201 |
+# +---------------------+------------------------------------+
+# | Ingress Wide ID | 201201 |
+# +---------------------+------------------------------------+
+# | Egress ID | 202 |
+# +---------------------+------------------------------------+
+# | Egress Wide ID | 202202 |
+# +---------------------+------------------------------------+
+# | Namespace Data | 0xffffffff (default value) |
+# +---------------------+------------------------------------+
+# | Namespace Wide Data | 0xffffffffffffffff (default value) |
+# +---------------------+------------------------------------+
+# | Schema ID | 0xffffff (= None) |
+# +---------------------+------------------------------------+
+# | Schema Data | |
+# +---------------------+------------------------------------+
source lib.sh
@@ -128,64 +115,69 @@ source lib.sh
################################################################################
ALPHA=(
- 1 # ID
- 11111111 # Wide ID
- 0xffff # Ingress ID
- 0xffffffff # Ingress Wide ID
- 101 # Egress ID
- 101101 # Egress Wide ID
- 0xdeadbee0 # Namespace Data
- 0xcafec0caf00dc0de # Namespace Wide Data
- 777 # Schema ID (0xffffff = None)
- "something that will be 4n-aligned" # Schema Data
+ 1 # ID
+ 11111111 # Wide ID
+ 0xffff # Ingress ID (default value)
+ 0xffffffff # Ingress Wide ID (default value)
+ 101 # Egress ID
+ 101101 # Egress Wide ID
+ 0xdeadbeef # Namespace Data
+ 0xcafec0caf00dc0de # Namespace Wide Data
+ 777 # Schema ID
+ "something that will be 4n-aligned" # Schema Data
)
BETA=(
- 2
- 22222222
- 201
- 201201
- 202
- 202202
- 0xdeadbee1
- 0xcafec0caf11dc0de
- 666
- "Hello there -Obi"
+ 2 # ID
+ 22222222 # Wide ID
+ 201 # Ingress ID
+ 201201 # Ingress Wide ID
+ 202 # Egress ID
+ 202202 # Egress Wide ID
+ 0xffffffff # Namespace Data (empty value)
+ 0xffffffffffffffff # Namespace Wide Data (empty value)
+ 0xffffff # Schema ID (empty value)
+ "" # Schema Data (empty value)
)
-GAMMA=(
- 3
- 33333333
- 301
- 301301
- 0xffff
- 0xffffffff
- 0xdeadbee2
- 0xcafec0caf22dc0de
- 0xffffff
- ""
-)
+TESTS_LOCAL="
+ local_sysctl_ioam_id
+ local_sysctl_ioam_id_wide
+ local_sysctl_ioam_intf_id
+ local_sysctl_ioam_intf_id_wide
+ local_sysctl_ioam_intf_enabled
+ local_ioam_namespace
+ local_ioam_schema
+ local_ioam_schema_namespace
+ local_route_ns
+ local_route_tunsrc
+ local_route_tundst
+ local_route_trace_type
+ local_route_trace_size
+ local_route_trace_type_bits
+ local_route_trace_size_values
+"
TESTS_OUTPUT="
- out_undef_ns
- out_no_room
- out_bits
- out_full_supp_trace
+ output_undef_ns
+ output_no_room
+ output_no_room_oss
+ output_bits
+ output_sizes
+ output_full_supp_trace
"
TESTS_INPUT="
- in_undef_ns
- in_no_room
- in_oflag
- in_bits
- in_full_supp_trace
+ input_undef_ns
+ input_no_room
+ input_no_room_oss
+ input_disabled
+ input_oflag
+ input_bits
+ input_sizes
+ input_full_supp_trace
"
-TESTS_GLOBAL="
- fwd_full_supp_trace
-"
-
-
################################################################################
# #
# LIBRARY #
@@ -194,66 +186,64 @@ TESTS_GLOBAL="
check_kernel_compatibility()
{
- setup_ns ioam_tmp_node
- ip link add name veth0 netns $ioam_tmp_node type veth \
- peer name veth1 netns $ioam_tmp_node
+ setup_ns ioam_tmp_node &>/dev/null
+ local ret=$?
- ip -netns $ioam_tmp_node link set veth0 up
- ip -netns $ioam_tmp_node link set veth1 up
+ ip link add name veth0 netns $ioam_tmp_node type veth \
+ peer name veth1 netns $ioam_tmp_node &>/dev/null
+ ret=$((ret + $?))
- ip -netns $ioam_tmp_node ioam namespace add 0
- ns_ad=$?
+ ip -netns $ioam_tmp_node link set veth0 up &>/dev/null
+ ret=$((ret + $?))
- ip -netns $ioam_tmp_node ioam namespace show | grep -q "namespace 0"
- ns_sh=$?
+ ip -netns $ioam_tmp_node link set veth1 up &>/dev/null
+ ret=$((ret + $?))
- if [[ $ns_ad != 0 || $ns_sh != 0 ]]
+ if [ $ret != 0 ]
then
- echo "SKIP: kernel version probably too old, missing ioam support"
- ip link del veth0 2>/dev/null || true
- cleanup_ns $ioam_tmp_node || true
+ echo "SKIP: Setup failed."
+ cleanup_ns $ioam_tmp_node
exit $ksft_skip
fi
- ip -netns $ioam_tmp_node route add db02::/64 encap ioam6 mode inline \
- trace prealloc type 0x800000 ns 0 size 4 dev veth0
- tr_ad=$?
+ ip -netns $ioam_tmp_node route add 2001:db8:2::/64 \
+ encap ioam6 trace prealloc type 0x800000 ns 0 size 4 dev veth0 &>/dev/null
+ ret=$?
- ip -netns $ioam_tmp_node -6 route | grep -q "encap ioam6"
- tr_sh=$?
+ ip -netns $ioam_tmp_node -6 route 2>/dev/null | grep -q "encap ioam6"
+ ret=$((ret + $?))
- if [[ $tr_ad != 0 || $tr_sh != 0 ]]
+ if [ $ret != 0 ]
then
- echo "SKIP: cannot attach an ioam trace to a route, did you compile" \
- "without CONFIG_IPV6_IOAM6_LWTUNNEL?"
- ip link del veth0 2>/dev/null || true
- cleanup_ns $ioam_tmp_node || true
+ echo "SKIP: Cannot attach an IOAM trace to a route. Was your kernel" \
+ "compiled without CONFIG_IPV6_IOAM6_LWTUNNEL? Are you running an" \
+ "old kernel? Are you using an old version of iproute2?"
+ cleanup_ns $ioam_tmp_node
exit $ksft_skip
fi
- ip link del veth0 2>/dev/null || true
- cleanup_ns $ioam_tmp_node || true
+ cleanup_ns $ioam_tmp_node
- lsmod | grep -q "ip6_tunnel"
+ lsmod 2>/dev/null | grep -q "ip6_tunnel"
ip6tnl_loaded=$?
- if [ $ip6tnl_loaded = 0 ]
+ if [ $ip6tnl_loaded == 0 ]
then
encap_tests=0
else
modprobe ip6_tunnel &>/dev/null
- lsmod | grep -q "ip6_tunnel"
+ lsmod 2>/dev/null | grep -q "ip6_tunnel"
encap_tests=$?
if [ $encap_tests != 0 ]
then
- ip a | grep -q "ip6tnl0"
+ ip a 2>/dev/null | grep -q "ip6tnl0"
encap_tests=$?
if [ $encap_tests != 0 ]
then
echo "Note: ip6_tunnel not found neither as a module nor inside the" \
- "kernel, tests that require it (encap mode) will be omitted"
+ "kernel. Any tests that require it will be skipped."
fi
fi
fi
@@ -261,477 +251,1400 @@ check_kernel_compatibility()
cleanup()
{
- ip link del ioam-veth-alpha 2>/dev/null || true
- ip link del ioam-veth-gamma 2>/dev/null || true
-
- cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma || true
+ cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma
if [ $ip6tnl_loaded != 0 ]
then
- modprobe -r ip6_tunnel 2>/dev/null || true
+ modprobe -r ip6_tunnel &>/dev/null
fi
}
setup()
{
- setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma
+ setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma &>/dev/null
ip link add name ioam-veth-alpha netns $ioam_node_alpha type veth \
- peer name ioam-veth-betaL netns $ioam_node_beta
+ peer name ioam-veth-betaL netns $ioam_node_beta &>/dev/null
ip link add name ioam-veth-betaR netns $ioam_node_beta type veth \
- peer name ioam-veth-gamma netns $ioam_node_gamma
-
- ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0
- ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0
- ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1
- ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0
-
- ip -netns $ioam_node_alpha addr add db01::2/64 dev veth0
- ip -netns $ioam_node_alpha link set veth0 up
- ip -netns $ioam_node_alpha link set lo up
- ip -netns $ioam_node_alpha route add db02::/64 via db01::1 dev veth0
- ip -netns $ioam_node_alpha route del db01::/64
- ip -netns $ioam_node_alpha route add db01::/64 dev veth0
-
- ip -netns $ioam_node_beta addr add db01::1/64 dev veth0
- ip -netns $ioam_node_beta addr add db02::1/64 dev veth1
- ip -netns $ioam_node_beta link set veth0 up
- ip -netns $ioam_node_beta link set veth1 up
- ip -netns $ioam_node_beta link set lo up
-
- ip -netns $ioam_node_gamma addr add db02::2/64 dev veth0
- ip -netns $ioam_node_gamma link set veth0 up
- ip -netns $ioam_node_gamma link set lo up
- ip -netns $ioam_node_gamma route add db01::/64 via db02::1 dev veth0
-
- # - IOAM config -
- ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]}
- ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]}
- ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]}
- ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]}
- ip -netns $ioam_node_alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]}
- ip -netns $ioam_node_alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}"
- ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]}
-
- ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.all.forwarding=1
- ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]}
- ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]}
- ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
- ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]}
- ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]}
- ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]}
- ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]}
- ip -netns $ioam_node_beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]}
- ip -netns $ioam_node_beta ioam schema add ${BETA[8]} "${BETA[9]}"
- ip -netns $ioam_node_beta ioam namespace set 123 schema ${BETA[8]}
-
- ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]}
- ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]}
- ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
- ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]}
- ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]}
- ip -netns $ioam_node_gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]}
+ peer name ioam-veth-gamma netns $ioam_node_gamma &>/dev/null
+
+ ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0 &>/dev/null
+ ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0 &>/dev/null
+ ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 &>/dev/null
+ ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 &>/dev/null
+
+ ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null
+ ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null
+ ip -netns $ioam_node_alpha link set veth0 up &>/dev/null
+ ip -netns $ioam_node_alpha link set lo up &>/dev/null
+ ip -netns $ioam_node_alpha route add 2001:db8:2::/64 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ ip -netns $ioam_node_beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null
+ ip -netns $ioam_node_beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null
+ ip -netns $ioam_node_beta link set veth0 up &>/dev/null
+ ip -netns $ioam_node_beta link set veth1 up &>/dev/null
+ ip -netns $ioam_node_beta link set lo up &>/dev/null
+
+ ip -netns $ioam_node_gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null
+ ip -netns $ioam_node_gamma link set veth0 up &>/dev/null
+ ip -netns $ioam_node_gamma link set lo up &>/dev/null
+ ip -netns $ioam_node_gamma route add 2001:db8:1::/64 \
+ via 2001:db8:2::1 dev veth0 &>/dev/null
+
+ # - Alpha: IOAM config -
+ ip netns exec $ioam_node_alpha \
+ sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} &>/dev/null
+ ip netns exec $ioam_node_alpha \
+ sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} &>/dev/null
+ ip netns exec $ioam_node_alpha \
+ sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} &>/dev/null
+ ip netns exec $ioam_node_alpha \
+ sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} &>/dev/null
+ ip -netns $ioam_node_alpha \
+ ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} &>/dev/null
+ ip -netns $ioam_node_alpha \
+ ioam schema add ${ALPHA[8]} "${ALPHA[9]}" &>/dev/null
+ ip -netns $ioam_node_alpha \
+ ioam namespace set 123 schema ${ALPHA[8]} &>/dev/null
+
+ # - Beta: IOAM config -
+ ip netns exec $ioam_node_beta \
+ sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null
+ ip netns exec $ioam_node_beta \
+ sysctl -wq net.ipv6.ioam6_id=${BETA[0]} &>/dev/null
+ ip netns exec $ioam_node_beta \
+ sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} &>/dev/null
+ ip netns exec $ioam_node_beta \
+ sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null
+ ip netns exec $ioam_node_beta \
+ sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} &>/dev/null
+ ip netns exec $ioam_node_beta \
+ sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} &>/dev/null
+ ip netns exec $ioam_node_beta \
+ sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} &>/dev/null
+ ip netns exec $ioam_node_beta \
+ sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} &>/dev/null
+ ip -netns $ioam_node_beta ioam namespace add 123 &>/dev/null
sleep 1
- ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 db02::2 &>/dev/null
+ ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null
if [ $? != 0 ]
then
- echo "Setup FAILED"
- cleanup &>/dev/null
- exit 0
+ echo "SKIP: Setup failed."
+ cleanup
+ exit $ksft_skip
fi
}
log_test_passed()
{
- local desc=$1
- printf "TEST: %-60s [ OK ]\n" "${desc}"
+ printf " - TEST: %-57s [ OK ]\n" "$1"
+ npassed=$((npassed+1))
}
-log_test_failed()
+log_test_skipped()
{
- local desc=$1
- printf "TEST: %-60s [FAIL]\n" "${desc}"
+ printf " - TEST: %-57s [SKIP]\n" "$1"
+ nskipped=$((nskipped+1))
}
-log_results()
+log_test_failed()
{
- echo "- Tests passed: ${npassed}"
- echo "- Tests failed: ${nfailed}"
+ printf " - TEST: %-57s [FAIL]\n" "$1"
+ nfailed=$((nfailed+1))
}
run_test()
{
local name=$1
local desc=$2
- local node_src=$3
- local node_dst=$4
- local ip6_dst=$5
- local trace_type=$6
- local ioam_ns=$7
- local type=$8
-
- ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type &
+ local ip6_src=$3
+ local trace_type=$4
+ local trace_size=$5
+ local ioam_ns=$6
+ local type=$7
+
+ ip netns exec $ioam_node_gamma \
+ ./ioam6_parser veth0 $name $ip6_src 2001:db8:2::2 \
+ $trace_type $trace_size $ioam_ns $type &
local spid=$!
sleep 0.1
- ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null
+ ip netns exec $ioam_node_alpha ping6 -t 64 -c 1 -W 1 2001:db8:2::2 &>/dev/null
if [ $? != 0 ]
then
- nfailed=$((nfailed+1))
log_test_failed "${desc}"
kill -2 $spid &>/dev/null
else
wait $spid
- if [ $? = 0 ]
- then
- npassed=$((npassed+1))
- log_test_passed "${desc}"
- else
- nfailed=$((nfailed+1))
- log_test_failed "${desc}"
- fi
+ [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
fi
}
run()
{
+ local test
+
+ echo
+ printf "+"
+ printf "%0.s-" {1..72}
+ printf "+"
+ echo
+ printf "| %-28s LOCAL tests %-29s |"
echo
- printf "%0.s-" {1..74}
+ printf "+"
+ printf "%0.s-" {1..72}
+ printf "+"
echo
- echo "OUTPUT tests"
- printf "%0.s-" {1..74}
+
+ echo
+ echo "Global config"
+ for test in $TESTS_LOCAL
+ do
+ $test
+ done
+
+ echo
+ echo "Inline mode"
+ for test in $TESTS_LOCAL
+ do
+ $test "inline"
+ done
+
+ echo
+ echo "Encap mode"
+ for test in $TESTS_LOCAL
+ do
+ $test "encap"
+ done
+
+ echo
+ printf "+"
+ printf "%0.s-" {1..72}
+ printf "+"
+ echo
+ printf "| %-28s OUTPUT tests %-28s |"
+ echo
+ printf "+"
+ printf "%0.s-" {1..72}
+ printf "+"
echo
# set OUTPUT settings
- ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0
+ ip netns exec $ioam_node_beta \
+ sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 &>/dev/null
- for t in $TESTS_OUTPUT
+ echo
+ echo "Inline mode"
+ for test in $TESTS_OUTPUT
do
- $t "inline"
- [ $encap_tests = 0 ] && $t "encap"
+ $test "inline"
done
- # clean OUTPUT settings
- ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
- ip -netns $ioam_node_alpha route change db01::/64 dev veth0
+ echo
+ echo "Encap mode"
+ for test in $TESTS_OUTPUT
+ do
+ $test "encap"
+ done
+ echo
+ echo "Encap mode (with tunsrc)"
+ for test in $TESTS_OUTPUT
+ do
+ $test "encap" "tunsrc"
+ done
+
+ # clean OUTPUT settings
+ ip netns exec $ioam_node_beta \
+ sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null
echo
- printf "%0.s-" {1..74}
+ printf "+"
+ printf "%0.s-" {1..72}
+ printf "+"
echo
- echo "INPUT tests"
- printf "%0.s-" {1..74}
+ printf "| %-28s INPUT tests %-29s |"
+ echo
+ printf "+"
+ printf "%0.s-" {1..72}
+ printf "+"
echo
# set INPUT settings
- ip -netns $ioam_node_alpha ioam namespace del 123
+ ip -netns $ioam_node_alpha ioam namespace del 123 &>/dev/null
- for t in $TESTS_INPUT
+ echo
+ echo "Inline mode"
+ for test in $TESTS_INPUT
do
- $t "inline"
- [ $encap_tests = 0 ] && $t "encap"
+ $test "inline"
+ done
+
+ echo
+ echo "Encap mode"
+ for test in $TESTS_INPUT
+ do
+ $test "encap"
done
# clean INPUT settings
- ip -netns $ioam_node_alpha ioam namespace add 123 \
- data ${ALPHA[6]} wide ${ALPHA[7]}
- ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]}
- ip -netns $ioam_node_alpha route change db01::/64 dev veth0
+ ip -netns $ioam_node_alpha \
+ ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} &>/dev/null
+ ip -netns $ioam_node_alpha \
+ ioam namespace set 123 schema ${ALPHA[8]} &>/dev/null
echo
- printf "%0.s-" {1..74}
+ printf "+"
+ printf "%0.s-" {1..72}
+ printf "+"
echo
- echo "GLOBAL tests"
- printf "%0.s-" {1..74}
+ printf "| %-30s Results %-31s |"
+ echo
+ printf "+"
+ printf "%0.s-" {1..72}
+ printf "+"
echo
- for t in $TESTS_GLOBAL
- do
- $t "inline"
- [ $encap_tests = 0 ] && $t "encap"
- done
-
echo
- log_results
+ echo "- Passed: ${npassed}"
+ echo "- Skipped: ${nskipped}"
+ echo "- Failed: ${nfailed}"
+ echo
}
bit2type=(
0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000
0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100
- 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002
+ 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002 0x000001
)
-bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 )
+bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 0 )
################################################################################
# #
-# OUTPUT tests #
+# LOCAL tests #
# #
-# Two nodes (sender/receiver), IOAM disabled on ingress for the receiver. #
################################################################################
-out_undef_ns()
+local_sysctl_ioam_id()
+{
+ ##############################################################################
+ # Make sure the sysctl "net.ipv6.ioam6_id" works as expected. #
+ ##############################################################################
+ local desc="Sysctl net.ipv6.ioam6_id"
+
+ [ ! -z $1 ] && return
+
+ ip netns exec $ioam_node_alpha \
+ sysctl net.ipv6.ioam6_id 2>/dev/null | grep -wq ${ALPHA[0]}
+
+ [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+}
+
+local_sysctl_ioam_id_wide()
{
##############################################################################
- # Make sure that the encap node won't fill the trace if the chosen IOAM #
- # namespace is not configured locally. #
+ # Make sure the sysctl "net.ipv6.ioam6_id_wide" works as expected. #
##############################################################################
- local desc="Unknown IOAM namespace"
+ local desc="Sysctl net.ipv6.ioam6_id_wide"
- [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+ [ ! -z $1 ] && return
- ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
- trace prealloc type 0x800000 ns 0 size 4 dev veth0
+ ip netns exec $ioam_node_alpha \
+ sysctl net.ipv6.ioam6_id_wide 2>/dev/null | grep -wq ${ALPHA[1]}
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::1 0x800000 0 $1
+ [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+}
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+local_sysctl_ioam_intf_id()
+{
+ ##############################################################################
+ # Make sure the sysctl "net.ipv6.conf.XX.ioam6_id" works as expected. #
+ ##############################################################################
+ local desc="Sysctl net.ipv6.conf.XX.ioam6_id"
+
+ [ ! -z $1 ] && return
+
+ ip netns exec $ioam_node_alpha \
+ sysctl net.ipv6.conf.veth0.ioam6_id 2>/dev/null | grep -wq ${ALPHA[4]}
+
+ [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
}
-out_no_room()
+local_sysctl_ioam_intf_id_wide()
{
##############################################################################
- # Make sure that the encap node won't fill the trace and will set the #
- # Overflow flag since there is no room enough for its data. #
+ # Make sure the sysctl "net.ipv6.conf.XX.ioam6_id_wide" works as expected. #
##############################################################################
- local desc="Missing trace room"
+ local desc="Sysctl net.ipv6.conf.XX.ioam6_id_wide"
+
+ [ ! -z $1 ] && return
+
+ ip netns exec $ioam_node_alpha \
+ sysctl net.ipv6.conf.veth0.ioam6_id_wide 2>/dev/null | grep -wq ${ALPHA[5]}
+
+ [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+}
- [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+local_sysctl_ioam_intf_enabled()
+{
+ ##############################################################################
+ # Make sure the sysctl "net.ipv6.conf.XX.ioam6_enabled" works as expected. #
+ ##############################################################################
+ local desc="Sysctl net.ipv6.conf.XX.ioam6_enabled"
- ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
- trace prealloc type 0xc00000 ns 123 size 4 dev veth0
+ [ ! -z $1 ] && return
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::1 0xc00000 123 $1
+ ip netns exec $ioam_node_beta \
+ sysctl net.ipv6.conf.veth0.ioam6_enabled 2>/dev/null | grep -wq 1
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+ [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
}
-out_bits()
+local_ioam_namespace()
{
##############################################################################
- # Make sure that, for each trace type bit, the encap node will either: #
- # (i) fill the trace with its data when it is a supported bit #
- # (ii) not fill the trace with its data when it is an unsupported bit #
+ # Make sure the creation of an IOAM Namespace works as expected. #
##############################################################################
- local desc="Trace type with bit <n> only"
+ local desc="Create an IOAM Namespace"
- local tmp=${bit2size[22]}
- bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) ))
+ [ ! -z $1 ] && return
- [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+ ip -netns $ioam_node_alpha \
+ ioam namespace show 2>/dev/null | grep -wq 123
+ local ret=$?
- for i in {0..22}
- do
- ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
- trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \
- dev veth0 &>/dev/null
+ ip -netns $ioam_node_alpha \
+ ioam namespace show 2>/dev/null | grep -wq ${ALPHA[6]}
+ ret=$((ret + $?))
- local cmd_res=$?
- local descr="${desc/<n>/$i}"
+ ip -netns $ioam_node_alpha \
+ ioam namespace show 2>/dev/null | grep -wq ${ALPHA[7]}
+ ret=$((ret + $?))
+
+ [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+}
+
+local_ioam_schema()
+{
+ ##############################################################################
+ # Make sure the creation of an IOAM Schema works as expected. #
+ ##############################################################################
+ local desc="Create an IOAM Schema"
+
+ [ ! -z $1 ] && return
+
+ ip -netns $ioam_node_alpha \
+ ioam schema show 2>/dev/null | grep -wq ${ALPHA[8]}
+ local ret=$?
+
+ local sc_data=$(
+ for i in `seq 0 $((${#ALPHA[9]}-1))`
+ do
+ chr=${ALPHA[9]:i:1}
+ printf "%x " "'${chr}"
+ done
+ )
+
+ ip -netns $ioam_node_alpha \
+ ioam schema show 2>/dev/null | grep -q "$sc_data"
+ ret=$((ret + $?))
+
+ [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+}
+
+local_ioam_schema_namespace()
+{
+ ##############################################################################
+ # Make sure the binding of a Schema to a Namespace works as expected. #
+ ##############################################################################
+ local desc="Bind an IOAM Schema to an IOAM Namespace"
+
+ [ ! -z $1 ] && return
+
+ ip -netns $ioam_node_alpha \
+ ioam namespace show 2>/dev/null | grep -wq ${ALPHA[8]}
+ local ret=$?
+
+ ip -netns $ioam_node_alpha \
+ ioam schema show 2>/dev/null | grep -wq 123
+ ret=$((ret + $?))
+
+ [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+}
+
+local_route_ns()
+{
+ ##############################################################################
+ # Make sure the Namespace-ID is always provided, whatever the mode. #
+ ##############################################################################
+ local desc="Mandatory Namespace-ID"
+ local mode
+
+ [ -z $1 ] && return
+
+ [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1"
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type 0x800000 size 4 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+ local ret1=$?
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+ local ret2=$?
+
+ [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \
+ || log_test_passed "${desc}"
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+}
+
+local_route_tunsrc()
+{
+ ##############################################################################
+ # Make sure the Tunnel Source is only (and possibly) used with encap mode. #
+ ##############################################################################
+ local desc
+ local mode
+ local mode_tunsrc
- if [[ $i -ge 12 && $i -le 21 ]]
+ [ -z $1 ] && return
+
+ if [ "$1" == "encap" ]
+ then
+ desc="Optional Tunnel Source"
+ mode="$1 tundst 2001:db8:2::2"
+ mode_tunsrc="$1 tunsrc 2001:db8:1::50 tundst 2001:db8:2::2"
+ else
+ desc="Unneeded Tunnel Source"
+ mode="$1"
+ mode_tunsrc="$1 tunsrc 2001:db8:1::50"
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+ local ret1=$?
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode_tunsrc trace prealloc type 0x800000 ns 0 size 4 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+ local ret2=$?
+
+ if [ "$1" == "encap" ]
+ then
+ [[ $ret1 != 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \
+ || log_test_passed "${desc}"
+ else
+ [[ $ret1 != 0 || $ret2 == 0 ]] && log_test_failed "${desc}" \
+ || log_test_passed "${desc}"
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+}
+
+local_route_tundst()
+{
+ ##############################################################################
+ # Make sure the Tunnel Destination is only (and always) used with encap mode.#
+ ##############################################################################
+ local desc
+
+ [ -z $1 ] && return
+
+ [ "$1" == "encap" ] && desc="Mandatory Tunnel Destination" \
+ || desc="Unneeded Tunnel Destination"
+
+ local mode="$1"
+ local mode_tundst="$1 tundst 2001:db8:2::2"
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+ local ret1=$?
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode_tundst trace prealloc type 0x800000 ns 0 size 4 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+ local ret2=$?
+
+ if [ "$1" == "encap" ]
+ then
+ [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \
+ || log_test_passed "${desc}"
+ else
+ [[ $ret1 != 0 || $ret2 == 0 ]] && log_test_failed "${desc}" \
+ || log_test_passed "${desc}"
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+}
+
+local_route_trace_type()
+{
+ ##############################################################################
+ # Make sure the Trace Type is always provided, whatever the mode. #
+ ##############################################################################
+ local desc="Mandatory Trace Type"
+ local mode
+
+ [ -z $1 ] && return
+
+ [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1"
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc ns 0 size 4 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+ local ret1=$?
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+ local ret2=$?
+
+ [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \
+ || log_test_passed "${desc}"
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+}
+
+local_route_trace_size()
+{
+ ##############################################################################
+ # Make sure the Trace Size is always provided, whatever the mode. #
+ ##############################################################################
+ local desc="Mandatory Trace Size"
+ local mode
+
+ [ -z $1 ] && return
+
+ [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1"
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+ local ret1=$?
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+ local ret2=$?
+
+ [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \
+ || log_test_passed "${desc}"
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+}
+
+local_route_trace_type_bits()
+{
+ ##############################################################################
+ # Make sure only allowed bits (0-11 and 22) are accepted. #
+ ##############################################################################
+ local desc="Trace Type bits"
+ local mode
+
+ [ -z $1 ] && return
+
+ [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1"
+
+ local i
+ for i in {0..23}
+ do
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type ${bit2type[$i]} ns 0 size 4 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ if [[ ($? == 0 && (($i -ge 12 && $i -le 21) || $i == 23)) ||
+ ($? != 0 && (($i -ge 0 && $i -le 11) || $i == 22)) ]]
then
- if [ $cmd_res != 0 ]
- then
- npassed=$((npassed+1))
- log_test_passed "$descr ($1 mode)"
- else
- nfailed=$((nfailed+1))
- log_test_failed "$descr ($1 mode)"
- fi
- else
- run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \
- $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1
+ local err=1
+ break
fi
done
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+ [ -z $err ] && log_test_passed "${desc}" || log_test_failed "${desc}"
- bit2size[22]=$tmp
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
}
-out_full_supp_trace()
+local_route_trace_size_values()
{
##############################################################################
- # Make sure that the encap node will correctly fill a full trace. Be careful,#
- # "full trace" here does NOT mean all bits (only supported ones). #
+ # Make sure only allowed sizes (multiples of four in [4,244]) are accepted. #
##############################################################################
- local desc="Full supported trace"
+ local desc="Trace Size values"
+ local mode
- [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+ [ -z $1 ] && return
- ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
- trace prealloc type 0xfff002 ns 123 size 100 dev veth0
+ [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1"
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::1 0xfff002 123 $1
+ # we also try the next multiple of four after the MAX to check it's refused
+ local i
+ for i in {0..248}
+ do
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size $i \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+ if [[ ($? == 0 && ($i == 0 || $i == 248 || $(( $i % 4 )) != 0)) ||
+ ($? != 0 && $i != 0 && $i != 248 && $(( $i % 4 )) == 0) ]]
+ then
+ local err=1
+ break
+ fi
+ done
+
+ [ -z $err ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
}
################################################################################
# #
-# INPUT tests #
+# OUTPUT tests #
# #
-# Two nodes (sender/receiver), the sender MUST NOT fill the trace upon #
-# insertion -> the IOAM namespace configured on the sender is removed #
-# and is used in the inserted trace to force the sender not to fill it. #
################################################################################
-in_undef_ns()
+output_undef_ns()
{
##############################################################################
- # Make sure that the receiving node won't fill the trace if the related IOAM #
- # namespace is not configured locally. #
+ # Make sure an IOAM encapsulating node does NOT fill the trace when the #
+ # corresponding IOAM Namespace-ID is not configured locally. #
##############################################################################
- local desc="Unknown IOAM namespace"
+ local desc="Unknown IOAM Namespace-ID"
+ local ns=0
+ local tr_type=0x800000
+ local tr_size=4
+ local mode="$1"
+ local saddr="2001:db8:1::2"
+
+ if [ "$1" == "encap" ]
+ then
+ if [ $encap_tests != 0 ]
+ then
+ log_test_skipped "${desc}"
+ return
+ fi
+
+ if [ "$2" == "tunsrc" ]
+ then
+ saddr="2001:db8:1::50"
+ mode+=" tunsrc 2001:db8:1::50"
+ fi
+
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
- [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
- ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
- trace prealloc type 0x800000 ns 0 size 4 dev veth0
+ if [ $? == 0 ]
+ then
+ run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1
+ else
+ log_test_failed "${desc}"
+ fi
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::1 0x800000 0 $1
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
}
-in_no_room()
+output_no_room()
{
##############################################################################
- # Make sure that the receiving node won't fill the trace and will set the #
- # Overflow flag if there is no room enough for its data. #
+ # Make sure an IOAM encapsulating node does NOT fill the trace AND sets the #
+ # Overflow flag when there is not enough room for its data. #
##############################################################################
- local desc="Missing trace room"
+ local desc="Missing room for data"
+ local ns=123
+ local tr_type=0xc00000
+ local tr_size=4
+ local mode="$1"
+ local saddr="2001:db8:1::2"
+
+ if [ "$1" == "encap" ]
+ then
+ if [ $encap_tests != 0 ]
+ then
+ log_test_skipped "${desc}"
+ return
+ fi
- [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+ if [ "$2" == "tunsrc" ]
+ then
+ saddr="2001:db8:1::50"
+ mode+=" tunsrc 2001:db8:1::50"
+ fi
- ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
- trace prealloc type 0xc00000 ns 123 size 4 dev veth0
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::1 0xc00000 123 $1
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+ if [ $? == 0 ]
+ then
+ run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1
+ else
+ log_test_failed "${desc}"
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
}
-in_bits()
+output_no_room_oss()
{
##############################################################################
- # Make sure that, for each trace type bit, the receiving node will either: #
- # (i) fill the trace with its data when it is a supported bit #
- # (ii) not fill the trace with its data when it is an unsupported bit #
+ # Make sure an IOAM encapsulating node does NOT fill the trace AND sets the #
+ # Overflow flag when there is not enough room for the Opaque State Snapshot. #
##############################################################################
- local desc="Trace type with bit <n> only"
+ local desc="Missing room for Opaque State Snapshot"
+ local ns=123
+ local tr_type=0x000002
+ local tr_size=4
+ local mode="$1"
+ local saddr="2001:db8:1::2"
+
+ if [ "$1" == "encap" ]
+ then
+ if [ $encap_tests != 0 ]
+ then
+ log_test_skipped "${desc}"
+ return
+ fi
- local tmp=${bit2size[22]}
- bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) ))
+ if [ "$2" == "tunsrc" ]
+ then
+ saddr="2001:db8:1::50"
+ mode+=" tunsrc 2001:db8:1::50"
+ fi
+
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ if [ $? == 0 ]
+ then
+ run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1
+ else
+ log_test_failed "${desc}"
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
+}
+
+output_bits()
+{
+ ##############################################################################
+ # Make sure an IOAM encapsulating node implements all supported bits by #
+ # checking it correctly fills the trace with its data. #
+ ##############################################################################
+ local desc="Trace Type with supported bit <n> only"
+ local ns=123
+ local mode="$1"
+ local saddr="2001:db8:1::2"
- [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+ if [ "$1" == "encap" ]
+ then
+ if [ "$2" == "tunsrc" ]
+ then
+ saddr="2001:db8:1::50"
+ mode+=" tunsrc 2001:db8:1::50"
+ fi
+
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
+
+ local tmp=${bit2size[22]}
+ bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) ))
+ local i
for i in {0..11} {22..22}
do
- ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
- trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \
- dev veth0
+ local descr="${desc/<n>/$i}"
+
+ if [[ "$1" == "encap" && $encap_tests != 0 ]]
+ then
+ log_test_skipped "${descr}"
+ continue
+ fi
- run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \
- $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc \
+ type ${bit2type[$i]} ns $ns size ${bit2size[$i]} \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ if [ $? == 0 ]
+ then
+ run_test "output_bit$i" "${descr}" $saddr \
+ ${bit2type[$i]} ${bit2size[$i]} $ns $1
+ else
+ log_test_failed "${descr}"
+ fi
done
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
bit2size[22]=$tmp
}
-in_oflag()
+output_sizes()
{
##############################################################################
- # Make sure that the receiving node won't fill the trace since the Overflow #
- # flag is set. #
+ # Make sure an IOAM encapsulating node allocates supported sizes correctly. #
##############################################################################
- local desc="Overflow flag is set"
+ local desc="Trace Size of <n> bytes"
+ local ns=0
+ local tr_type=0x800000
+ local mode="$1"
+ local saddr="2001:db8:1::2"
- # Exception:
- # Here, we need the sender to set the Overflow flag. For that, we will add
- # back the IOAM namespace that was previously configured on the sender.
- ip -netns $ioam_node_alpha ioam namespace add 123
+ if [ "$1" == "encap" ]
+ then
+ if [ "$2" == "tunsrc" ]
+ then
+ saddr="2001:db8:1::50"
+ mode+=" tunsrc 2001:db8:1::50"
+ fi
- [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
- ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
- trace prealloc type 0xc00000 ns 123 size 4 dev veth0
+ local i
+ for i in $(seq 4 4 244)
+ do
+ local descr="${desc/<n>/$i}"
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::1 0xc00000 123 $1
+ if [[ "$1" == "encap" && $encap_tests != 0 ]]
+ then
+ log_test_skipped "${descr}"
+ continue
+ fi
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $i \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
- # And we clean the exception for this test to get things back to normal for
- # other INPUT tests
- ip -netns $ioam_node_alpha ioam namespace del 123
+ if [ $? == 0 ]
+ then
+ run_test "output_size$i" "${descr}" $saddr $tr_type $i $ns $1
+ else
+ log_test_failed "${descr}"
+ fi
+ done
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
}
-in_full_supp_trace()
+output_full_supp_trace()
{
##############################################################################
- # Make sure that the receiving node will correctly fill a full trace. Be #
- # careful, "full trace" here does NOT mean all bits (only supported ones). #
+ # Make sure an IOAM encapsulating node correctly fills a trace when all #
+ # supported bits are set. #
##############################################################################
local desc="Full supported trace"
+ local ns=123
+ local tr_type=0xfff002
+ local tr_size
+ local mode="$1"
+ local saddr="2001:db8:1::2"
- [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+ if [ "$1" == "encap" ]
+ then
+ if [ $encap_tests != 0 ]
+ then
+ log_test_skipped "${desc}"
+ return
+ fi
- ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
- trace prealloc type 0xfff002 ns 123 size 80 dev veth0
+ if [ "$2" == "tunsrc" ]
+ then
+ saddr="2001:db8:1::50"
+ mode+=" tunsrc 2001:db8:1::50"
+ fi
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
- db01::1 0xfff002 123 $1
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
- [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+ local i
+ tr_size=$(( ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) ))
+ for i in {0..11} {22..22}
+ do
+ tr_size=$((tr_size + bit2size[$i]))
+ done
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ if [ $? == 0 ]
+ then
+ run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1
+ else
+ log_test_failed "${desc}"
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
}
################################################################################
# #
-# GLOBAL tests #
+# INPUT tests #
# #
-# Three nodes (sender/router/receiver), IOAM fully enabled on every node. #
################################################################################
-fwd_full_supp_trace()
+input_undef_ns()
+{
+ ##############################################################################
+ # Make sure an IOAM node does NOT fill the trace when the corresponding IOAM #
+ # Namespace-ID is not configured locally. #
+ ##############################################################################
+ local desc="Unknown IOAM Namespace-ID"
+ local ns=0
+ local tr_type=0x800000
+ local tr_size=4
+ local mode="$1"
+
+ if [ "$1" == "encap" ]
+ then
+ if [ $encap_tests != 0 ]
+ then
+ log_test_skipped "${desc}"
+ return
+ fi
+
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ if [ $? == 0 ]
+ then
+ run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1
+ else
+ log_test_failed "${desc}"
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
+}
+
+input_no_room()
+{
+ ##############################################################################
+ # Make sure an IOAM node does NOT fill the trace AND sets the Overflow flag #
+ # when there is not enough room for its data. #
+ ##############################################################################
+ local desc="Missing room for data"
+ local ns=123
+ local tr_type=0xc00000
+ local tr_size=4
+ local mode="$1"
+
+ if [ "$1" == "encap" ]
+ then
+ if [ $encap_tests != 0 ]
+ then
+ log_test_skipped "${desc}"
+ return
+ fi
+
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ if [ $? == 0 ]
+ then
+ run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1
+ else
+ log_test_failed "${desc}"
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
+}
+
+input_no_room_oss()
+{
+ ##############################################################################
+ # Make sure an IOAM node does NOT fill the trace AND sets the Overflow flag #
+ # when there is not enough room for the Opaque State Snapshot. #
+ ##############################################################################
+ local desc="Missing room for Opaque State Snapshot"
+ local ns=123
+ local tr_type=0x000002
+ local tr_size=4
+ local mode="$1"
+
+ if [ "$1" == "encap" ]
+ then
+ if [ $encap_tests != 0 ]
+ then
+ log_test_skipped "${desc}"
+ return
+ fi
+
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ if [ $? == 0 ]
+ then
+ run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1
+ else
+ log_test_failed "${desc}"
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
+}
+
+input_disabled()
+{
+ ##############################################################################
+ # Make sure an IOAM node does NOT fill the trace when IOAM is not enabled on #
+ # the corresponding (ingress) interface. #
+ ##############################################################################
+ local desc="IOAM disabled on ingress interface"
+ local ns=123
+ local tr_type=0x800000
+ local tr_size=4
+ local mode="$1"
+
+ if [ "$1" == "encap" ]
+ then
+ if [ $encap_tests != 0 ]
+ then
+ log_test_skipped "${desc}"
+ return
+ fi
+
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
+
+ # Exception: disable IOAM on ingress interface
+ ip netns exec $ioam_node_beta \
+ sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 &>/dev/null
+ local ret=$?
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+ ret=$((ret + $?))
+
+ if [ $ret == 0 ]
+ then
+ run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1
+ else
+ log_test_failed "${desc}"
+ fi
+
+ # Clean Exception
+ ip netns exec $ioam_node_beta \
+ sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
+}
+
+input_oflag()
+{
+ ##############################################################################
+ # Make sure an IOAM node does NOT fill the trace when the Overflow flag is #
+ # set. #
+ ##############################################################################
+ local desc="Overflow flag is set"
+ local ns=123
+ local tr_type=0xc00000
+ local tr_size=4
+ local mode="$1"
+
+ if [ "$1" == "encap" ]
+ then
+ if [ $encap_tests != 0 ]
+ then
+ log_test_skipped "${desc}"
+ return
+ fi
+
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
+
+ # Exception:
+ # Here, we need the sender to set the Overflow flag. For that, we will add
+ # back the IOAM namespace that was previously configured on the sender.
+ ip -netns $ioam_node_alpha ioam namespace add 123 &>/dev/null
+ local ret=$?
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+ ret=$((ret + $?))
+
+ if [ $ret == 0 ]
+ then
+ run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1
+ else
+ log_test_failed "${desc}"
+ fi
+
+ # Clean Exception
+ ip -netns $ioam_node_alpha ioam namespace del 123 &>/dev/null
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
+}
+
+input_bits()
+{
+ ##############################################################################
+ # Make sure an IOAM node implements all supported bits by checking it #
+ # correctly fills the trace with its data. #
+ ##############################################################################
+ local desc="Trace Type with supported bit <n> only"
+ local ns=123
+ local mode="$1"
+
+ if [ "$1" == "encap" ]
+ then
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
+
+ local tmp=${bit2size[22]}
+ bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) ))
+
+ local i
+ for i in {0..11} {22..22}
+ do
+ local descr="${desc/<n>/$i}"
+
+ if [[ "$1" == "encap" && $encap_tests != 0 ]]
+ then
+ log_test_skipped "${descr}"
+ continue
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc \
+ type ${bit2type[$i]} ns $ns size ${bit2size[$i]} \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ if [ $? == 0 ]
+ then
+ run_test "input_bit$i" "${descr}" 2001:db8:1::2 \
+ ${bit2type[$i]} ${bit2size[$i]} $ns $1
+ else
+ log_test_failed "${descr}"
+ fi
+ done
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
+
+ bit2size[22]=$tmp
+}
+
+input_sizes()
{
##############################################################################
- # Make sure that all three nodes correctly filled the full supported trace #
- # by checking that the trace data is consistent with the predefined config. #
+ # Make sure an IOAM node handles all supported sizes correctly. #
##############################################################################
- local desc="Forward - Full supported trace"
+ local desc="Trace Size of <n> bytes"
+ local ns=123
+ local tr_type=0x800000
+ local mode="$1"
+
+ if [ "$1" == "encap" ]
+ then
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
- [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1"
- [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 up
+ local i
+ for i in $(seq 4 4 244)
+ do
+ local descr="${desc/<n>/$i}"
- ip -netns $ioam_node_alpha route change db02::/64 encap ioam6 mode $mode \
- trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0
+ if [[ "$1" == "encap" && $encap_tests != 0 ]]
+ then
+ log_test_skipped "${descr}"
+ continue
+ fi
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \
- db02::2 0xfff002 123 $1
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $i \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
- [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down
+ if [ $? == 0 ]
+ then
+ run_test "input_size$i" "${descr}" 2001:db8:1::2 $tr_type $i $ns $1
+ else
+ log_test_failed "${descr}"
+ fi
+ done
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
+}
+
+input_full_supp_trace()
+{
+ ##############################################################################
+ # Make sure an IOAM node correctly fills a trace when all supported bits are #
+ # set. #
+ ##############################################################################
+ local desc="Full supported trace"
+ local ns=123
+ local tr_type=0xfff002
+ local tr_size
+ local mode="$1"
+
+ if [ "$1" == "encap" ]
+ then
+ if [ $encap_tests != 0 ]
+ then
+ log_test_skipped "${desc}"
+ return
+ fi
+
+ mode+=" tundst 2001:db8:2::2"
+ ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null
+ fi
+
+ local i
+ tr_size=$(( ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) ))
+ for i in {0..11} {22..22}
+ do
+ tr_size=$((tr_size + bit2size[$i]))
+ done
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 \
+ encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ if [ $? == 0 ]
+ then
+ run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1
+ else
+ log_test_failed "${desc}"
+ fi
+
+ ip -netns $ioam_node_alpha \
+ route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \
+ link set ip6tnl0 down &>/dev/null
}
@@ -742,30 +1655,29 @@ fwd_full_supp_trace()
################################################################################
npassed=0
+nskipped=0
nfailed=0
if [ "$(id -u)" -ne 0 ]
then
- echo "SKIP: Need root privileges"
+ echo "SKIP: Need root privileges."
exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]
then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-ip ioam &>/dev/null
-if [ $? = 1 ]
-then
- echo "SKIP: iproute2 too old, missing ioam command"
+ echo "SKIP: Could not run test without ip tool."
exit $ksft_skip
fi
check_kernel_compatibility
-
-cleanup &>/dev/null
setup
run
-cleanup &>/dev/null
+cleanup
+
+if [ $nfailed != 0 ]
+then
+ exit $ksft_fail
+fi
+
+exit $ksft_pass
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
index 895e5bb5044b..de4b5c9e8a74 100644
--- a/tools/testing/selftests/net/ioam6_parser.c
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -8,8 +8,10 @@
#include <errno.h>
#include <limits.h>
#include <linux/const.h>
+#include <linux/if_ether.h>
#include <linux/ioam6.h>
#include <linux/ipv6.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
@@ -40,7 +42,7 @@ static struct ioam_config node1 = {
.egr_id = 101,
.ingr_wide = 0xffffffff, /* default value */
.egr_wide = 101101,
- .ns_data = 0xdeadbee0,
+ .ns_data = 0xdeadbeef,
.ns_wide = 0xcafec0caf00dc0de,
.sc_id = 777,
.sc_data = "something that will be 4n-aligned",
@@ -54,33 +56,22 @@ static struct ioam_config node2 = {
.egr_id = 202,
.ingr_wide = 201201,
.egr_wide = 202202,
- .ns_data = 0xdeadbee1,
- .ns_wide = 0xcafec0caf11dc0de,
- .sc_id = 666,
- .sc_data = "Hello there -Obi",
- .hlim = 63,
-};
-
-static struct ioam_config node3 = {
- .id = 3,
- .wide = 33333333,
- .ingr_id = 301,
- .egr_id = 0xffff, /* default value */
- .ingr_wide = 301301,
- .egr_wide = 0xffffffff, /* default value */
- .ns_data = 0xdeadbee2,
- .ns_wide = 0xcafec0caf22dc0de,
+ .ns_data = 0xffffffff, /* default value */
+ .ns_wide = 0xffffffffffffffff, /* default value */
.sc_id = 0xffffff, /* default value */
.sc_data = NULL,
- .hlim = 62,
+ .hlim = 63,
};
enum {
/**********
* OUTPUT *
**********/
+ __TEST_OUT_MIN,
+
TEST_OUT_UNDEF_NS,
TEST_OUT_NO_ROOM,
+ TEST_OUT_NO_ROOM_OSS,
TEST_OUT_BIT0,
TEST_OUT_BIT1,
TEST_OUT_BIT2,
@@ -94,13 +85,80 @@ enum {
TEST_OUT_BIT10,
TEST_OUT_BIT11,
TEST_OUT_BIT22,
+ TEST_OUT_SIZE4,
+ TEST_OUT_SIZE8,
+ TEST_OUT_SIZE12,
+ TEST_OUT_SIZE16,
+ TEST_OUT_SIZE20,
+ TEST_OUT_SIZE24,
+ TEST_OUT_SIZE28,
+ TEST_OUT_SIZE32,
+ TEST_OUT_SIZE36,
+ TEST_OUT_SIZE40,
+ TEST_OUT_SIZE44,
+ TEST_OUT_SIZE48,
+ TEST_OUT_SIZE52,
+ TEST_OUT_SIZE56,
+ TEST_OUT_SIZE60,
+ TEST_OUT_SIZE64,
+ TEST_OUT_SIZE68,
+ TEST_OUT_SIZE72,
+ TEST_OUT_SIZE76,
+ TEST_OUT_SIZE80,
+ TEST_OUT_SIZE84,
+ TEST_OUT_SIZE88,
+ TEST_OUT_SIZE92,
+ TEST_OUT_SIZE96,
+ TEST_OUT_SIZE100,
+ TEST_OUT_SIZE104,
+ TEST_OUT_SIZE108,
+ TEST_OUT_SIZE112,
+ TEST_OUT_SIZE116,
+ TEST_OUT_SIZE120,
+ TEST_OUT_SIZE124,
+ TEST_OUT_SIZE128,
+ TEST_OUT_SIZE132,
+ TEST_OUT_SIZE136,
+ TEST_OUT_SIZE140,
+ TEST_OUT_SIZE144,
+ TEST_OUT_SIZE148,
+ TEST_OUT_SIZE152,
+ TEST_OUT_SIZE156,
+ TEST_OUT_SIZE160,
+ TEST_OUT_SIZE164,
+ TEST_OUT_SIZE168,
+ TEST_OUT_SIZE172,
+ TEST_OUT_SIZE176,
+ TEST_OUT_SIZE180,
+ TEST_OUT_SIZE184,
+ TEST_OUT_SIZE188,
+ TEST_OUT_SIZE192,
+ TEST_OUT_SIZE196,
+ TEST_OUT_SIZE200,
+ TEST_OUT_SIZE204,
+ TEST_OUT_SIZE208,
+ TEST_OUT_SIZE212,
+ TEST_OUT_SIZE216,
+ TEST_OUT_SIZE220,
+ TEST_OUT_SIZE224,
+ TEST_OUT_SIZE228,
+ TEST_OUT_SIZE232,
+ TEST_OUT_SIZE236,
+ TEST_OUT_SIZE240,
+ TEST_OUT_SIZE244,
TEST_OUT_FULL_SUPP_TRACE,
+ __TEST_OUT_MAX,
+
/*********
* INPUT *
*********/
+ __TEST_IN_MIN,
+
TEST_IN_UNDEF_NS,
TEST_IN_NO_ROOM,
+ TEST_IN_NO_ROOM_OSS,
+ TEST_IN_DISABLED,
TEST_IN_OFLAG,
TEST_IN_BIT0,
TEST_IN_BIT1,
@@ -115,36 +173,107 @@ enum {
TEST_IN_BIT10,
TEST_IN_BIT11,
TEST_IN_BIT22,
+ TEST_IN_SIZE4,
+ TEST_IN_SIZE8,
+ TEST_IN_SIZE12,
+ TEST_IN_SIZE16,
+ TEST_IN_SIZE20,
+ TEST_IN_SIZE24,
+ TEST_IN_SIZE28,
+ TEST_IN_SIZE32,
+ TEST_IN_SIZE36,
+ TEST_IN_SIZE40,
+ TEST_IN_SIZE44,
+ TEST_IN_SIZE48,
+ TEST_IN_SIZE52,
+ TEST_IN_SIZE56,
+ TEST_IN_SIZE60,
+ TEST_IN_SIZE64,
+ TEST_IN_SIZE68,
+ TEST_IN_SIZE72,
+ TEST_IN_SIZE76,
+ TEST_IN_SIZE80,
+ TEST_IN_SIZE84,
+ TEST_IN_SIZE88,
+ TEST_IN_SIZE92,
+ TEST_IN_SIZE96,
+ TEST_IN_SIZE100,
+ TEST_IN_SIZE104,
+ TEST_IN_SIZE108,
+ TEST_IN_SIZE112,
+ TEST_IN_SIZE116,
+ TEST_IN_SIZE120,
+ TEST_IN_SIZE124,
+ TEST_IN_SIZE128,
+ TEST_IN_SIZE132,
+ TEST_IN_SIZE136,
+ TEST_IN_SIZE140,
+ TEST_IN_SIZE144,
+ TEST_IN_SIZE148,
+ TEST_IN_SIZE152,
+ TEST_IN_SIZE156,
+ TEST_IN_SIZE160,
+ TEST_IN_SIZE164,
+ TEST_IN_SIZE168,
+ TEST_IN_SIZE172,
+ TEST_IN_SIZE176,
+ TEST_IN_SIZE180,
+ TEST_IN_SIZE184,
+ TEST_IN_SIZE188,
+ TEST_IN_SIZE192,
+ TEST_IN_SIZE196,
+ TEST_IN_SIZE200,
+ TEST_IN_SIZE204,
+ TEST_IN_SIZE208,
+ TEST_IN_SIZE212,
+ TEST_IN_SIZE216,
+ TEST_IN_SIZE220,
+ TEST_IN_SIZE224,
+ TEST_IN_SIZE228,
+ TEST_IN_SIZE232,
+ TEST_IN_SIZE236,
+ TEST_IN_SIZE240,
+ TEST_IN_SIZE244,
TEST_IN_FULL_SUPP_TRACE,
- /**********
- * GLOBAL *
- **********/
- TEST_FWD_FULL_SUPP_TRACE,
+ __TEST_IN_MAX,
__TEST_MAX,
};
-static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h,
- __u32 trace_type, __u16 ioam_ns)
+static int check_header(int tid, struct ioam6_trace_hdr *trace,
+ __u32 trace_type, __u8 trace_size, __u16 ioam_ns)
{
- if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns ||
- __be32_to_cpu(ioam6h->type_be32) != (trace_type << 8))
+ if (__be16_to_cpu(trace->namespace_id) != ioam_ns ||
+ __be32_to_cpu(trace->type_be32) != (trace_type << 8))
return 1;
switch (tid) {
case TEST_OUT_UNDEF_NS:
case TEST_IN_UNDEF_NS:
- return ioam6h->overflow ||
- ioam6h->nodelen != 1 ||
- ioam6h->remlen != 1;
+ case TEST_IN_DISABLED:
+ return trace->overflow == 1 ||
+ trace->nodelen != 1 ||
+ trace->remlen != 1;
case TEST_OUT_NO_ROOM:
case TEST_IN_NO_ROOM:
case TEST_IN_OFLAG:
- return !ioam6h->overflow ||
- ioam6h->nodelen != 2 ||
- ioam6h->remlen != 1;
+ return trace->overflow == 0 ||
+ trace->nodelen != 2 ||
+ trace->remlen != 1;
+
+ case TEST_OUT_NO_ROOM_OSS:
+ return trace->overflow == 0 ||
+ trace->nodelen != 0 ||
+ trace->remlen != 1;
+
+ case TEST_IN_NO_ROOM_OSS:
+ case TEST_OUT_BIT22:
+ case TEST_IN_BIT22:
+ return trace->overflow == 1 ||
+ trace->nodelen != 0 ||
+ trace->remlen != 0;
case TEST_OUT_BIT0:
case TEST_IN_BIT0:
@@ -164,9 +293,9 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h,
case TEST_IN_BIT7:
case TEST_OUT_BIT11:
case TEST_IN_BIT11:
- return ioam6h->overflow ||
- ioam6h->nodelen != 1 ||
- ioam6h->remlen;
+ return trace->overflow == 1 ||
+ trace->nodelen != 1 ||
+ trace->remlen != 0;
case TEST_OUT_BIT8:
case TEST_IN_BIT8:
@@ -174,22 +303,145 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h,
case TEST_IN_BIT9:
case TEST_OUT_BIT10:
case TEST_IN_BIT10:
- return ioam6h->overflow ||
- ioam6h->nodelen != 2 ||
- ioam6h->remlen;
-
- case TEST_OUT_BIT22:
- case TEST_IN_BIT22:
- return ioam6h->overflow ||
- ioam6h->nodelen ||
- ioam6h->remlen;
+ return trace->overflow == 1 ||
+ trace->nodelen != 2 ||
+ trace->remlen != 0;
+
+ case TEST_OUT_SIZE4:
+ case TEST_OUT_SIZE8:
+ case TEST_OUT_SIZE12:
+ case TEST_OUT_SIZE16:
+ case TEST_OUT_SIZE20:
+ case TEST_OUT_SIZE24:
+ case TEST_OUT_SIZE28:
+ case TEST_OUT_SIZE32:
+ case TEST_OUT_SIZE36:
+ case TEST_OUT_SIZE40:
+ case TEST_OUT_SIZE44:
+ case TEST_OUT_SIZE48:
+ case TEST_OUT_SIZE52:
+ case TEST_OUT_SIZE56:
+ case TEST_OUT_SIZE60:
+ case TEST_OUT_SIZE64:
+ case TEST_OUT_SIZE68:
+ case TEST_OUT_SIZE72:
+ case TEST_OUT_SIZE76:
+ case TEST_OUT_SIZE80:
+ case TEST_OUT_SIZE84:
+ case TEST_OUT_SIZE88:
+ case TEST_OUT_SIZE92:
+ case TEST_OUT_SIZE96:
+ case TEST_OUT_SIZE100:
+ case TEST_OUT_SIZE104:
+ case TEST_OUT_SIZE108:
+ case TEST_OUT_SIZE112:
+ case TEST_OUT_SIZE116:
+ case TEST_OUT_SIZE120:
+ case TEST_OUT_SIZE124:
+ case TEST_OUT_SIZE128:
+ case TEST_OUT_SIZE132:
+ case TEST_OUT_SIZE136:
+ case TEST_OUT_SIZE140:
+ case TEST_OUT_SIZE144:
+ case TEST_OUT_SIZE148:
+ case TEST_OUT_SIZE152:
+ case TEST_OUT_SIZE156:
+ case TEST_OUT_SIZE160:
+ case TEST_OUT_SIZE164:
+ case TEST_OUT_SIZE168:
+ case TEST_OUT_SIZE172:
+ case TEST_OUT_SIZE176:
+ case TEST_OUT_SIZE180:
+ case TEST_OUT_SIZE184:
+ case TEST_OUT_SIZE188:
+ case TEST_OUT_SIZE192:
+ case TEST_OUT_SIZE196:
+ case TEST_OUT_SIZE200:
+ case TEST_OUT_SIZE204:
+ case TEST_OUT_SIZE208:
+ case TEST_OUT_SIZE212:
+ case TEST_OUT_SIZE216:
+ case TEST_OUT_SIZE220:
+ case TEST_OUT_SIZE224:
+ case TEST_OUT_SIZE228:
+ case TEST_OUT_SIZE232:
+ case TEST_OUT_SIZE236:
+ case TEST_OUT_SIZE240:
+ case TEST_OUT_SIZE244:
+ return trace->overflow == 1 ||
+ trace->nodelen != 1 ||
+ trace->remlen != trace_size / 4;
+
+ case TEST_IN_SIZE4:
+ case TEST_IN_SIZE8:
+ case TEST_IN_SIZE12:
+ case TEST_IN_SIZE16:
+ case TEST_IN_SIZE20:
+ case TEST_IN_SIZE24:
+ case TEST_IN_SIZE28:
+ case TEST_IN_SIZE32:
+ case TEST_IN_SIZE36:
+ case TEST_IN_SIZE40:
+ case TEST_IN_SIZE44:
+ case TEST_IN_SIZE48:
+ case TEST_IN_SIZE52:
+ case TEST_IN_SIZE56:
+ case TEST_IN_SIZE60:
+ case TEST_IN_SIZE64:
+ case TEST_IN_SIZE68:
+ case TEST_IN_SIZE72:
+ case TEST_IN_SIZE76:
+ case TEST_IN_SIZE80:
+ case TEST_IN_SIZE84:
+ case TEST_IN_SIZE88:
+ case TEST_IN_SIZE92:
+ case TEST_IN_SIZE96:
+ case TEST_IN_SIZE100:
+ case TEST_IN_SIZE104:
+ case TEST_IN_SIZE108:
+ case TEST_IN_SIZE112:
+ case TEST_IN_SIZE116:
+ case TEST_IN_SIZE120:
+ case TEST_IN_SIZE124:
+ case TEST_IN_SIZE128:
+ case TEST_IN_SIZE132:
+ case TEST_IN_SIZE136:
+ case TEST_IN_SIZE140:
+ case TEST_IN_SIZE144:
+ case TEST_IN_SIZE148:
+ case TEST_IN_SIZE152:
+ case TEST_IN_SIZE156:
+ case TEST_IN_SIZE160:
+ case TEST_IN_SIZE164:
+ case TEST_IN_SIZE168:
+ case TEST_IN_SIZE172:
+ case TEST_IN_SIZE176:
+ case TEST_IN_SIZE180:
+ case TEST_IN_SIZE184:
+ case TEST_IN_SIZE188:
+ case TEST_IN_SIZE192:
+ case TEST_IN_SIZE196:
+ case TEST_IN_SIZE200:
+ case TEST_IN_SIZE204:
+ case TEST_IN_SIZE208:
+ case TEST_IN_SIZE212:
+ case TEST_IN_SIZE216:
+ case TEST_IN_SIZE220:
+ case TEST_IN_SIZE224:
+ case TEST_IN_SIZE228:
+ case TEST_IN_SIZE232:
+ case TEST_IN_SIZE236:
+ case TEST_IN_SIZE240:
+ case TEST_IN_SIZE244:
+ return trace->overflow == 1 ||
+ trace->nodelen != 1 ||
+ trace->remlen != (trace_size / 4) - trace->nodelen;
case TEST_OUT_FULL_SUPP_TRACE:
case TEST_IN_FULL_SUPP_TRACE:
- case TEST_FWD_FULL_SUPP_TRACE:
- return ioam6h->overflow ||
- ioam6h->nodelen != 15 ||
- ioam6h->remlen;
+ return trace->overflow == 1 ||
+ trace->nodelen != 15 ||
+ trace->remlen != 0;
default:
break;
@@ -198,167 +450,137 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h,
return 1;
}
-static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
- const struct ioam_config cnf)
+static int check_data(struct ioam6_trace_hdr *trace, __u8 trace_size,
+ const struct ioam_config cnf, bool is_output)
{
- unsigned int len;
+ unsigned int len, i;
__u8 aligned;
__u64 raw64;
__u32 raw32;
+ __u8 *p;
- if (ioam6h->type.bit0) {
- raw32 = __be32_to_cpu(*((__u32 *)*p));
- if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff))
- return 1;
- *p += sizeof(__u32);
- }
-
- if (ioam6h->type.bit1) {
- raw32 = __be32_to_cpu(*((__u32 *)*p));
- if (cnf.ingr_id != (raw32 >> 16) ||
- cnf.egr_id != (raw32 & 0xffff))
- return 1;
- *p += sizeof(__u32);
- }
-
- if (ioam6h->type.bit2)
- *p += sizeof(__u32);
-
- if (ioam6h->type.bit3)
- *p += sizeof(__u32);
-
- if (ioam6h->type.bit4) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
- return 1;
- *p += sizeof(__u32);
- }
-
- if (ioam6h->type.bit5) {
- if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data)
- return 1;
- *p += sizeof(__u32);
- }
-
- if (ioam6h->type.bit6)
- *p += sizeof(__u32);
+ if (trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
+ trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
+ trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
+ trace->type.bit21 | trace->type.bit23)
+ return 1;
- if (ioam6h->type.bit7) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ for (i = 0; i < trace->remlen * 4; i++) {
+ if (trace->data[i] != 0)
return 1;
- *p += sizeof(__u32);
}
- if (ioam6h->type.bit8) {
- raw64 = __be64_to_cpu(*((__u64 *)*p));
- if (cnf.hlim != (raw64 >> 56) ||
- cnf.wide != (raw64 & 0xffffffffffffff))
- return 1;
- *p += sizeof(__u64);
- }
+ if (trace->remlen * 4 == trace_size)
+ return 0;
- if (ioam6h->type.bit9) {
- if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide)
- return 1;
- *p += sizeof(__u32);
+ p = trace->data + trace->remlen * 4;
- if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide)
+ if (trace->type.bit0) {
+ raw32 = __be32_to_cpu(*((__u32 *)p));
+ if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff))
return 1;
- *p += sizeof(__u32);
+ p += sizeof(__u32);
}
- if (ioam6h->type.bit10) {
- if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide)
+ if (trace->type.bit1) {
+ raw32 = __be32_to_cpu(*((__u32 *)p));
+ if (cnf.ingr_id != (raw32 >> 16) ||
+ cnf.egr_id != (raw32 & 0xffff))
return 1;
- *p += sizeof(__u64);
+ p += sizeof(__u32);
}
- if (ioam6h->type.bit11) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ if (trace->type.bit2) {
+ raw32 = __be32_to_cpu(*((__u32 *)p));
+ if ((is_output && raw32 != 0xffffffff) ||
+ (!is_output && (raw32 == 0 || raw32 == 0xffffffff)))
return 1;
- *p += sizeof(__u32);
+ p += sizeof(__u32);
}
- if (ioam6h->type.bit12) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ if (trace->type.bit3) {
+ raw32 = __be32_to_cpu(*((__u32 *)p));
+ if ((is_output && raw32 != 0xffffffff) ||
+ (!is_output && (raw32 == 0 || raw32 == 0xffffffff)))
return 1;
- *p += sizeof(__u32);
+ p += sizeof(__u32);
}
- if (ioam6h->type.bit13) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ if (trace->type.bit4) {
+ if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff)
return 1;
- *p += sizeof(__u32);
+ p += sizeof(__u32);
}
- if (ioam6h->type.bit14) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ if (trace->type.bit5) {
+ if (__be32_to_cpu(*((__u32 *)p)) != cnf.ns_data)
return 1;
- *p += sizeof(__u32);
+ p += sizeof(__u32);
}
- if (ioam6h->type.bit15) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ if (trace->type.bit6) {
+ if (__be32_to_cpu(*((__u32 *)p)) == 0xffffffff)
return 1;
- *p += sizeof(__u32);
+ p += sizeof(__u32);
}
- if (ioam6h->type.bit16) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ if (trace->type.bit7) {
+ if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff)
return 1;
- *p += sizeof(__u32);
+ p += sizeof(__u32);
}
- if (ioam6h->type.bit17) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ if (trace->type.bit8) {
+ raw64 = __be64_to_cpu(*((__u64 *)p));
+ if (cnf.hlim != (raw64 >> 56) ||
+ cnf.wide != (raw64 & 0xffffffffffffff))
return 1;
- *p += sizeof(__u32);
+ p += sizeof(__u64);
}
- if (ioam6h->type.bit18) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ if (trace->type.bit9) {
+ if (__be32_to_cpu(*((__u32 *)p)) != cnf.ingr_wide)
return 1;
- *p += sizeof(__u32);
- }
+ p += sizeof(__u32);
- if (ioam6h->type.bit19) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ if (__be32_to_cpu(*((__u32 *)p)) != cnf.egr_wide)
return 1;
- *p += sizeof(__u32);
+ p += sizeof(__u32);
}
- if (ioam6h->type.bit20) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ if (trace->type.bit10) {
+ if (__be64_to_cpu(*((__u64 *)p)) != cnf.ns_wide)
return 1;
- *p += sizeof(__u32);
+ p += sizeof(__u64);
}
- if (ioam6h->type.bit21) {
- if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ if (trace->type.bit11) {
+ if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff)
return 1;
- *p += sizeof(__u32);
+ p += sizeof(__u32);
}
- if (ioam6h->type.bit22) {
+ if (trace->type.bit22) {
len = cnf.sc_data ? strlen(cnf.sc_data) : 0;
aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0;
- raw32 = __be32_to_cpu(*((__u32 *)*p));
+ raw32 = __be32_to_cpu(*((__u32 *)p));
if (aligned != (raw32 >> 24) * 4 ||
cnf.sc_id != (raw32 & 0xffffff))
return 1;
- *p += sizeof(__u32);
+ p += sizeof(__u32);
if (cnf.sc_data) {
- if (strncmp((char *)*p, cnf.sc_data, len))
+ if (strncmp((char *)p, cnf.sc_data, len))
return 1;
- *p += len;
+ p += len;
aligned -= len;
while (aligned--) {
- if (**p != '\0')
+ if (*p != '\0')
return 1;
- *p += sizeof(__u8);
+ p += sizeof(__u8);
}
}
}
@@ -366,151 +588,351 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
return 0;
}
-static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h,
- __u32 trace_type, __u16 ioam_ns)
+static int check_ioam_trace(int tid, struct ioam6_trace_hdr *trace,
+ __u32 trace_type, __u8 trace_size, __u16 ioam_ns)
{
- __u8 *p;
-
- if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns))
+ if (check_header(tid, trace, trace_type, trace_size, ioam_ns))
return 1;
- p = ioam6h->data + ioam6h->remlen * 4;
-
- switch (tid) {
- case TEST_OUT_BIT0:
- case TEST_OUT_BIT1:
- case TEST_OUT_BIT2:
- case TEST_OUT_BIT3:
- case TEST_OUT_BIT4:
- case TEST_OUT_BIT5:
- case TEST_OUT_BIT6:
- case TEST_OUT_BIT7:
- case TEST_OUT_BIT8:
- case TEST_OUT_BIT9:
- case TEST_OUT_BIT10:
- case TEST_OUT_BIT11:
- case TEST_OUT_BIT22:
- case TEST_OUT_FULL_SUPP_TRACE:
- return check_ioam6_data(&p, ioam6h, node1);
-
- case TEST_IN_BIT0:
- case TEST_IN_BIT1:
- case TEST_IN_BIT2:
- case TEST_IN_BIT3:
- case TEST_IN_BIT4:
- case TEST_IN_BIT5:
- case TEST_IN_BIT6:
- case TEST_IN_BIT7:
- case TEST_IN_BIT8:
- case TEST_IN_BIT9:
- case TEST_IN_BIT10:
- case TEST_IN_BIT11:
- case TEST_IN_BIT22:
- case TEST_IN_FULL_SUPP_TRACE:
- {
- __u32 tmp32 = node2.egr_wide;
- __u16 tmp16 = node2.egr_id;
- int res;
-
- node2.egr_id = 0xffff;
- node2.egr_wide = 0xffffffff;
+ if (tid > __TEST_OUT_MIN && tid < __TEST_OUT_MAX)
+ return check_data(trace, trace_size, node1, true);
- res = check_ioam6_data(&p, ioam6h, node2);
-
- node2.egr_id = tmp16;
- node2.egr_wide = tmp32;
-
- return res;
- }
-
- case TEST_FWD_FULL_SUPP_TRACE:
- if (check_ioam6_data(&p, ioam6h, node3))
- return 1;
- if (check_ioam6_data(&p, ioam6h, node2))
- return 1;
- return check_ioam6_data(&p, ioam6h, node1);
-
- default:
- break;
- }
+ if (tid > __TEST_IN_MIN && tid < __TEST_IN_MAX)
+ return check_data(trace, trace_size, node2, false);
return 1;
}
static int str2id(const char *tname)
{
- if (!strcmp("out_undef_ns", tname))
+ if (!strcmp("output_undef_ns", tname))
return TEST_OUT_UNDEF_NS;
- if (!strcmp("out_no_room", tname))
+ if (!strcmp("output_no_room", tname))
return TEST_OUT_NO_ROOM;
- if (!strcmp("out_bit0", tname))
+ if (!strcmp("output_no_room_oss", tname))
+ return TEST_OUT_NO_ROOM_OSS;
+ if (!strcmp("output_bit0", tname))
return TEST_OUT_BIT0;
- if (!strcmp("out_bit1", tname))
+ if (!strcmp("output_bit1", tname))
return TEST_OUT_BIT1;
- if (!strcmp("out_bit2", tname))
+ if (!strcmp("output_bit2", tname))
return TEST_OUT_BIT2;
- if (!strcmp("out_bit3", tname))
+ if (!strcmp("output_bit3", tname))
return TEST_OUT_BIT3;
- if (!strcmp("out_bit4", tname))
+ if (!strcmp("output_bit4", tname))
return TEST_OUT_BIT4;
- if (!strcmp("out_bit5", tname))
+ if (!strcmp("output_bit5", tname))
return TEST_OUT_BIT5;
- if (!strcmp("out_bit6", tname))
+ if (!strcmp("output_bit6", tname))
return TEST_OUT_BIT6;
- if (!strcmp("out_bit7", tname))
+ if (!strcmp("output_bit7", tname))
return TEST_OUT_BIT7;
- if (!strcmp("out_bit8", tname))
+ if (!strcmp("output_bit8", tname))
return TEST_OUT_BIT8;
- if (!strcmp("out_bit9", tname))
+ if (!strcmp("output_bit9", tname))
return TEST_OUT_BIT9;
- if (!strcmp("out_bit10", tname))
+ if (!strcmp("output_bit10", tname))
return TEST_OUT_BIT10;
- if (!strcmp("out_bit11", tname))
+ if (!strcmp("output_bit11", tname))
return TEST_OUT_BIT11;
- if (!strcmp("out_bit22", tname))
+ if (!strcmp("output_bit22", tname))
return TEST_OUT_BIT22;
- if (!strcmp("out_full_supp_trace", tname))
+ if (!strcmp("output_size4", tname))
+ return TEST_OUT_SIZE4;
+ if (!strcmp("output_size8", tname))
+ return TEST_OUT_SIZE8;
+ if (!strcmp("output_size12", tname))
+ return TEST_OUT_SIZE12;
+ if (!strcmp("output_size16", tname))
+ return TEST_OUT_SIZE16;
+ if (!strcmp("output_size20", tname))
+ return TEST_OUT_SIZE20;
+ if (!strcmp("output_size24", tname))
+ return TEST_OUT_SIZE24;
+ if (!strcmp("output_size28", tname))
+ return TEST_OUT_SIZE28;
+ if (!strcmp("output_size32", tname))
+ return TEST_OUT_SIZE32;
+ if (!strcmp("output_size36", tname))
+ return TEST_OUT_SIZE36;
+ if (!strcmp("output_size40", tname))
+ return TEST_OUT_SIZE40;
+ if (!strcmp("output_size44", tname))
+ return TEST_OUT_SIZE44;
+ if (!strcmp("output_size48", tname))
+ return TEST_OUT_SIZE48;
+ if (!strcmp("output_size52", tname))
+ return TEST_OUT_SIZE52;
+ if (!strcmp("output_size56", tname))
+ return TEST_OUT_SIZE56;
+ if (!strcmp("output_size60", tname))
+ return TEST_OUT_SIZE60;
+ if (!strcmp("output_size64", tname))
+ return TEST_OUT_SIZE64;
+ if (!strcmp("output_size68", tname))
+ return TEST_OUT_SIZE68;
+ if (!strcmp("output_size72", tname))
+ return TEST_OUT_SIZE72;
+ if (!strcmp("output_size76", tname))
+ return TEST_OUT_SIZE76;
+ if (!strcmp("output_size80", tname))
+ return TEST_OUT_SIZE80;
+ if (!strcmp("output_size84", tname))
+ return TEST_OUT_SIZE84;
+ if (!strcmp("output_size88", tname))
+ return TEST_OUT_SIZE88;
+ if (!strcmp("output_size92", tname))
+ return TEST_OUT_SIZE92;
+ if (!strcmp("output_size96", tname))
+ return TEST_OUT_SIZE96;
+ if (!strcmp("output_size100", tname))
+ return TEST_OUT_SIZE100;
+ if (!strcmp("output_size104", tname))
+ return TEST_OUT_SIZE104;
+ if (!strcmp("output_size108", tname))
+ return TEST_OUT_SIZE108;
+ if (!strcmp("output_size112", tname))
+ return TEST_OUT_SIZE112;
+ if (!strcmp("output_size116", tname))
+ return TEST_OUT_SIZE116;
+ if (!strcmp("output_size120", tname))
+ return TEST_OUT_SIZE120;
+ if (!strcmp("output_size124", tname))
+ return TEST_OUT_SIZE124;
+ if (!strcmp("output_size128", tname))
+ return TEST_OUT_SIZE128;
+ if (!strcmp("output_size132", tname))
+ return TEST_OUT_SIZE132;
+ if (!strcmp("output_size136", tname))
+ return TEST_OUT_SIZE136;
+ if (!strcmp("output_size140", tname))
+ return TEST_OUT_SIZE140;
+ if (!strcmp("output_size144", tname))
+ return TEST_OUT_SIZE144;
+ if (!strcmp("output_size148", tname))
+ return TEST_OUT_SIZE148;
+ if (!strcmp("output_size152", tname))
+ return TEST_OUT_SIZE152;
+ if (!strcmp("output_size156", tname))
+ return TEST_OUT_SIZE156;
+ if (!strcmp("output_size160", tname))
+ return TEST_OUT_SIZE160;
+ if (!strcmp("output_size164", tname))
+ return TEST_OUT_SIZE164;
+ if (!strcmp("output_size168", tname))
+ return TEST_OUT_SIZE168;
+ if (!strcmp("output_size172", tname))
+ return TEST_OUT_SIZE172;
+ if (!strcmp("output_size176", tname))
+ return TEST_OUT_SIZE176;
+ if (!strcmp("output_size180", tname))
+ return TEST_OUT_SIZE180;
+ if (!strcmp("output_size184", tname))
+ return TEST_OUT_SIZE184;
+ if (!strcmp("output_size188", tname))
+ return TEST_OUT_SIZE188;
+ if (!strcmp("output_size192", tname))
+ return TEST_OUT_SIZE192;
+ if (!strcmp("output_size196", tname))
+ return TEST_OUT_SIZE196;
+ if (!strcmp("output_size200", tname))
+ return TEST_OUT_SIZE200;
+ if (!strcmp("output_size204", tname))
+ return TEST_OUT_SIZE204;
+ if (!strcmp("output_size208", tname))
+ return TEST_OUT_SIZE208;
+ if (!strcmp("output_size212", tname))
+ return TEST_OUT_SIZE212;
+ if (!strcmp("output_size216", tname))
+ return TEST_OUT_SIZE216;
+ if (!strcmp("output_size220", tname))
+ return TEST_OUT_SIZE220;
+ if (!strcmp("output_size224", tname))
+ return TEST_OUT_SIZE224;
+ if (!strcmp("output_size228", tname))
+ return TEST_OUT_SIZE228;
+ if (!strcmp("output_size232", tname))
+ return TEST_OUT_SIZE232;
+ if (!strcmp("output_size236", tname))
+ return TEST_OUT_SIZE236;
+ if (!strcmp("output_size240", tname))
+ return TEST_OUT_SIZE240;
+ if (!strcmp("output_size244", tname))
+ return TEST_OUT_SIZE244;
+ if (!strcmp("output_full_supp_trace", tname))
return TEST_OUT_FULL_SUPP_TRACE;
- if (!strcmp("in_undef_ns", tname))
+ if (!strcmp("input_undef_ns", tname))
return TEST_IN_UNDEF_NS;
- if (!strcmp("in_no_room", tname))
+ if (!strcmp("input_no_room", tname))
return TEST_IN_NO_ROOM;
- if (!strcmp("in_oflag", tname))
+ if (!strcmp("input_no_room_oss", tname))
+ return TEST_IN_NO_ROOM_OSS;
+ if (!strcmp("input_disabled", tname))
+ return TEST_IN_DISABLED;
+ if (!strcmp("input_oflag", tname))
return TEST_IN_OFLAG;
- if (!strcmp("in_bit0", tname))
+ if (!strcmp("input_bit0", tname))
return TEST_IN_BIT0;
- if (!strcmp("in_bit1", tname))
+ if (!strcmp("input_bit1", tname))
return TEST_IN_BIT1;
- if (!strcmp("in_bit2", tname))
+ if (!strcmp("input_bit2", tname))
return TEST_IN_BIT2;
- if (!strcmp("in_bit3", tname))
+ if (!strcmp("input_bit3", tname))
return TEST_IN_BIT3;
- if (!strcmp("in_bit4", tname))
+ if (!strcmp("input_bit4", tname))
return TEST_IN_BIT4;
- if (!strcmp("in_bit5", tname))
+ if (!strcmp("input_bit5", tname))
return TEST_IN_BIT5;
- if (!strcmp("in_bit6", tname))
+ if (!strcmp("input_bit6", tname))
return TEST_IN_BIT6;
- if (!strcmp("in_bit7", tname))
+ if (!strcmp("input_bit7", tname))
return TEST_IN_BIT7;
- if (!strcmp("in_bit8", tname))
+ if (!strcmp("input_bit8", tname))
return TEST_IN_BIT8;
- if (!strcmp("in_bit9", tname))
+ if (!strcmp("input_bit9", tname))
return TEST_IN_BIT9;
- if (!strcmp("in_bit10", tname))
+ if (!strcmp("input_bit10", tname))
return TEST_IN_BIT10;
- if (!strcmp("in_bit11", tname))
+ if (!strcmp("input_bit11", tname))
return TEST_IN_BIT11;
- if (!strcmp("in_bit22", tname))
+ if (!strcmp("input_bit22", tname))
return TEST_IN_BIT22;
- if (!strcmp("in_full_supp_trace", tname))
+ if (!strcmp("input_size4", tname))
+ return TEST_IN_SIZE4;
+ if (!strcmp("input_size8", tname))
+ return TEST_IN_SIZE8;
+ if (!strcmp("input_size12", tname))
+ return TEST_IN_SIZE12;
+ if (!strcmp("input_size16", tname))
+ return TEST_IN_SIZE16;
+ if (!strcmp("input_size20", tname))
+ return TEST_IN_SIZE20;
+ if (!strcmp("input_size24", tname))
+ return TEST_IN_SIZE24;
+ if (!strcmp("input_size28", tname))
+ return TEST_IN_SIZE28;
+ if (!strcmp("input_size32", tname))
+ return TEST_IN_SIZE32;
+ if (!strcmp("input_size36", tname))
+ return TEST_IN_SIZE36;
+ if (!strcmp("input_size40", tname))
+ return TEST_IN_SIZE40;
+ if (!strcmp("input_size44", tname))
+ return TEST_IN_SIZE44;
+ if (!strcmp("input_size48", tname))
+ return TEST_IN_SIZE48;
+ if (!strcmp("input_size52", tname))
+ return TEST_IN_SIZE52;
+ if (!strcmp("input_size56", tname))
+ return TEST_IN_SIZE56;
+ if (!strcmp("input_size60", tname))
+ return TEST_IN_SIZE60;
+ if (!strcmp("input_size64", tname))
+ return TEST_IN_SIZE64;
+ if (!strcmp("input_size68", tname))
+ return TEST_IN_SIZE68;
+ if (!strcmp("input_size72", tname))
+ return TEST_IN_SIZE72;
+ if (!strcmp("input_size76", tname))
+ return TEST_IN_SIZE76;
+ if (!strcmp("input_size80", tname))
+ return TEST_IN_SIZE80;
+ if (!strcmp("input_size84", tname))
+ return TEST_IN_SIZE84;
+ if (!strcmp("input_size88", tname))
+ return TEST_IN_SIZE88;
+ if (!strcmp("input_size92", tname))
+ return TEST_IN_SIZE92;
+ if (!strcmp("input_size96", tname))
+ return TEST_IN_SIZE96;
+ if (!strcmp("input_size100", tname))
+ return TEST_IN_SIZE100;
+ if (!strcmp("input_size104", tname))
+ return TEST_IN_SIZE104;
+ if (!strcmp("input_size108", tname))
+ return TEST_IN_SIZE108;
+ if (!strcmp("input_size112", tname))
+ return TEST_IN_SIZE112;
+ if (!strcmp("input_size116", tname))
+ return TEST_IN_SIZE116;
+ if (!strcmp("input_size120", tname))
+ return TEST_IN_SIZE120;
+ if (!strcmp("input_size124", tname))
+ return TEST_IN_SIZE124;
+ if (!strcmp("input_size128", tname))
+ return TEST_IN_SIZE128;
+ if (!strcmp("input_size132", tname))
+ return TEST_IN_SIZE132;
+ if (!strcmp("input_size136", tname))
+ return TEST_IN_SIZE136;
+ if (!strcmp("input_size140", tname))
+ return TEST_IN_SIZE140;
+ if (!strcmp("input_size144", tname))
+ return TEST_IN_SIZE144;
+ if (!strcmp("input_size148", tname))
+ return TEST_IN_SIZE148;
+ if (!strcmp("input_size152", tname))
+ return TEST_IN_SIZE152;
+ if (!strcmp("input_size156", tname))
+ return TEST_IN_SIZE156;
+ if (!strcmp("input_size160", tname))
+ return TEST_IN_SIZE160;
+ if (!strcmp("input_size164", tname))
+ return TEST_IN_SIZE164;
+ if (!strcmp("input_size168", tname))
+ return TEST_IN_SIZE168;
+ if (!strcmp("input_size172", tname))
+ return TEST_IN_SIZE172;
+ if (!strcmp("input_size176", tname))
+ return TEST_IN_SIZE176;
+ if (!strcmp("input_size180", tname))
+ return TEST_IN_SIZE180;
+ if (!strcmp("input_size184", tname))
+ return TEST_IN_SIZE184;
+ if (!strcmp("input_size188", tname))
+ return TEST_IN_SIZE188;
+ if (!strcmp("input_size192", tname))
+ return TEST_IN_SIZE192;
+ if (!strcmp("input_size196", tname))
+ return TEST_IN_SIZE196;
+ if (!strcmp("input_size200", tname))
+ return TEST_IN_SIZE200;
+ if (!strcmp("input_size204", tname))
+ return TEST_IN_SIZE204;
+ if (!strcmp("input_size208", tname))
+ return TEST_IN_SIZE208;
+ if (!strcmp("input_size212", tname))
+ return TEST_IN_SIZE212;
+ if (!strcmp("input_size216", tname))
+ return TEST_IN_SIZE216;
+ if (!strcmp("input_size220", tname))
+ return TEST_IN_SIZE220;
+ if (!strcmp("input_size224", tname))
+ return TEST_IN_SIZE224;
+ if (!strcmp("input_size228", tname))
+ return TEST_IN_SIZE228;
+ if (!strcmp("input_size232", tname))
+ return TEST_IN_SIZE232;
+ if (!strcmp("input_size236", tname))
+ return TEST_IN_SIZE236;
+ if (!strcmp("input_size240", tname))
+ return TEST_IN_SIZE240;
+ if (!strcmp("input_size244", tname))
+ return TEST_IN_SIZE244;
+ if (!strcmp("input_full_supp_trace", tname))
return TEST_IN_FULL_SUPP_TRACE;
- if (!strcmp("fwd_full_supp_trace", tname))
- return TEST_FWD_FULL_SUPP_TRACE;
return -1;
}
+static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2)
+{
+ return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) |
+ (a1->s6_addr32[1] ^ a2->s6_addr32[1]) |
+ (a1->s6_addr32[2] ^ a2->s6_addr32[2]) |
+ (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0;
+}
+
static int get_u32(__u32 *val, const char *arg, int base)
{
unsigned long res;
@@ -555,119 +977,124 @@ static int get_u16(__u16 *val, const char *arg, int base)
return 0;
}
-static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
- [TEST_OUT_UNDEF_NS] = check_ioam_header,
- [TEST_OUT_NO_ROOM] = check_ioam_header,
- [TEST_OUT_BIT0] = check_ioam_header_and_data,
- [TEST_OUT_BIT1] = check_ioam_header_and_data,
- [TEST_OUT_BIT2] = check_ioam_header_and_data,
- [TEST_OUT_BIT3] = check_ioam_header_and_data,
- [TEST_OUT_BIT4] = check_ioam_header_and_data,
- [TEST_OUT_BIT5] = check_ioam_header_and_data,
- [TEST_OUT_BIT6] = check_ioam_header_and_data,
- [TEST_OUT_BIT7] = check_ioam_header_and_data,
- [TEST_OUT_BIT8] = check_ioam_header_and_data,
- [TEST_OUT_BIT9] = check_ioam_header_and_data,
- [TEST_OUT_BIT10] = check_ioam_header_and_data,
- [TEST_OUT_BIT11] = check_ioam_header_and_data,
- [TEST_OUT_BIT22] = check_ioam_header_and_data,
- [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data,
- [TEST_IN_UNDEF_NS] = check_ioam_header,
- [TEST_IN_NO_ROOM] = check_ioam_header,
- [TEST_IN_OFLAG] = check_ioam_header,
- [TEST_IN_BIT0] = check_ioam_header_and_data,
- [TEST_IN_BIT1] = check_ioam_header_and_data,
- [TEST_IN_BIT2] = check_ioam_header_and_data,
- [TEST_IN_BIT3] = check_ioam_header_and_data,
- [TEST_IN_BIT4] = check_ioam_header_and_data,
- [TEST_IN_BIT5] = check_ioam_header_and_data,
- [TEST_IN_BIT6] = check_ioam_header_and_data,
- [TEST_IN_BIT7] = check_ioam_header_and_data,
- [TEST_IN_BIT8] = check_ioam_header_and_data,
- [TEST_IN_BIT9] = check_ioam_header_and_data,
- [TEST_IN_BIT10] = check_ioam_header_and_data,
- [TEST_IN_BIT11] = check_ioam_header_and_data,
- [TEST_IN_BIT22] = check_ioam_header_and_data,
- [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data,
- [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data,
-};
+static int get_u8(__u8 *val, const char *arg, int base)
+{
+ unsigned long res;
+ char *ptr;
+
+ if (!arg || !*arg)
+ return -1;
+ res = strtoul(arg, &ptr, base);
+
+ if (!ptr || ptr == arg || *ptr)
+ return -1;
+
+ if (res == ULONG_MAX && errno == ERANGE)
+ return -1;
+
+ if (res > 0xFFUL)
+ return -1;
+
+ *val = res;
+ return 0;
+}
int main(int argc, char **argv)
{
- int fd, size, hoplen, tid, ret = 1, on = 1;
- struct ioam6_hdr *opt;
- struct cmsghdr *cmsg;
- struct msghdr msg;
- struct iovec iov;
- __u8 buffer[512];
+ __u8 buffer[512], *ptr, nexthdr, tr_size;
+ struct ioam6_trace_hdr *trace;
+ unsigned int hoplen, ret = 1;
+ struct ipv6_hopopt_hdr *hbh;
+ int fd, size, testname_id;
+ struct in6_addr src, dst;
+ struct ioam6_hdr *ioam6;
+ struct timeval timeout;
+ struct ipv6hdr *ipv6;
__u32 tr_type;
__u16 ioam_ns;
- __u8 *ptr;
- if (argc != 5)
+ if (argc != 9)
goto out;
- tid = str2id(argv[1]);
- if (tid < 0 || !func[tid])
- goto out;
+ testname_id = str2id(argv[2]);
- if (get_u32(&tr_type, argv[2], 16) ||
- get_u16(&ioam_ns, argv[3], 0))
+ if (testname_id < 0 ||
+ inet_pton(AF_INET6, argv[3], &src) != 1 ||
+ inet_pton(AF_INET6, argv[4], &dst) != 1 ||
+ get_u32(&tr_type, argv[5], 16) ||
+ get_u8(&tr_size, argv[6], 0) ||
+ get_u16(&ioam_ns, argv[7], 0))
goto out;
- fd = socket(PF_INET6, SOCK_RAW,
- !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6);
+ nexthdr = (!strcmp(argv[8], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6);
+
+ hoplen = sizeof(*hbh);
+ hoplen += 2; // 2-byte padding for alignment
+ hoplen += sizeof(*ioam6); // IOAM option header
+ hoplen += sizeof(*trace); // IOAM trace header
+ hoplen += tr_size; // IOAM trace size
+ hoplen += (tr_size % 8); // optional padding
+
+ fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6));
if (fd < 0)
goto out;
- setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on));
+ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+ argv[1], strlen(argv[1])))
+ goto close;
- iov.iov_len = 1;
- iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer)));
- if (!iov.iov_base)
+ timeout.tv_sec = 1;
+ timeout.tv_usec = 0;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO,
+ (const char *)&timeout, sizeof(timeout)))
goto close;
recv:
- memset(&msg, 0, sizeof(msg));
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_control = buffer;
- msg.msg_controllen = CMSG_SPACE(sizeof(buffer));
-
- size = recvmsg(fd, &msg, 0);
+ size = recv(fd, buffer, sizeof(buffer), 0);
if (size <= 0)
goto close;
- for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
- if (cmsg->cmsg_level != IPPROTO_IPV6 ||
- cmsg->cmsg_type != IPV6_HOPOPTS ||
- cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr))
- continue;
+ ipv6 = (struct ipv6hdr *)buffer;
+
+ /* Skip packets that do not have the expected src/dst address or that
+ * do not have a Hop-by-hop.
+ */
+ if (!ipv6_addr_equal(&ipv6->saddr, &src) ||
+ !ipv6_addr_equal(&ipv6->daddr, &dst) ||
+ ipv6->nexthdr != IPPROTO_HOPOPTS)
+ goto recv;
+
+ /* Check Hbh's Next Header and Size. */
+ hbh = (struct ipv6_hopopt_hdr *)(buffer + sizeof(*ipv6));
+ if (hbh->nexthdr != nexthdr || hbh->hdrlen != (hoplen >> 3) - 1)
+ goto close;
- ptr = (__u8 *)CMSG_DATA(cmsg);
+ /* Check we have a 2-byte padding for alignment. */
+ ptr = (__u8 *)hbh + sizeof(*hbh);
+ if (ptr[0] != IPV6_TLV_PADN && ptr[1] != 0)
+ goto close;
- hoplen = (ptr[1] + 1) << 3;
- ptr += sizeof(struct ipv6_hopopt_hdr);
+ /* Check we now have the IOAM option. */
+ ptr += 2;
+ if (ptr[0] != IPV6_TLV_IOAM)
+ goto close;
- while (hoplen > 0) {
- opt = (struct ioam6_hdr *)ptr;
+ /* Check its size and the IOAM option type. */
+ ioam6 = (struct ioam6_hdr *)ptr;
+ if (ioam6->opt_len != sizeof(*ioam6) - 2 + sizeof(*trace) + tr_size ||
+ ioam6->type != IOAM6_TYPE_PREALLOC)
+ goto close;
- if (opt->opt_type == IPV6_TLV_IOAM &&
- opt->type == IOAM6_TYPE_PREALLOC) {
- ptr += sizeof(*opt);
- ret = func[tid](tid,
- (struct ioam6_trace_hdr *)ptr,
- tr_type, ioam_ns);
- goto close;
- }
+ trace = (struct ioam6_trace_hdr *)(ptr + sizeof(*ioam6));
- ptr += opt->opt_len + 2;
- hoplen -= opt->opt_len + 2;
- }
- }
+ /* Check the trailing 4-byte padding (potentially). */
+ ptr = (__u8 *)trace + sizeof(*trace) + tr_size;
+ if (tr_size % 8 && ptr[0] != IPV6_TLV_PADN && ptr[1] != 2 &&
+ ptr[2] != 0 && ptr[3] != 0)
+ goto close;
- goto recv;
+ /* Check the IOAM header and data. */
+ ret = check_ioam_trace(testname_id, trace, tr_type, tr_size, ioam_ns);
close:
- free(iov.iov_base);
close(fd);
out:
return ret;
diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c
index 29451d2244b7..e6834a6cfc8f 100644
--- a/tools/testing/selftests/net/ip_local_port_range.c
+++ b/tools/testing/selftests/net/ip_local_port_range.c
@@ -10,7 +10,7 @@
#include <fcntl.h>
#include <netinet/ip.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#ifndef IP_LOCAL_PORT_RANGE
#define IP_LOCAL_PORT_RANGE 51
diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh
index 6c6ad346eaa0..4ff746db1256 100755
--- a/tools/testing/selftests/net/ip_local_port_range.sh
+++ b/tools/testing/selftests/net/ip_local_port_range.sh
@@ -2,4 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
./in_netns.sh \
- sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range'
+ sh -c 'sysctl -q -w net.mptcp.enabled=1 && \
+ sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && \
+ ./ip_local_port_range'
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index be4a30a0d02a..0ccf484b1d9d 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -34,7 +34,7 @@
#include <time.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define printk(fmt, ...) \
ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__)
@@ -227,7 +227,8 @@ static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz,
attr->rta_len = RTA_LENGTH(size);
attr->rta_type = rta_type;
- memcpy(RTA_DATA(attr), payload, size);
+ if (payload)
+ memcpy(RTA_DATA(attr), payload, size);
return 0;
}
diff --git a/tools/testing/selftests/net/ipv6_force_forwarding.sh b/tools/testing/selftests/net/ipv6_force_forwarding.sh
new file mode 100755
index 000000000000..bf0243366caa
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_force_forwarding.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test IPv6 force_forwarding interface property
+#
+# This test verifies that the force_forwarding property works correctly:
+# - When global forwarding is disabled, packets are not forwarded normally
+# - When force_forwarding is enabled on an interface, packets are forwarded
+# regardless of the global forwarding setting
+
+source lib.sh
+
+cleanup() {
+ cleanup_ns $ns1 $ns2 $ns3
+}
+
+trap cleanup EXIT
+
+setup_test() {
+ # Create three namespaces: sender, router, receiver
+ setup_ns ns1 ns2 ns3
+
+ # Create veth pairs: ns1 <-> ns2 <-> ns3
+ ip link add name veth12 type veth peer name veth21
+ ip link add name veth23 type veth peer name veth32
+
+ # Move interfaces to namespaces
+ ip link set veth12 netns $ns1
+ ip link set veth21 netns $ns2
+ ip link set veth23 netns $ns2
+ ip link set veth32 netns $ns3
+
+ # Configure interfaces
+ ip -n $ns1 addr add 2001:db8:1::1/64 dev veth12 nodad
+ ip -n $ns2 addr add 2001:db8:1::2/64 dev veth21 nodad
+ ip -n $ns2 addr add 2001:db8:2::1/64 dev veth23 nodad
+ ip -n $ns3 addr add 2001:db8:2::2/64 dev veth32 nodad
+
+ # Bring up interfaces
+ ip -n $ns1 link set veth12 up
+ ip -n $ns2 link set veth21 up
+ ip -n $ns2 link set veth23 up
+ ip -n $ns3 link set veth32 up
+
+ # Add routes
+ ip -n $ns1 route add 2001:db8:2::/64 via 2001:db8:1::2
+ ip -n $ns3 route add 2001:db8:1::/64 via 2001:db8:2::1
+
+ # Disable global forwarding
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=0
+}
+
+test_force_forwarding() {
+ local ret=0
+
+ echo "TEST: force_forwarding functionality"
+
+ # Check if force_forwarding sysctl exists
+ if ! ip netns exec $ns2 test -f /proc/sys/net/ipv6/conf/veth21/force_forwarding; then
+ echo "SKIP: force_forwarding not available"
+ return $ksft_skip
+ fi
+
+ # Test 1: Without force_forwarding, ping should fail
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=0
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=0
+
+ if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+ echo "FAIL: ping succeeded when forwarding disabled"
+ ret=1
+ else
+ echo "PASS: forwarding disabled correctly"
+ fi
+
+ # Test 2: With force_forwarding enabled, ping should succeed
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=1
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=1
+
+ if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+ echo "PASS: force_forwarding enabled forwarding"
+ else
+ echo "FAIL: ping failed with force_forwarding enabled"
+ ret=1
+ fi
+
+ return $ret
+}
+
+echo "IPv6 force_forwarding test"
+echo "=========================="
+
+setup_test
+test_force_forwarding
+ret=$?
+
+if [ $ret -eq 0 ]; then
+ echo "OK"
+ exit 0
+elif [ $ret -eq $ksft_skip ]; then
+ echo "SKIP"
+ exit $ksft_skip
+else
+ echo "FAIL"
+ exit 1
+fi
diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c
new file mode 100644
index 000000000000..672c9fe086a7
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_fragmentation.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: Brett A C Sheffield <bacs@librecast.net>
+ *
+ * Kernel selftest for the IPv6 fragmentation regression which affected stable
+ * kernels:
+ *
+ * https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com
+ *
+ * Commit: a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable
+ * without some prerequisite commits.
+ *
+ * This caused a regression when sending IPv6 UDP packets by preventing
+ * fragmentation and instead returning -1 (EMSGSIZE).
+ *
+ * This selftest demonstrates the issue by sending an IPv6 UDP packet to
+ * localhost (::1) on the loopback interface from the autoconfigured link-local
+ * address.
+ *
+ * sendmsg(2) returns bytes sent correctly on a working kernel, and returns -1
+ * (EMSGSIZE) when the regression is present.
+ *
+ * The regression was not present in the mainline kernel, but add this test to
+ * catch similar breakage in future.
+ */
+
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sched.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include "kselftest.h"
+
+#define MTU 1500
+#define LARGER_THAN_MTU 8192
+
+static void setup(void)
+{
+ struct ifreq ifr = {
+ .ifr_name = "lo"
+ };
+ int ctl;
+
+ /* we need to set MTU, so do this in a namespace to play nicely */
+ if (unshare(CLONE_NEWNET) == -1)
+ error(KSFT_FAIL, errno, "unshare");
+
+ ctl = socket(AF_LOCAL, SOCK_STREAM, 0);
+ if (ctl == -1)
+ error(KSFT_FAIL, errno, "socket");
+
+ /* ensure MTU is smaller than what we plan to send */
+ ifr.ifr_mtu = MTU;
+ if (ioctl(ctl, SIOCSIFMTU, &ifr) == -1)
+ error(KSFT_FAIL, errno, "ioctl: set MTU");
+
+ /* bring up interface */
+ if (ioctl(ctl, SIOCGIFFLAGS, &ifr) == -1)
+ error(KSFT_FAIL, errno, "ioctl SIOCGIFFLAGS");
+ ifr.ifr_flags = ifr.ifr_flags | IFF_UP;
+ if (ioctl(ctl, SIOCSIFFLAGS, &ifr) == -1)
+ error(KSFT_FAIL, errno, "ioctl: bring interface up");
+
+ if (close(ctl) == -1)
+ error(KSFT_FAIL, errno, "close");
+}
+
+int main(void)
+{
+ struct in6_addr addr = {
+ .s6_addr[15] = 0x01, /* ::1 */
+ };
+ struct sockaddr_in6 sa = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = addr,
+ .sin6_port = htons(9) /* port 9/udp (DISCARD) */
+ };
+ static char buf[LARGER_THAN_MTU] = {0};
+ struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_name = (struct sockaddr *)&sa,
+ .msg_namelen = sizeof(sa),
+ };
+ ssize_t rc;
+ int s;
+
+ printf("Testing IPv6 fragmentation\n");
+ setup();
+ s = socket(AF_INET6, SOCK_DGRAM, 0);
+send_again:
+ rc = sendmsg(s, &msg, 0);
+ if (rc == -1) {
+ /* if interface wasn't ready, try again */
+ if (errno == EADDRNOTAVAIL) {
+ usleep(1000);
+ goto send_again;
+ }
+ error(KSFT_FAIL, errno, "sendmsg");
+ } else if (rc != LARGER_THAN_MTU) {
+ error(KSFT_FAIL, errno, "sendmsg returned %zi, expected %i",
+ rc, LARGER_THAN_MTU);
+ }
+ printf("[PASS] sendmsg() returned %zi\n", rc);
+ if (close(s) == -1)
+ error(KSFT_FAIL, errno, "close");
+ return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
new file mode 100755
index 000000000000..c6866e42f95c
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
@@ -0,0 +1,261 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing for potential kernel soft lockup during IPv6 routing table
+# refresh under heavy outgoing IPv6 traffic. If a kernel soft lockup
+# occurs, a kernel panic will be triggered to prevent associated issues.
+#
+#
+# Test Environment Layout
+#
+# ┌----------------┐ ┌----------------┐
+# | SOURCE_NS | | SINK_NS |
+# | NAMESPACE | | NAMESPACE |
+# |(iperf3 clients)| |(iperf3 servers)|
+# | | | |
+# | | | |
+# | ┌-----------| nexthops |---------┐ |
+# | |veth_source|<--------------------------------------->|veth_sink|<┐ |
+# | └-----------|2001:0DB8:1::0:1/96 2001:0DB8:1::1:1/96 |---------┘ | |
+# | | ^ 2001:0DB8:1::1:2/96 | | |
+# | | . . | fwd | |
+# | ┌---------┐ | . . | | |
+# | | IPv6 | | . . | V |
+# | | routing | | . 2001:0DB8:1::1:80/96| ┌-----┐ |
+# | | table | | . | | lo | |
+# | | nexthop | | . └--------┴-----┴-┘
+# | | update | | ............................> 2001:0DB8:2::1:1/128
+# | └-------- ┘ |
+# └----------------┘
+#
+# The test script sets up two network namespaces, source_ns and sink_ns,
+# connected via a veth link. Within source_ns, it continuously updates the
+# IPv6 routing table by flushing and inserting IPV6_NEXTHOP_ADDR_COUNT nexthop
+# IPs destined for SINK_LOOPBACK_IP_ADDR in sink_ns. This refresh occurs at a
+# rate of 1/ROUTING_TABLE_REFRESH_PERIOD per second for TEST_DURATION seconds.
+#
+# Simultaneously, multiple iperf3 clients within source_ns generate heavy
+# outgoing IPv6 traffic. Each client is assigned a unique port number starting
+# at 5000 and incrementing sequentially. Each client targets a unique iperf3
+# server running in sink_ns, connected to the SINK_LOOPBACK_IFACE interface
+# using the same port number.
+#
+# The number of iperf3 servers and clients is set to half of the total
+# available cores on each machine.
+#
+# NOTE: We have tested this script on machines with various CPU specifications,
+# ranging from lower to higher performance as listed below. The test script
+# effectively triggered a kernel soft lockup on machines running an unpatched
+# kernel in under a minute:
+#
+# - 1x Intel Xeon E-2278G 8-Core Processor @ 3.40GHz
+# - 1x Intel Xeon E-2378G Processor 8-Core @ 2.80GHz
+# - 1x AMD EPYC 7401P 24-Core Processor @ 2.00GHz
+# - 1x AMD EPYC 7402P 24-Core Processor @ 2.80GHz
+# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz
+# - 1x Ampere Altra Q80-30 80-Core Processor @ 3.00GHz
+# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz
+# - 2x Intel Xeon Silver 4214 24-Core Processor @ 2.20GHz
+# - 1x AMD EPYC 7502P 32-Core @ 2.50GHz
+# - 1x Intel Xeon Gold 6314U 32-Core Processor @ 2.30GHz
+# - 2x Intel Xeon Gold 6338 32-Core Processor @ 2.00GHz
+#
+# On less performant machines, you may need to increase the TEST_DURATION
+# parameter to enhance the likelihood of encountering a race condition leading
+# to a kernel soft lockup and avoid a false negative result.
+#
+# NOTE: The test may not produce the expected result in virtualized
+# environments (e.g., qemu) due to differences in timing and CPU handling,
+# which can affect the conditions needed to trigger a soft lockup.
+
+source lib.sh
+
+TEST_DURATION=300
+ROUTING_TABLE_REFRESH_PERIOD=0.01
+
+IPERF3_BITRATE="300m"
+
+
+IPV6_NEXTHOP_ADDR_COUNT="128"
+IPV6_NEXTHOP_ADDR_MASK="96"
+IPV6_NEXTHOP_PREFIX="2001:0DB8:1"
+
+
+SOURCE_TEST_IFACE="veth_source"
+SOURCE_TEST_IP_ADDR="2001:0DB8:1::0:1/96"
+
+SINK_TEST_IFACE="veth_sink"
+# ${SINK_TEST_IFACE} is populated with the following range of IPv6 addresses:
+# 2001:0DB8:1::1:1 to 2001:0DB8:1::1:${IPV6_NEXTHOP_ADDR_COUNT}
+SINK_LOOPBACK_IFACE="lo"
+SINK_LOOPBACK_IP_MASK="128"
+SINK_LOOPBACK_IP_ADDR="2001:0DB8:2::1:1"
+
+nexthop_ip_list=""
+termination_signal=""
+kernel_softlokup_panic_prev_val=""
+
+terminate_ns_processes_by_pattern() {
+ local ns=$1
+ local pattern=$2
+
+ for pid in $(ip netns pids ${ns}); do
+ [ -e /proc/$pid/cmdline ] && grep -qe "${pattern}" /proc/$pid/cmdline && kill -9 $pid
+ done
+}
+
+cleanup() {
+ echo "info: cleaning up namespaces and terminating all processes within them..."
+
+
+ # Terminate iperf3 instances running in the source_ns. To avoid race
+ # conditions, first iterate over the PIDs and terminate those
+ # associated with the bash shells running the
+ # `while true; do iperf3 -c ...; done` loops. In a second iteration,
+ # terminate the individual `iperf3 -c ...` instances.
+ terminate_ns_processes_by_pattern ${source_ns} while
+ terminate_ns_processes_by_pattern ${source_ns} iperf3
+
+ # Repeat the same process for sink_ns
+ terminate_ns_processes_by_pattern ${sink_ns} while
+ terminate_ns_processes_by_pattern ${sink_ns} iperf3
+
+ # Check if any iperf3 instances are still running. This could happen
+ # if a core has entered an infinite loop and the timeout for detecting
+ # the soft lockup has not expired, but either the test interval has
+ # already elapsed or the test was terminated manually (e.g., with ^C)
+ for pid in $(ip netns pids ${source_ns}); do
+ if [ -e /proc/$pid/cmdline ] && grep -qe 'iperf3' /proc/$pid/cmdline; then
+ echo "FAIL: unable to terminate some iperf3 instances. Soft lockup is underway. A kernel panic is on the way!"
+ exit ${ksft_fail}
+ fi
+ done
+
+ if [ "$termination_signal" == "SIGINT" ]; then
+ echo "SKIP: Termination due to ^C (SIGINT)"
+ elif [ "$termination_signal" == "SIGALRM" ]; then
+ echo "PASS: No kernel soft lockup occurred during this ${TEST_DURATION} second test"
+ fi
+
+ cleanup_ns ${source_ns} ${sink_ns}
+
+ sysctl -qw kernel.softlockup_panic=${kernel_softlokup_panic_prev_val}
+}
+
+setup_prepare() {
+ setup_ns source_ns sink_ns
+
+ ip -n ${source_ns} link add name ${SOURCE_TEST_IFACE} type veth peer name ${SINK_TEST_IFACE} netns ${sink_ns}
+
+ # Setting up the Source namespace
+ ip -n ${source_ns} addr add ${SOURCE_TEST_IP_ADDR} dev ${SOURCE_TEST_IFACE}
+ ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} qlen 10000
+ ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} up
+ ip netns exec ${source_ns} sysctl -qw net.ipv6.fib_multipath_hash_policy=1
+
+ # Setting up the Sink namespace
+ ip -n ${sink_ns} addr add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} dev ${SINK_LOOPBACK_IFACE}
+ ip -n ${sink_ns} link set dev ${SINK_LOOPBACK_IFACE} up
+ ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_LOOPBACK_IFACE}.forwarding=1
+
+ ip -n ${sink_ns} link set ${SINK_TEST_IFACE} up
+ ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_TEST_IFACE}.forwarding=1
+
+
+ # Populate nexthop IPv6 addresses on the test interface in the sink_ns
+ echo "info: populating ${IPV6_NEXTHOP_ADDR_COUNT} IPv6 addresses on the ${SINK_TEST_IFACE} interface ..."
+ for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do
+ ip -n ${sink_ns} addr add ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" "${IP}")/${IPV6_NEXTHOP_ADDR_MASK} dev ${SINK_TEST_IFACE};
+ done
+
+ # Preparing list of nexthops
+ for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do
+ nexthop_ip_list=$nexthop_ip_list" nexthop via ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" $IP) dev ${SOURCE_TEST_IFACE} weight 1"
+ done
+}
+
+
+test_soft_lockup_during_routing_table_refresh() {
+ # Start num_of_iperf_servers iperf3 servers in the sink_ns namespace,
+ # each listening on ports starting at 5001 and incrementing
+ # sequentially. Since iperf3 instances may terminate unexpectedly, a
+ # while loop is used to automatically restart them in such cases.
+ echo "info: starting ${num_of_iperf_servers} iperf3 servers in the sink_ns namespace ..."
+ for i in $(seq 1 ${num_of_iperf_servers}); do
+ cmd="iperf3 --bind ${SINK_LOOPBACK_IP_ADDR} -s -p $(printf '5%03d' ${i}) --rcv-timeout 200 &>/dev/null"
+ ip netns exec ${sink_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null
+ done
+
+ # Wait for the iperf3 servers to be ready
+ for i in $(seq ${num_of_iperf_servers}); do
+ port=$(printf '5%03d' ${i});
+ wait_local_port_listen ${sink_ns} ${port} tcp
+ done
+
+ # Continuously refresh the routing table in the background within
+ # the source_ns namespace
+ ip netns exec ${source_ns} bash -c "
+ while \$(ip netns list | grep -q ${source_ns}); do
+ ip -6 route add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} ${nexthop_ip_list};
+ sleep ${ROUTING_TABLE_REFRESH_PERIOD};
+ ip -6 route delete ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK};
+ done &"
+
+ # Start num_of_iperf_servers iperf3 clients in the source_ns namespace,
+ # each sending TCP traffic on sequential ports starting at 5001.
+ # Since iperf3 instances may terminate unexpectedly (e.g., if the route
+ # to the server is deleted in the background during a route refresh), a
+ # while loop is used to automatically restart them in such cases.
+ echo "info: starting ${num_of_iperf_servers} iperf3 clients in the source_ns namespace ..."
+ for i in $(seq 1 ${num_of_iperf_servers}); do
+ cmd="iperf3 -c ${SINK_LOOPBACK_IP_ADDR} -p $(printf '5%03d' ${i}) --length 64 --bitrate ${IPERF3_BITRATE} -t 0 --connect-timeout 150 &>/dev/null"
+ ip netns exec ${source_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null
+ done
+
+ echo "info: IPv6 routing table is being updated at the rate of $(echo "1/${ROUTING_TABLE_REFRESH_PERIOD}" | bc)/s for ${TEST_DURATION} seconds ..."
+ echo "info: A kernel soft lockup, if detected, results in a kernel panic!"
+
+ wait
+}
+
+# Make sure 'iperf3' is installed, skip the test otherwise
+if [ ! -x "$(command -v "iperf3")" ]; then
+ echo "SKIP: 'iperf3' is not installed. Skipping the test."
+ exit ${ksft_skip}
+fi
+
+# Determine the number of cores on the machine
+num_of_iperf_servers=$(( $(nproc)/2 ))
+
+# Check if we are running on a multi-core machine, skip the test otherwise
+if [ "${num_of_iperf_servers}" -eq 0 ]; then
+ echo "SKIP: This test is not valid on a single core machine!"
+ exit ${ksft_skip}
+fi
+
+# Since the kernel soft lockup we're testing causes at least one core to enter
+# an infinite loop, destabilizing the host and likely affecting subsequent
+# tests, we trigger a kernel panic instead of reporting a failure and
+# continuing
+kernel_softlokup_panic_prev_val=$(sysctl -n kernel.softlockup_panic)
+sysctl -qw kernel.softlockup_panic=1
+
+handle_sigint() {
+ termination_signal="SIGINT"
+ cleanup
+ exit ${ksft_skip}
+}
+
+handle_sigalrm() {
+ termination_signal="SIGALRM"
+ cleanup
+ exit ${ksft_pass}
+}
+
+trap handle_sigint SIGINT
+trap handle_sigalrm SIGALRM
+
+(sleep ${TEST_DURATION} && kill -s SIGALRM $$)&
+
+setup_prepare
+test_soft_lockup_during_routing_table_refresh
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index d0219032f773..f448bafb3f20 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -1,11 +1,17 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+net_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+source "$net_dir/lib/sh/defer.sh"
+
##############################################################################
# Defines
: "${WAIT_TIMEOUT:=20}"
+# Whether to pause on after a failure.
+: "${PAUSE_ON_FAIL:=no}"
+
BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
# Kselftest framework constants.
@@ -17,6 +23,11 @@ ksft_skip=4
# namespace list created by setup_ns
NS_LIST=()
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
##############################################################################
# Helpers
@@ -32,7 +43,7 @@ __ksft_status_merge()
weights[$i]=$((weight++))
done
- if [[ ${weights[$a]} > ${weights[$b]} ]]; then
+ if [[ ${weights[$a]} -ge ${weights[$b]} ]]; then
echo "$a"
return 0
else
@@ -125,6 +136,21 @@ slowwait_for_counter()
slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@"
}
+# Check for existence of tools which are built as part of selftests
+# but may also already exist in $PATH
+check_gen_prog()
+{
+ local prog_name=$1; shift
+
+ if ! which $prog_name >/dev/null 2>/dev/null; then
+ PATH=$PWD:$PATH
+ if ! which $prog_name >/dev/null; then
+ echo "'$prog_name' command not found; skipping tests"
+ exit $ksft_skip
+ fi
+ fi
+}
+
remove_ns_list()
{
local item=$1
@@ -146,6 +172,7 @@ cleanup_ns()
for ns in "$@"; do
[ -z "${ns}" ] && continue
+ ip netns pids "${ns}" 2> /dev/null | xargs -r kill || true
ip netns delete "${ns}" &> /dev/null || true
if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then
echo "Warn: Failed to remove namespace $ns"
@@ -190,11 +217,61 @@ setup_ns()
return $ksft_skip
fi
ip -n "${!ns_name}" link set lo up
+ ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ns_list+=("${!ns_name}")
done
NS_LIST+=("${ns_list[@]}")
}
+# Create netdevsim with given id and net namespace.
+create_netdevsim() {
+ local id="$1"
+ local ns="$2"
+
+ modprobe netdevsim &> /dev/null
+ udevadm settle
+
+ echo "$id 1" | ip netns exec $ns tee /sys/bus/netdevsim/new_device >/dev/null
+ local dev=$(ip netns exec $ns ls /sys/bus/netdevsim/devices/netdevsim$id/net)
+ ip -netns $ns link set dev $dev name nsim$id
+ ip -netns $ns link set dev nsim$id up
+
+ echo nsim$id
+}
+
+create_netdevsim_port() {
+ local nsim_id="$1"
+ local ns="$2"
+ local port_id="$3"
+ local perm_addr="$4"
+ local orig_dev
+ local new_dev
+ local nsim_path
+
+ nsim_path="/sys/bus/netdevsim/devices/netdevsim$nsim_id"
+
+ echo "$port_id $perm_addr" | ip netns exec "$ns" tee "$nsim_path"/new_port > /dev/null || return 1
+
+ orig_dev=$(ip netns exec "$ns" find "$nsim_path"/net/ -maxdepth 1 -name 'e*' | tail -n 1)
+ orig_dev=$(basename "$orig_dev")
+ new_dev="nsim${nsim_id}p$port_id"
+
+ ip -netns "$ns" link set dev "$orig_dev" name "$new_dev"
+ ip -netns "$ns" link set dev "$new_dev" up
+
+ echo "$new_dev"
+}
+
+# Remove netdevsim with given id.
+cleanup_netdevsim() {
+ local id="$1"
+
+ if [ -d "/sys/bus/netdevsim/devices/netdevsim$id/net" ]; then
+ echo "$id" > /sys/bus/netdevsim/del_device
+ fi
+}
+
tc_rule_stats_get()
{
local dev=$1; shift
@@ -217,3 +294,378 @@ tc_rule_handle_stats_get()
| jq ".[] | select(.options.handle == $handle) | \
.options.actions[0].stats$selector"
}
+
+# attach a qdisc with two children match/no-match and a flower filter to match
+tc_set_flower_counter() {
+ local -r ns=$1
+ local -r ipver=$2
+ local -r dev=$3
+ local -r flower_expr=$4
+
+ tc -n $ns qdisc add dev $dev root handle 1: prio bands 2 \
+ priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+ tc -n $ns qdisc add dev $dev parent 1:1 handle 11: pfifo
+ tc -n $ns qdisc add dev $dev parent 1:2 handle 12: pfifo
+
+ tc -n $ns filter add dev $dev parent 1: protocol ipv$ipver \
+ flower $flower_expr classid 1:2
+}
+
+tc_get_flower_counter() {
+ local -r ns=$1
+ local -r dev=$2
+
+ tc -n $ns -j -s qdisc show dev $dev handle 12: | jq .[0].packets
+}
+
+ret_set_ksft_status()
+{
+ local ksft_status=$1; shift
+ local msg=$1; shift
+
+ RET=$(ksft_status_merge $RET $ksft_status)
+ if (( $? )); then
+ retmsg=$msg
+ fi
+}
+
+log_test_result()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+ local result=$1; shift
+ local retmsg=$1
+
+ printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result"
+ if [[ $retmsg ]]; then
+ printf "\t%s\n" "$retmsg"
+ fi
+}
+
+pause_on_fail()
+{
+ if [[ $PAUSE_ON_FAIL == yes ]]; then
+ echo "Hit enter to continue, 'q' to quit"
+ read a
+ [[ $a == q ]] && exit 1
+ fi
+}
+
+handle_test_result_pass()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+
+ log_test_result "$test_name" "$opt_str" " OK "
+}
+
+handle_test_result_fail()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+
+ log_test_result "$test_name" "$opt_str" FAIL "$retmsg"
+ pause_on_fail
+}
+
+handle_test_result_xfail()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+
+ log_test_result "$test_name" "$opt_str" XFAIL "$retmsg"
+ pause_on_fail
+}
+
+handle_test_result_skip()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+
+ log_test_result "$test_name" "$opt_str" SKIP "$retmsg"
+}
+
+log_test()
+{
+ local test_name=$1
+ local opt_str=$2
+
+ if [[ $# -eq 2 ]]; then
+ opt_str="($opt_str)"
+ fi
+
+ if ((RET == ksft_pass)); then
+ handle_test_result_pass "$test_name" "$opt_str"
+ elif ((RET == ksft_xfail)); then
+ handle_test_result_xfail "$test_name" "$opt_str"
+ elif ((RET == ksft_skip)); then
+ handle_test_result_skip "$test_name" "$opt_str"
+ else
+ handle_test_result_fail "$test_name" "$opt_str"
+ fi
+
+ EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET)
+ return $RET
+}
+
+log_test_skip()
+{
+ RET=$ksft_skip retmsg= log_test "$@"
+}
+
+log_test_xfail()
+{
+ RET=$ksft_xfail retmsg= log_test "$@"
+}
+
+log_info()
+{
+ local msg=$1
+
+ echo "INFO: $msg"
+}
+
+tests_run()
+{
+ local current_test
+
+ for current_test in ${TESTS:-$ALL_TESTS}; do
+ in_defer_scope \
+ $current_test
+ done
+}
+
+# Whether FAILs should be interpreted as XFAILs. Internal.
+FAIL_TO_XFAIL=
+
+check_err()
+{
+ local err=$1
+ local msg=$2
+
+ if ((err)); then
+ if [[ $FAIL_TO_XFAIL = yes ]]; then
+ ret_set_ksft_status $ksft_xfail "$msg"
+ else
+ ret_set_ksft_status $ksft_fail "$msg"
+ fi
+ fi
+}
+
+check_fail()
+{
+ local err=$1
+ local msg=$2
+
+ check_err $((!err)) "$msg"
+}
+
+check_err_fail()
+{
+ local should_fail=$1; shift
+ local err=$1; shift
+ local what=$1; shift
+
+ if ((should_fail)); then
+ check_fail $err "$what succeeded, but should have failed"
+ else
+ check_err $err "$what failed"
+ fi
+}
+
+xfail()
+{
+ FAIL_TO_XFAIL=yes "$@"
+}
+
+xfail_on_slow()
+{
+ if [[ $KSFT_MACHINE_SLOW = yes ]]; then
+ FAIL_TO_XFAIL=yes "$@"
+ else
+ "$@"
+ fi
+}
+
+omit_on_slow()
+{
+ if [[ $KSFT_MACHINE_SLOW != yes ]]; then
+ "$@"
+ fi
+}
+
+xfail_on_veth()
+{
+ local dev=$1; shift
+ local kind
+
+ kind=$(ip -j -d link show dev $dev |
+ jq -r '.[].linkinfo.info_kind')
+ if [[ $kind = veth ]]; then
+ FAIL_TO_XFAIL=yes "$@"
+ else
+ "$@"
+ fi
+}
+
+mac_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
+kill_process()
+{
+ local pid=$1; shift
+
+ # Suppress noise from killing the process.
+ { kill $pid && wait $pid; } 2>/dev/null
+}
+
+check_command()
+{
+ local cmd=$1; shift
+
+ if [[ ! -x "$(command -v "$cmd")" ]]; then
+ log_test_skip "$cmd not installed"
+ return $EXIT_STATUS
+ fi
+}
+
+require_command()
+{
+ local cmd=$1; shift
+
+ if ! check_command "$cmd"; then
+ exit $EXIT_STATUS
+ fi
+}
+
+adf_ip_link_add()
+{
+ local name=$1; shift
+
+ ip link add name "$name" "$@" && \
+ defer ip link del dev "$name"
+}
+
+adf_ip_link_set_master()
+{
+ local member=$1; shift
+ local master=$1; shift
+
+ ip link set dev "$member" master "$master" && \
+ defer ip link set dev "$member" nomaster
+}
+
+adf_ip_link_set_addr()
+{
+ local name=$1; shift
+ local addr=$1; shift
+
+ local old_addr=$(mac_get "$name")
+ ip link set dev "$name" address "$addr" && \
+ defer ip link set dev "$name" address "$old_addr"
+}
+
+ip_link_has_flag()
+{
+ local name=$1; shift
+ local flag=$1; shift
+
+ local state=$(ip -j link show "$name" |
+ jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)')
+ [[ $state == true ]]
+}
+
+ip_link_is_up()
+{
+ ip_link_has_flag "$1" UP
+}
+
+adf_ip_link_set_up()
+{
+ local name=$1; shift
+
+ if ! ip_link_is_up "$name"; then
+ ip link set dev "$name" up && \
+ defer ip link set dev "$name" down
+ fi
+}
+
+adf_ip_link_set_down()
+{
+ local name=$1; shift
+
+ if ip_link_is_up "$name"; then
+ ip link set dev "$name" down && \
+ defer ip link set dev "$name" up
+ fi
+}
+
+adf_ip_addr_add()
+{
+ local name=$1; shift
+
+ ip addr add dev "$name" "$@" && \
+ defer ip addr del dev "$name" "$@"
+}
+
+adf_ip_route_add()
+{
+ ip route add "$@" && \
+ defer ip route del "$@"
+}
+
+adf_bridge_vlan_add()
+{
+ bridge vlan add "$@" && \
+ defer bridge vlan del "$@"
+}
+
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+ local protocol="${3}"
+ local pattern
+ local i
+
+ pattern=":$(printf "%04X" "${port}") "
+
+ # for tcp protocol additionally check the socket state
+ [ ${protocol} = "tcp" ] && pattern="${pattern}0A"
+ for i in $(seq 10); do
+ if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
+ /proc/net/"${protocol}"* | grep -q "${pattern}"; then
+ break
+ fi
+ sleep 0.1
+ done
+}
+
+cmd_jq()
+{
+ local cmd=$1
+ local jq_exp=$2
+ local jq_opts=$3
+ local ret
+ local output
+
+ output="$($cmd)"
+ # it the command fails, return error right away
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
+ output=$(echo $output | jq -r $jq_opts "$jq_exp")
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
+ echo $output
+ # return success only in case of non-empty output
+ [ ! -z "$output" ]
+}
diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore
index 1ebc6187f421..bbc97d6bf556 100644
--- a/tools/testing/selftests/net/lib/.gitignore
+++ b/tools/testing/selftests/net/lib/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
csum
+xdp_helper
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index 82c3264b115e..5339f56329e1 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -1,15 +1,24 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES)
# Additional include paths needed by kselftest.h
CFLAGS += -I../../
-TEST_FILES := ../../../../../Documentation/netlink/specs
-TEST_FILES += ../../../../net/ynl
+TEST_FILES := \
+ ../../../../net/ynl \
+ ../../../../../Documentation/netlink/specs \
+ ksft_setup_loopback.sh \
+# end of TEST_FILES
-TEST_GEN_FILES += csum
+TEST_GEN_FILES := \
+ $(patsubst %.c,%.o,$(wildcard *.bpf.c)) \
+ csum \
+ xdp_helper \
+# end of TEST_GEN_FILES
-TEST_INCLUDES := $(wildcard py/*.py)
+TEST_INCLUDES := $(wildcard py/*.py sh/*.sh)
include ../../lib.mk
+
+include ../bpf.mk
diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c
index b9f3fc3c3426..27437590eeb5 100644
--- a/tools/testing/selftests/net/lib/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -654,10 +654,16 @@ static int recv_verify_packet_ipv4(void *nh, int len)
{
struct iphdr *iph = nh;
uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+ uint16_t ip_len;
if (len < sizeof(*iph) || iph->protocol != proto)
return -1;
+ ip_len = ntohs(iph->tot_len);
+ if (ip_len > len || ip_len < sizeof(*iph))
+ return -1;
+
+ len = ip_len;
iph_addr_p = &iph->saddr;
if (proto == IPPROTO_TCP)
return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
@@ -669,16 +675,20 @@ static int recv_verify_packet_ipv6(void *nh, int len)
{
struct ipv6hdr *ip6h = nh;
uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+ uint16_t payload_len;
if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
return -1;
- iph_addr_p = &ip6h->saddr;
+ payload_len = ntohs(ip6h->payload_len);
+ if (payload_len > len - sizeof(*ip6h))
+ return -1;
+ iph_addr_p = &ip6h->saddr;
if (proto == IPPROTO_TCP)
- return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h));
+ return recv_verify_packet_tcp(ip6h + 1, payload_len);
else
- return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h));
+ return recv_verify_packet_udp(ip6h + 1, payload_len);
}
/* return whether auxdata includes TP_STATUS_CSUM_VALID */
diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h
new file mode 100644
index 000000000000..17dc34a612c6
--- /dev/null
+++ b/tools/testing/selftests/net/lib/ksft.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(__NET_KSFT_H__)
+#define __NET_KSFT_H__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static inline void ksft_ready(void)
+{
+ const char msg[7] = "ready\n";
+ char *env_str;
+ int fd;
+
+ env_str = getenv("KSFT_READY_FD");
+ if (env_str) {
+ fd = atoi(env_str);
+ if (!fd) {
+ fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n",
+ env_str);
+ return;
+ }
+ } else {
+ fd = STDOUT_FILENO;
+ }
+
+ write(fd, msg, sizeof(msg));
+ if (fd != STDOUT_FILENO)
+ close(fd);
+}
+
+static inline void ksft_wait(void)
+{
+ char *env_str;
+ char byte;
+ int fd;
+
+ env_str = getenv("KSFT_WAIT_FD");
+ if (env_str) {
+ fd = atoi(env_str);
+ if (!fd) {
+ fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n",
+ env_str);
+ return;
+ }
+ } else {
+ /* Not running in KSFT env, wait for input from STDIN instead */
+ fd = STDIN_FILENO;
+ }
+
+ read(fd, &byte, sizeof(byte));
+ if (fd != STDIN_FILENO)
+ close(fd);
+}
+
+#endif
diff --git a/tools/testing/selftests/net/lib/ksft_setup_loopback.sh b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh
new file mode 100755
index 000000000000..3defbb1919c5
--- /dev/null
+++ b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Setup script for running ksft tests over a real interface in loopback mode.
+# This scripts replaces the historical setup_loopback.sh. It puts
+# a (presumably) real hardware interface into loopback mode, creates macvlan
+# interfaces on top and places them in a network namespace for isolation.
+#
+# NETIF env variable must be exported to indicate the real target device.
+# Note that the test will override NETIF with one of the macvlans, the
+# actual ksft test will only see the macvlans.
+#
+# Example use:
+# export NETIF=eth0
+# ./net/lib/ksft_setup_loopback.sh ./drivers/net/gro.py
+
+if [ -z "$NETIF" ]; then
+ echo "Error: NETIF variable not set"
+ exit 1
+fi
+if ! [ -d "/sys/class/net/$NETIF" ]; then
+ echo "Error: Can't find $NETIF, invalid netdevice"
+ exit 1
+fi
+
+# Save original settings for cleanup
+readonly FLUSH_PATH="/sys/class/net/${NETIF}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${NETIF}/napi_defer_hard_irqs"
+FLUSH_TIMEOUT="$(< "${FLUSH_PATH}")"
+readonly FLUSH_TIMEOUT
+HARD_IRQS="$(< "${IRQ_PATH}")"
+readonly HARD_IRQS
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+readonly SERVER_NS
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+readonly CLIENT_NS
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+# ksft expects addresses to communicate with remote
+export LOCAL_V6=2001:db8:1::1
+export REMOTE_V6=2001:db8:1::2
+
+cleanup() {
+ local exit_code=$?
+
+ echo "Cleaning up..."
+
+ # Remove macvlan interfaces and namespaces
+ ip -netns "${SERVER_NS}" link del dev server 2>/dev/null || true
+ ip netns del "${SERVER_NS}" 2>/dev/null || true
+ ip -netns "${CLIENT_NS}" link del dev client 2>/dev/null || true
+ ip netns del "${CLIENT_NS}" 2>/dev/null || true
+
+ # Disable loopback
+ ethtool -K "${NETIF}" loopback off 2>/dev/null || true
+ sleep 1
+
+ echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+ echo "${HARD_IRQS}" >"${IRQ_PATH}"
+
+ exit $exit_code
+}
+
+trap cleanup EXIT INT TERM
+
+# Enable loopback mode
+echo "Enabling loopback on ${NETIF}..."
+ethtool -K "${NETIF}" loopback on || {
+ echo "Failed to enable loopback mode"
+ exit 1
+}
+# The interface may need time to get carrier back, but selftests
+# will wait for carrier, so no need to wait / sleep here.
+
+# Use timer on host to trigger the network stack
+# Also disable device interrupt to not depend on NIC interrupt
+# Reduce test flakiness caused by unexpected interrupts
+echo 100000 >"${FLUSH_PATH}"
+echo 50 >"${IRQ_PATH}"
+
+# Create server namespace with macvlan
+ip netns add "${SERVER_NS}"
+ip link add link "${NETIF}" dev server address "${SERVER_MAC}" type macvlan
+ip link set dev server netns "${SERVER_NS}"
+ip -netns "${SERVER_NS}" link set dev server up
+ip -netns "${SERVER_NS}" addr add $LOCAL_V6/64 dev server
+ip -netns "${SERVER_NS}" link set dev lo up
+
+# Create client namespace with macvlan
+ip netns add "${CLIENT_NS}"
+ip link add link "${NETIF}" dev client address "${CLIENT_MAC}" type macvlan
+ip link set dev client netns "${CLIENT_NS}"
+ip -netns "${CLIENT_NS}" link set dev client up
+ip -netns "${CLIENT_NS}" addr add $REMOTE_V6/64 dev client
+ip -netns "${CLIENT_NS}" link set dev lo up
+
+echo "Setup complete!"
+echo " Device: ${NETIF}"
+echo " Server NS: ${SERVER_NS}"
+echo " Client NS: ${CLIENT_NS}"
+echo ""
+
+# Setup environment variables for tests
+export NETIF=server
+export REMOTE_TYPE=netns
+export REMOTE_ARGS="${CLIENT_NS}"
+
+# Run the command
+ip netns exec "${SERVER_NS}" "$@"
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index b6d498d125fe..40f9ce307dd1 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -1,8 +1,33 @@
# SPDX-License-Identifier: GPL-2.0
+"""
+Python selftest helpers for netdev.
+"""
+
from .consts import KSRC
-from .ksft import *
-from .netns import NetNS
-from .nsim import *
-from .utils import *
-from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily
+from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \
+ ksft_ne, ksft_true, ksft_not_none, ksft_in, ksft_not_in, ksft_is, \
+ ksft_ge, ksft_gt, ksft_lt, ksft_raises, ksft_busy_wait, \
+ ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit, \
+ ksft_variants, KsftNamedVariant
+from .netns import NetNS, NetNSEnter
+from .nsim import NetdevSim, NetdevSimDev
+from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \
+ bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file
+from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily
+from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily
+
+__all__ = ["KSRC",
+ "KsftFailEx", "KsftSkipEx", "KsftXfailEx", "ksft_pr", "ksft_eq",
+ "ksft_ne", "ksft_true", "ksft_not_none", "ksft_in", "ksft_not_in",
+ "ksft_is", "ksft_ge", "ksft_gt", "ksft_lt", "ksft_raises",
+ "ksft_busy_wait", "ktap_result", "ksft_disruptive", "ksft_setup",
+ "ksft_run", "ksft_exit", "ksft_variants", "KsftNamedVariant",
+ "NetNS", "NetNSEnter",
+ "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer",
+ "bpftool", "ip", "ethtool", "bpftrace", "rand_port",
+ "wait_port_listen", "wait_file",
+ "NetdevSim", "NetdevSimDev",
+ "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError",
+ "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily",
+ "RtnlAddrFamily"]
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index f26c20df9db4..531e7fa1b3ea 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -1,15 +1,18 @@
# SPDX-License-Identifier: GPL-2.0
-import builtins
+import functools
import inspect
+import signal
import sys
import time
import traceback
+from collections import namedtuple
from .consts import KSFT_MAIN_NAME
from .utils import global_defer_queue
KSFT_RESULT = None
KSFT_RESULT_ALL = True
+KSFT_DISRUPTIVE = True
class KsftFailEx(Exception):
@@ -24,7 +27,12 @@ class KsftXfailEx(Exception):
pass
+class KsftTerminate(KeyboardInterrupt):
+ pass
+
+
def ksft_pr(*objs, **kwargs):
+ kwargs["flush"] = True
print("#", *objs, **kwargs)
@@ -32,8 +40,18 @@ def _fail(*args):
global KSFT_RESULT
KSFT_RESULT = False
- frame = inspect.stack()[2]
- ksft_pr("At " + frame.filename + " line " + str(frame.lineno) + ":")
+ stack = inspect.stack()
+ started = False
+ for frame in reversed(stack[2:]):
+ # Start printing from the test case function
+ if not started:
+ if frame.function == 'ksft_run':
+ started = True
+ continue
+
+ ksft_pr("Check| At " + frame.filename + ", line " + str(frame.lineno) +
+ ", in " + frame.function + ":")
+ ksft_pr("Check| " + frame.code_context[0].strip())
ksft_pr(*args)
@@ -43,21 +61,47 @@ def ksft_eq(a, b, comment=""):
_fail("Check failed", a, "!=", b, comment)
+def ksft_ne(a, b, comment=""):
+ global KSFT_RESULT
+ if a == b:
+ _fail("Check failed", a, "==", b, comment)
+
+
def ksft_true(a, comment=""):
if not a:
_fail("Check failed", a, "does not eval to True", comment)
+def ksft_not_none(a, comment=""):
+ if a is None:
+ _fail("Check failed", a, "is None", comment)
+
+
def ksft_in(a, b, comment=""):
if a not in b:
_fail("Check failed", a, "not in", b, comment)
+def ksft_not_in(a, b, comment=""):
+ if a in b:
+ _fail("Check failed", a, "in", b, comment)
+
+
+def ksft_is(a, b, comment=""):
+ if a is not b:
+ _fail("Check failed", a, "is not", b, comment)
+
+
def ksft_ge(a, b, comment=""):
if a < b:
_fail("Check failed", a, "<", b, comment)
+def ksft_gt(a, b, comment=""):
+ if a <= b:
+ _fail("Check failed", a, "<=", b, comment)
+
+
def ksft_lt(a, b, comment=""):
if a >= b:
_fail("Check failed", a, ">=", b, comment)
@@ -92,7 +136,7 @@ def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""):
time.sleep(sleep)
-def ktap_result(ok, cnt=1, case="", comment=""):
+def ktap_result(ok, cnt=1, case_name="", comment=""):
global KSFT_RESULT_ALL
KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok
@@ -102,11 +146,11 @@ def ktap_result(ok, cnt=1, case="", comment=""):
res += "ok "
res += str(cnt) + " "
res += KSFT_MAIN_NAME
- if case:
- res += "." + str(case.__name__)
+ if case_name:
+ res += "." + case_name
if comment:
res += " # " + comment
- print(res)
+ print(res, flush=True)
def ksft_flush_defer():
@@ -119,7 +163,7 @@ def ksft_flush_defer():
entry = global_defer_queue.pop()
try:
entry.exec_only()
- except:
+ except Exception:
ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!")
tb = traceback.format_exc()
for line in tb.strip().split('\n'):
@@ -127,9 +171,102 @@ def ksft_flush_defer():
KSFT_RESULT = False
-def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+KsftCaseFunction = namedtuple("KsftCaseFunction",
+ ['name', 'original_func', 'variants'])
+
+
+def ksft_disruptive(func):
+ """
+ Decorator that marks the test as disruptive (e.g. the test
+ that can down the interface). Disruptive tests can be skipped
+ by passing DISRUPTIVE=False environment variable.
+ """
+
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ if not KSFT_DISRUPTIVE:
+ raise KsftSkipEx("marked as disruptive")
+ return func(*args, **kwargs)
+ return wrapper
+
+
+class KsftNamedVariant:
+ """ Named string name + argument list tuple for @ksft_variants """
+
+ def __init__(self, name, *params):
+ self.params = params
+ self.name = name or "_".join([str(x) for x in self.params])
+
+
+def ksft_variants(params):
+ """
+ Decorator defining the sets of inputs for a test.
+ The parameters will be included in the name of the resulting sub-case.
+ Parameters can be either single object, tuple or a KsftNamedVariant.
+ The argument can be a list or a generator.
+
+ Example:
+
+ @ksft_variants([
+ (1, "a"),
+ (2, "b"),
+ KsftNamedVariant("three", 3, "c"),
+ ])
+ def my_case(cfg, a, b):
+ pass # ...
+
+ ksft_run(cases=[my_case], args=(cfg, ))
+
+ Will generate cases:
+ my_case.1_a
+ my_case.2_b
+ my_case.three
+ """
+
+ return lambda func: KsftCaseFunction(func.__name__, func, params)
+
+
+def ksft_setup(env):
+ """
+ Setup test framework global state from the environment.
+ """
+
+ def get_bool(env, name):
+ value = env.get(name, "").lower()
+ if value in ["yes", "true"]:
+ return True
+ if value in ["no", "false"]:
+ return False
+ try:
+ return bool(int(value))
+ except Exception:
+ raise Exception(f"failed to parse {name}")
+
+ if "DISRUPTIVE" in env:
+ global KSFT_DISRUPTIVE
+ KSFT_DISRUPTIVE = get_bool(env, "DISRUPTIVE")
+
+ return env
+
+
+def _ksft_intr(signum, frame):
+ # ksft runner.sh sends 2 SIGTERMs in a row on a timeout
+ # if we don't ignore the second one it will stop us from handling cleanup
+ global term_cnt
+ term_cnt += 1
+ if term_cnt == 1:
+ raise KsftTerminate()
+ else:
+ ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...")
+
+
+def _ksft_generate_test_cases(cases, globs, case_pfx, args):
+ """Generate a flat list of (func, args, name) tuples"""
+
cases = cases or []
+ test_cases = []
+ # If using the globs method find all relevant functions
if globs and case_pfx:
for key, value in globs.items():
if not callable(value):
@@ -139,22 +276,47 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cases.append(value)
break
+ for func in cases:
+ if isinstance(func, KsftCaseFunction):
+ # Parametrized test - create case for each param
+ for param in func.variants:
+ if not isinstance(param, KsftNamedVariant):
+ if not isinstance(param, tuple):
+ param = (param, )
+ param = KsftNamedVariant(None, *param)
+
+ test_cases.append((func.original_func,
+ (*args, *param.params),
+ func.name + "." + param.name))
+ else:
+ test_cases.append((func, args, func.__name__))
+
+ return test_cases
+
+
+def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+ test_cases = _ksft_generate_test_cases(cases, globs, case_pfx, args)
+
+ global term_cnt
+ term_cnt = 0
+ prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr)
+
totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
- print("KTAP version 1")
- print("1.." + str(len(cases)))
+ print("TAP version 13", flush=True)
+ print("1.." + str(len(test_cases)), flush=True)
global KSFT_RESULT
cnt = 0
stop = False
- for case in cases:
+ for func, args, name in test_cases:
KSFT_RESULT = True
cnt += 1
comment = ""
cnt_key = ""
try:
- case(*args)
+ func(*args)
except KsftSkipEx as e:
comment = "SKIP " + str(e)
cnt_key = 'skip'
@@ -167,21 +329,37 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
for line in tb.strip().split('\n'):
ksft_pr("Exception|", line)
if stop:
- ksft_pr("Stopping tests due to KeyboardInterrupt.")
+ ksft_pr(f"Stopping tests due to {type(e).__name__}.")
KSFT_RESULT = False
cnt_key = 'fail'
- ksft_flush_defer()
+ try:
+ ksft_flush_defer()
+ except BaseException as e:
+ tb = traceback.format_exc()
+ for line in tb.strip().split('\n'):
+ ksft_pr("Exception|", line)
+ if isinstance(e, KeyboardInterrupt):
+ ksft_pr()
+ ksft_pr("WARN: defer() interrupted, cleanup may be incomplete.")
+ ksft_pr(" Attempting to finish cleanup before exiting.")
+ ksft_pr(" Interrupt again to exit immediately.")
+ ksft_pr()
+ stop = True
+ # Flush was interrupted, try to finish the job best we can
+ ksft_flush_defer()
if not cnt_key:
cnt_key = 'pass' if KSFT_RESULT else 'fail'
- ktap_result(KSFT_RESULT, cnt, case, comment=comment)
+ ktap_result(KSFT_RESULT, cnt, name, comment=comment)
totals[cnt_key] += 1
if stop:
break
+ signal.signal(signal.SIGTERM, prev_sigterm)
+
print(
f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
)
diff --git a/tools/testing/selftests/net/lib/py/netns.py b/tools/testing/selftests/net/lib/py/netns.py
index ecff85f9074f..8e9317044eef 100644
--- a/tools/testing/selftests/net/lib/py/netns.py
+++ b/tools/testing/selftests/net/lib/py/netns.py
@@ -1,9 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
from .utils import ip
+import ctypes
import random
import string
+libc = ctypes.cdll.LoadLibrary('libc.so.6')
+
class NetNS:
def __init__(self, name=None):
@@ -29,3 +32,18 @@ class NetNS:
def __repr__(self):
return f"NetNS({self.name})"
+
+
+class NetNSEnter:
+ def __init__(self, ns_name):
+ self.ns_path = f"/run/netns/{ns_name}"
+
+ def __enter__(self):
+ self.saved = open("/proc/thread-self/ns/net")
+ with open(self.ns_path) as ns_file:
+ libc.setns(ns_file.fileno(), 0)
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ libc.setns(self.saved.fileno(), 0)
+ self.saved.close()
diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py
index f571a8b3139b..7c640ed64c0b 100644
--- a/tools/testing/selftests/net/lib/py/nsim.py
+++ b/tools/testing/selftests/net/lib/py/nsim.py
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
+import errno
import json
import os
import random
@@ -26,7 +27,7 @@ class NetdevSim:
self.port_index = port_index
self.ns = ns
self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
- ret = ip("-j link show dev %s" % ifname, ns=ns)
+ ret = ip("-d -j link show dev %s" % ifname, ns=ns)
self.dev = json.loads(ret.stdout)[0]
self.ifindex = self.dev["ifindex"]
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 72590c3f90f1..106ee1f2df86 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -1,33 +1,80 @@
# SPDX-License-Identifier: GPL-2.0
-import errno
import json as _json
-import random
+import os
import re
+import select
import socket
import subprocess
import time
class CmdExitFailure(Exception):
- pass
+ def __init__(self, msg, cmd_obj):
+ super().__init__(msg)
+ self.cmd = cmd_obj
+
+
+def fd_read_timeout(fd, timeout):
+ rlist, _, _ = select.select([fd], [], [], timeout)
+ if rlist:
+ return os.read(fd, 1024)
+ raise TimeoutError("Timeout waiting for fd read")
class cmd:
- def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5):
+ """
+ Execute a command on local or remote host.
+
+ @shell defaults to false, and class will try to split @comm into a list
+ if it's a string with spaces.
+
+ Use bkg() instead to run a command in the background.
+ """
+ def __init__(self, comm, shell=None, fail=True, ns=None, background=False,
+ host=None, timeout=5, ksft_ready=None, ksft_wait=None):
if ns:
comm = f'ip netns exec {ns} ' + comm
self.stdout = None
self.stderr = None
self.ret = None
+ self.ksft_term_fd = None
self.comm = comm
if host:
self.proc = host.cmd(comm)
else:
+ # If user doesn't explicitly request shell try to avoid it.
+ if shell is None and isinstance(comm, str) and ' ' in comm:
+ comm = comm.split()
+
+ # ksft_wait lets us wait for the background process to fully start,
+ # we pass an FD to the child process, and wait for it to write back.
+ # Similarly term_fd tells child it's time to exit.
+ pass_fds = []
+ env = os.environ.copy()
+ if ksft_wait is not None:
+ wait_fd, self.ksft_term_fd = os.pipe()
+ pass_fds.append(wait_fd)
+ env["KSFT_WAIT_FD"] = str(wait_fd)
+ ksft_ready = True # ksft_wait implies ready
+ if ksft_ready is not None:
+ rfd, ready_fd = os.pipe()
+ pass_fds.append(ready_fd)
+ env["KSFT_READY_FD"] = str(ready_fd)
+
self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
+ stderr=subprocess.PIPE, pass_fds=pass_fds,
+ env=env)
+ if ksft_wait is not None:
+ os.close(wait_fd)
+ if ksft_ready is not None:
+ os.close(ready_fd)
+ msg = fd_read_timeout(rfd, ksft_wait)
+ os.close(rfd)
+ if not msg:
+ raise Exception("Did not receive ready message")
if not background:
self.process(terminate=False, fail=fail, timeout=timeout)
@@ -35,6 +82,8 @@ class cmd:
if fail is None:
fail = not terminate
+ if self.ksft_term_fd:
+ os.write(self.ksft_term_fd, b"1")
if terminate:
self.proc.terminate()
stdout, stderr = self.proc.communicate(timeout)
@@ -48,22 +97,48 @@ class cmd:
if len(stderr) > 0 and stderr[-1] == "\n":
stderr = stderr[:-1]
raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" %
- (self.proc.args, stdout, stderr))
+ (self.proc.args, stdout, stderr), self)
class bkg(cmd):
- def __init__(self, comm, shell=True, fail=None, ns=None, host=None,
- exit_wait=False):
+ """
+ Run a command in the background.
+
+ Examples usage:
+
+ Run a command on remote host, and wait for it to finish.
+ This is usually paired with wait_port_listen() to make sure
+ the command has initialized:
+
+ with bkg("socat ...", exit_wait=True, host=cfg.remote) as nc:
+ ...
+
+ Run a command and expect it to let us know that it's ready
+ by writing to a special file descriptor passed via KSFT_READY_FD.
+ Command will be terminated when we exit the context manager:
+
+ with bkg("my_binary", ksft_wait=5):
+ """
+ def __init__(self, comm, shell=None, fail=None, ns=None, host=None,
+ exit_wait=False, ksft_ready=None, ksft_wait=None):
super().__init__(comm, background=True,
- shell=shell, fail=fail, ns=ns, host=host)
- self.terminate = not exit_wait
+ shell=shell, fail=fail, ns=ns, host=host,
+ ksft_ready=ksft_ready, ksft_wait=ksft_wait)
+ self.terminate = not exit_wait and not ksft_wait
+ self._exit_wait = exit_wait
self.check_fail = fail
+ if shell and self.terminate:
+ print("# Warning: combining shell and terminate is risky!")
+ print("# SIGTERM may not reach the child on zsh/ksh!")
+
def __enter__(self):
return self
def __exit__(self, ex_type, ex_value, ex_tb):
- return self.process(terminate=self.terminate, fail=self.check_fail)
+ # Force termination on exception
+ terminate = self.terminate or (self._exit_wait and ex_type)
+ return self.process(terminate=terminate, fail=self.check_fail)
global_defer_queue = []
@@ -71,8 +146,6 @@ global_defer_queue = []
class defer:
def __init__(self, func, *args, **kwargs):
- global global_defer_queue
-
if not callable(func):
raise Exception("defer created with un-callable object, did you call the function instead of passing its name?")
@@ -111,6 +184,10 @@ def tool(name, args, json=None, ns=None, host=None):
return cmd_obj
+def bpftool(args, json=None, ns=None, host=None):
+ return tool('bpftool', args, json=json, ns=ns, host=host)
+
+
def ip(args, json=None, ns=None, host=None):
if ns:
args = f'-netns {ns} ' + args
@@ -121,20 +198,48 @@ def ethtool(args, json=None, ns=None, host=None):
return tool('ethtool', args, json=json, ns=ns, host=host)
-def rand_port():
+def bpftrace(expr, json=None, ns=None, host=None, timeout=None):
"""
- Get a random unprivileged port, try to make sure it's not already used.
+ Run bpftrace and return map data (if json=True).
+ The output of bpftrace is inconvenient, so the helper converts
+ to a dict indexed by map name, e.g.:
+ {
+ "@": { ... },
+ "@map2": { ... },
+ }
"""
- for _ in range(1000):
- port = random.randint(10000, 65535)
- try:
- with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
- s.bind(("", port))
- return port
- except OSError as e:
- if e.errno != errno.EADDRINUSE:
- raise
- raise Exception("Can't find any free unprivileged port")
+ cmd_arr = ['bpftrace']
+ # Throw in --quiet if json, otherwise the output has two objects
+ if json:
+ cmd_arr += ['-f', 'json', '-q']
+ if timeout:
+ expr += ' interval:s:' + str(timeout) + ' { exit(); }'
+ cmd_arr += ['-e', expr]
+ cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False)
+ if json:
+ # bpftrace prints objects as lines
+ ret = {}
+ for l in cmd_obj.stdout.split('\n'):
+ if not l.strip():
+ continue
+ one = _json.loads(l)
+ if one.get('type') != 'map':
+ continue
+ for k, v in one["data"].items():
+ if k.startswith('@'):
+ k = k.lstrip('@')
+ ret[k] = v
+ return ret
+ return cmd_obj
+
+
+def rand_port(stype=socket.SOCK_STREAM):
+ """
+ Get a random unprivileged port.
+ """
+ with socket.socket(socket.AF_INET6, stype) as s:
+ s.bind(("", 0))
+ return s.getsockname()[1]
def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5):
@@ -153,3 +258,21 @@ def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadlin
if time.monotonic() > end:
raise Exception("Waiting for port listen timed out")
time.sleep(sleep)
+
+
+def wait_file(fname, test_fn, sleep=0.005, deadline=5, encoding='utf-8'):
+ """
+ Wait for file contents on the local system to satisfy a condition.
+ test_fn() should take one argument (file contents) and return whether
+ condition is met.
+ """
+ end = time.monotonic() + deadline
+
+ with open(fname, "r", encoding=encoding) as fp:
+ while True:
+ if test_fn(fp.read()):
+ break
+ fp.seek(0)
+ if time.monotonic() > end:
+ raise TimeoutError("Wait for file contents failed", fname)
+ time.sleep(sleep)
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 1ace58370c06..32c223e93b2c 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -13,14 +13,14 @@ try:
SPEC_PATH = KSFT_DIR / "net/lib/specs"
sys.path.append(tools_full_path.as_posix())
- from net.lib.ynl.lib import YnlFamily, NlError
+ from net.lib.ynl.pyynl.lib import YnlFamily, NlError
else:
# Running in tree
tools_full_path = KSRC / "tools"
SPEC_PATH = KSRC / "Documentation/netlink/specs"
sys.path.append(tools_full_path.as_posix())
- from net.ynl.lib import YnlFamily, NlError
+ from net.ynl.pyynl.lib import YnlFamily, NlError
except ModuleNotFoundError as e:
ksft_pr("Failed importing `ynl` library from kernel sources")
ksft_pr(str(e))
@@ -32,18 +32,37 @@ except ModuleNotFoundError as e:
# Set schema='' to avoid jsonschema validation, it's slow
#
class EthtoolFamily(YnlFamily):
- def __init__(self):
+ def __init__(self, recv_size=0):
super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(),
- schema='')
+ schema='', recv_size=recv_size)
class RtnlFamily(YnlFamily):
- def __init__(self):
- super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
- schema='')
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
+class RtnlAddrFamily(YnlFamily):
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
class NetdevFamily(YnlFamily):
- def __init__(self):
+ def __init__(self, recv_size=0):
super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(),
- schema='')
+ schema='', recv_size=recv_size)
+
+class NetshaperFamily(YnlFamily):
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
+
+class DevlinkFamily(YnlFamily):
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
+
+class PSPFamily(YnlFamily):
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('psp.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
diff --git a/tools/testing/selftests/net/lib/sh/defer.sh b/tools/testing/selftests/net/lib/sh/defer.sh
new file mode 100644
index 000000000000..47ab78c4d465
--- /dev/null
+++ b/tools/testing/selftests/net/lib/sh/defer.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Whether to pause and allow debugging when an executed deferred command has a
+# non-zero exit code.
+: "${DEFER_PAUSE_ON_FAIL:=no}"
+
+# map[(scope_id,track,cleanup_id) -> cleanup_command]
+# track={d=default | p=priority}
+declare -A __DEFER__JOBS
+
+# map[(scope_id,track) -> # cleanup_commands]
+declare -A __DEFER__NJOBS
+
+# scope_id of the topmost scope.
+__DEFER__SCOPE_ID=0
+
+__defer__ndefer_key()
+{
+ local track=$1; shift
+
+ echo $__DEFER__SCOPE_ID,$track
+}
+
+__defer__defer_key()
+{
+ local track=$1; shift
+ local defer_ix=$1; shift
+
+ echo $__DEFER__SCOPE_ID,$track,$defer_ix
+}
+
+__defer__ndefers()
+{
+ local track=$1; shift
+
+ echo ${__DEFER__NJOBS[$(__defer__ndefer_key $track)]}
+}
+
+__defer__run()
+{
+ local track=$1; shift
+ local defer_ix=$1; shift
+ local defer_key=$(__defer__defer_key $track $defer_ix)
+ local ret
+
+ eval ${__DEFER__JOBS[$defer_key]}
+ ret=$?
+
+ if [[ "$DEFER_PAUSE_ON_FAIL" == yes && "$ret" -ne 0 ]]; then
+ echo "Deferred command (track $track index $defer_ix):"
+ echo " ${__DEFER__JOBS[$defer_key]}"
+ echo "... ended with an exit status of $ret"
+ echo "Hit enter to continue, 'q' to quit"
+ read a
+ [[ "$a" == q ]] && exit 1
+ fi
+
+ unset __DEFER__JOBS[$defer_key]
+}
+
+__defer__schedule()
+{
+ local track=$1; shift
+ local ndefers=$(__defer__ndefers $track)
+ local ndefers_key=$(__defer__ndefer_key $track)
+ local defer_key=$(__defer__defer_key $track $ndefers)
+ local defer="${@@Q}"
+
+ __DEFER__JOBS[$defer_key]="$defer"
+ __DEFER__NJOBS[$ndefers_key]=$((ndefers + 1))
+}
+
+__defer__scope_wipe()
+{
+ __DEFER__NJOBS[$(__defer__ndefer_key d)]=0
+ __DEFER__NJOBS[$(__defer__ndefer_key p)]=0
+}
+
+defer_scope_push()
+{
+ ((__DEFER__SCOPE_ID++))
+ __defer__scope_wipe
+}
+
+defer_scope_pop()
+{
+ local defer_ix
+
+ for ((defer_ix=$(__defer__ndefers p); defer_ix-->0; )); do
+ __defer__run p $defer_ix
+ done
+
+ for ((defer_ix=$(__defer__ndefers d); defer_ix-->0; )); do
+ __defer__run d $defer_ix
+ done
+
+ __defer__scope_wipe
+ ((__DEFER__SCOPE_ID--))
+}
+
+defer()
+{
+ __defer__schedule d "$@"
+}
+
+defer_prio()
+{
+ __defer__schedule p "$@"
+}
+
+defer_scopes_cleanup()
+{
+ while ((__DEFER__SCOPE_ID >= 0)); do
+ defer_scope_pop
+ done
+}
+
+in_defer_scope()
+{
+ local ret
+
+ defer_scope_push
+ "$@"
+ ret=$?
+ defer_scope_pop
+
+ return $ret
+}
+
+__defer__scope_wipe
diff --git a/tools/testing/selftests/net/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
index d988b2e0cee8..e73fab3edd9f 100644
--- a/tools/testing/selftests/net/xdp_dummy.bpf.c
+++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
@@ -10,4 +10,10 @@ int xdp_dummy_prog(struct xdp_md *ctx)
return XDP_PASS;
}
+SEC("xdp.frags")
+int xdp_dummy_prog_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/lib/xdp_helper.c b/tools/testing/selftests/net/lib/xdp_helper.c
new file mode 100644
index 000000000000..eb025a9f35b1
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_helper.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <linux/if_xdp.h>
+#include <linux/if_link.h>
+#include <net/if.h>
+#include <inttypes.h>
+
+#include "ksft.h"
+
+#define UMEM_SZ (1U << 16)
+#define NUM_DESC (UMEM_SZ / 2048)
+
+
+static void print_usage(const char *bin)
+{
+ fprintf(stderr, "Usage: %s ifindex queue_id [-z]\n\n"
+ "where:\n\t-z: force zerocopy mode", bin);
+}
+
+/* this is a simple helper program that creates an XDP socket and does the
+ * minimum necessary to get bind() to succeed.
+ *
+ * this test program is not intended to actually process packets, but could be
+ * extended in the future if that is actually needed.
+ *
+ * it is used by queues.py to ensure the xsk netlinux attribute is set
+ * correctly.
+ */
+int main(int argc, char **argv)
+{
+ struct xdp_umem_reg umem_reg = { 0 };
+ struct sockaddr_xdp sxdp = { 0 };
+ int num_desc = NUM_DESC;
+ void *umem_area;
+ int retry = 0;
+ int ifindex;
+ int sock_fd;
+ int queue;
+
+ if (argc != 3 && argc != 4) {
+ print_usage(argv[0]);
+ return 1;
+ }
+
+ sock_fd = socket(AF_XDP, SOCK_RAW, 0);
+ if (sock_fd < 0) {
+ perror("socket creation failed");
+ /* if the kernel doesn't support AF_XDP, let the test program
+ * know with -1. All other error paths return 1.
+ */
+ if (errno == EAFNOSUPPORT)
+ return -1;
+ return 1;
+ }
+
+ /* "Probing mode", just checking if AF_XDP sockets are supported */
+ if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-")) {
+ printf("AF_XDP support detected\n");
+ close(sock_fd);
+ return 0;
+ }
+
+ ifindex = atoi(argv[1]);
+ queue = atoi(argv[2]);
+
+ umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE |
+ MAP_ANONYMOUS, -1, 0);
+ if (umem_area == MAP_FAILED) {
+ perror("mmap failed");
+ return 1;
+ }
+
+ umem_reg.addr = (uintptr_t)umem_area;
+ umem_reg.len = UMEM_SZ;
+ umem_reg.chunk_size = 2048;
+ umem_reg.headroom = 0;
+
+ setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg,
+ sizeof(umem_reg));
+ setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc,
+ sizeof(num_desc));
+ setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc,
+ sizeof(num_desc));
+ setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc));
+
+ sxdp.sxdp_family = AF_XDP;
+ sxdp.sxdp_ifindex = ifindex;
+ sxdp.sxdp_queue_id = queue;
+ sxdp.sxdp_flags = 0;
+
+ if (argc > 3) {
+ if (!strcmp(argv[3], "-z")) {
+ sxdp.sxdp_flags = XDP_ZEROCOPY;
+ } else {
+ print_usage(argv[0]);
+ return 1;
+ }
+ }
+
+ while (1) {
+ if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0)
+ break;
+
+ if (errno == EBUSY && retry < 3) {
+ retry++;
+ sleep(1);
+ continue;
+ } else {
+ perror("bind failed");
+ munmap(umem_area, UMEM_SZ);
+ close(sock_fd);
+ return 1;
+ }
+ }
+
+ ksft_ready();
+ ksft_wait();
+
+ /* parent program will write a byte to stdin when its ready for this
+ * helper to exit
+ */
+
+ close(sock_fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
new file mode 100644
index 000000000000..64f05229ab24
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -0,0 +1,680 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_ADJST_OFFSET 256
+#define MAX_PAYLOAD_LEN 5000
+#define MAX_HDR_LEN 64
+
+extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym __weak;
+
+enum {
+ XDP_MODE = 0,
+ XDP_PORT = 1,
+ XDP_ADJST_OFFSET = 2,
+ XDP_ADJST_TAG = 3,
+} xdp_map_setup_keys;
+
+enum {
+ XDP_MODE_PASS = 0,
+ XDP_MODE_DROP = 1,
+ XDP_MODE_TX = 2,
+ XDP_MODE_TAIL_ADJST = 3,
+ XDP_MODE_HEAD_ADJST = 4,
+} xdp_map_modes;
+
+enum {
+ STATS_RX = 0,
+ STATS_PASS = 1,
+ STATS_DROP = 2,
+ STATS_TX = 3,
+ STATS_ABORT = 4,
+} xdp_stats;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __s32);
+} map_xdp_setup SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __u64);
+} map_xdp_stats SEC(".maps");
+
+static __u32 min(__u32 a, __u32 b)
+{
+ return a < b ? a : b;
+}
+
+static void record_stats(struct xdp_md *ctx, __u32 stat_type)
+{
+ __u64 *count;
+
+ count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type);
+
+ if (count)
+ __sync_fetch_and_add(count, 1);
+}
+
+static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+ void *data, *data_end;
+ struct ethhdr *eth;
+ int err;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth));
+ if (err)
+ return NULL;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = eth = (void *)(long)ctx->data;
+
+ if (data + sizeof(*eth) > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) +
+ sizeof(*udph));
+ if (err)
+ return NULL;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ iph = data + sizeof(*eth);
+
+ if (iph + 1 > (struct iphdr *)data_end ||
+ iph->protocol != IPPROTO_UDP)
+ return NULL;
+
+ udph = data + sizeof(*iph) + sizeof(*eth);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) +
+ sizeof(*udph));
+ if (err)
+ return NULL;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ ipv6h = data + sizeof(*eth);
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+ ipv6h->nexthdr != IPPROTO_UDP)
+ return NULL;
+
+ udph = data + sizeof(*ipv6h) + sizeof(*eth);
+ } else {
+ return NULL;
+ }
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ if (udph->dest != bpf_htons(port))
+ return NULL;
+
+ record_stats(ctx, STATS_RX);
+
+ return udph;
+}
+
+static int xdp_mode_pass(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_PASS);
+
+ return XDP_PASS;
+}
+
+static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_DROP);
+
+ return XDP_DROP;
+}
+
+static void swap_machdr(void *data)
+{
+ struct ethhdr *eth = data;
+ __u8 tmp_mac[ETH_ALEN];
+
+ __builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN);
+ __builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN);
+}
+
+static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+ void *data, *data_end;
+ struct ethhdr *eth;
+ int err;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth));
+ if (err)
+ return XDP_PASS;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = eth = (void *)(long)ctx->data;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_PASS;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph;
+ __be32 tmp_ip;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) +
+ sizeof(*udph));
+ if (err)
+ return XDP_PASS;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ iph = data + sizeof(*eth);
+
+ if (iph + 1 > (struct iphdr *)data_end ||
+ iph->protocol != IPPROTO_UDP)
+ return XDP_PASS;
+
+ udph = data + sizeof(*iph) + sizeof(*eth);
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return XDP_PASS;
+ if (udph->dest != bpf_htons(port))
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_RX);
+ eth = data;
+ swap_machdr((void *)eth);
+
+ tmp_ip = iph->saddr;
+ iph->saddr = iph->daddr;
+ iph->daddr = tmp_ip;
+
+ record_stats(ctx, STATS_TX);
+
+ return XDP_TX;
+
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct in6_addr tmp_ipv6;
+ struct ipv6hdr *ipv6h;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) +
+ sizeof(*udph));
+ if (err)
+ return XDP_PASS;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ ipv6h = data + sizeof(*eth);
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+ ipv6h->nexthdr != IPPROTO_UDP)
+ return XDP_PASS;
+
+ udph = data + sizeof(*ipv6h) + sizeof(*eth);
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return XDP_PASS;
+ if (udph->dest != bpf_htons(port))
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_RX);
+ eth = data;
+ swap_machdr((void *)eth);
+
+ __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6));
+ __builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr,
+ sizeof(tmp_ipv6));
+ __builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6));
+
+ record_stats(ctx, STATS_TX);
+
+ return XDP_TX;
+ }
+
+ return XDP_PASS;
+}
+
+static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ struct ethhdr *eth = data;
+ __u32 len, len_new;
+
+ if (data + sizeof(*eth) > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+ __u16 total_len;
+
+ if (iph + 1 > (struct iphdr *)data_end)
+ return NULL;
+
+ iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset);
+
+ udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+ if (!udph || udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = data + sizeof(*eth);
+ __u16 payload_len;
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+ if (!udph || udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ *udp_csum = ~((__u32)udph->check);
+
+ len = ipv6h->payload_len;
+ len_new = bpf_htons(bpf_ntohs(len) + offset);
+ ipv6h->payload_len = len_new;
+
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+
+ len = udph->len;
+ len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+ } else {
+ return NULL;
+ }
+
+ udph->len = len_new;
+
+ return udph;
+}
+
+static __u16 csum_fold_helper(__u32 csum)
+{
+ return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff;
+}
+
+static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset,
+ unsigned long hdr_len)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ __u32 buff_pos, udp_csum = 0;
+ struct udphdr *udph = NULL;
+ __u32 buff_len;
+
+ udph = update_pkt(ctx, 0 - offset, &udp_csum);
+ if (!udph)
+ return -1;
+
+ buff_len = bpf_xdp_get_buff_len(ctx);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ /* Make sure we have enough data to avoid eating the header */
+ if (buff_len - offset < hdr_len)
+ return -1;
+
+ buff_pos = buff_len - offset;
+ if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ __u32 buff_pos, udp_csum = 0;
+ __u32 buff_len, hdr_len, key;
+ struct udphdr *udph;
+ __s32 *val;
+ __u8 tag;
+
+ /* Proceed to update the packet headers before attempting to adjuste
+ * the tail. Once the tail is adjusted we lose access to the offset
+ * amount of data at the end of the packet which is crucial to update
+ * the checksum.
+ * Since any failure beyond this would abort the packet, we should
+ * not worry about passing a packet up the stack with wrong headers
+ */
+ udph = update_pkt(ctx, offset, &udp_csum);
+ if (!udph)
+ return -1;
+
+ key = XDP_ADJST_TAG;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return -1;
+
+ tag = (__u8)(*val);
+
+ for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+ __builtin_memcpy(&tmp_buff[i], &tag, 1);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ buff_len = bpf_xdp_get_buff_len(ctx);
+
+ if (bpf_xdp_adjust_tail(ctx, offset) < 0) {
+ bpf_printk("Failed to adjust tail\n");
+ return -1;
+ }
+
+ if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+ __s32 *adjust_offset, *val;
+ unsigned long hdr_len;
+ void *offset_ptr;
+ __u32 key;
+ __u8 tag;
+ int ret;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ hdr_len = (void *)udph - (void *)(long)ctx->data +
+ sizeof(struct udphdr);
+ key = XDP_ADJST_OFFSET;
+ adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!adjust_offset)
+ return XDP_PASS;
+
+ if (*adjust_offset < 0)
+ ret = xdp_adjst_tail_shrnk_data(ctx,
+ (__u16)(0 - *adjust_offset),
+ hdr_len);
+ else
+ ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset));
+ if (ret)
+ goto abort_pkt;
+
+ record_stats(ctx, STATS_PASS);
+ return XDP_PASS;
+
+abort_pkt:
+ record_stats(ctx, STATS_ABORT);
+ return XDP_ABORTED;
+}
+
+static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len,
+ __u32 offset)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ struct udphdr *udph;
+ void *offset_ptr;
+ __u32 udp_csum = 0;
+
+ /* Update the length information in the IP and UDP headers before
+ * adjusting the headroom. This simplifies accessing the relevant
+ * fields in the IP and UDP headers for fragmented packets. Any
+ * failure beyond this point will result in the packet being aborted,
+ * so we don't need to worry about incorrect length information for
+ * passed packets.
+ */
+ udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum);
+ if (!udph)
+ return -1;
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0)
+ return -1;
+
+ if (bpf_xdp_adjust_head(ctx, offset) < 0)
+ return -1;
+
+ if (offset > MAX_ADJST_OFFSET)
+ return -1;
+
+ if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+ return -1;
+
+ /* Added here to handle clang complain about negative value */
+ hdr_len = hdr_len & 0xff;
+
+ if (hdr_len == 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len,
+ __u32 offset)
+{
+ char hdr_buff[MAX_HDR_LEN];
+ char data_buff[MAX_ADJST_OFFSET];
+ void *offset_ptr;
+ __s32 *val;
+ __u32 key;
+ __u8 tag;
+ __u32 udp_csum = 0;
+ struct udphdr *udph;
+
+ udph = update_pkt(ctx, (__s16)(offset), &udp_csum);
+ if (!udph)
+ return -1;
+
+ key = XDP_ADJST_TAG;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return -1;
+
+ tag = (__u8)(*val);
+ for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+ __builtin_memcpy(&data_buff[i], &tag, 1);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+ return -1;
+
+ /* Added here to handle clang complain about negative value */
+ hdr_len = hdr_len & 0xff;
+
+ if (hdr_len == 0)
+ return -1;
+
+ if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+ return -1;
+
+ if (offset > MAX_ADJST_OFFSET)
+ return -1;
+
+ if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_head_adjst(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph_ptr = NULL;
+ __u32 key, size, hdr_len;
+ __s32 *val;
+ int res;
+
+ /* Filter packets based on UDP port */
+ udph_ptr = filter_udphdr(ctx, port);
+ if (!udph_ptr)
+ return XDP_PASS;
+
+ hdr_len = (void *)udph_ptr - (void *)(long)ctx->data +
+ sizeof(struct udphdr);
+
+ key = XDP_ADJST_OFFSET;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return XDP_PASS;
+
+ switch (*val) {
+ case -16:
+ case 16:
+ size = 16;
+ break;
+ case -32:
+ case 32:
+ size = 32;
+ break;
+ case -64:
+ case 64:
+ size = 64;
+ break;
+ case -128:
+ case 128:
+ size = 128;
+ break;
+ case -256:
+ case 256:
+ size = 256;
+ break;
+ default:
+ bpf_printk("Invalid adjustment offset: %d\n", *val);
+ goto abort;
+ }
+
+ if (*val < 0)
+ res = xdp_adjst_head_grow_data(ctx, hdr_len, size);
+ else
+ res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size);
+
+ if (res)
+ goto abort;
+
+ record_stats(ctx, STATS_PASS);
+ return XDP_PASS;
+
+abort:
+ record_stats(ctx, STATS_ABORT);
+ return XDP_ABORTED;
+}
+
+static int xdp_prog_common(struct xdp_md *ctx)
+{
+ __u32 key, *port;
+ __s32 *mode;
+
+ key = XDP_MODE;
+ mode = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!mode)
+ return XDP_PASS;
+
+ key = XDP_PORT;
+ port = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!port)
+ return XDP_PASS;
+
+ switch (*mode) {
+ case XDP_MODE_PASS:
+ return xdp_mode_pass(ctx, (__u16)(*port));
+ case XDP_MODE_DROP:
+ return xdp_mode_drop_handler(ctx, (__u16)(*port));
+ case XDP_MODE_TX:
+ return xdp_mode_tx_handler(ctx, (__u16)(*port));
+ case XDP_MODE_TAIL_ADJST:
+ return xdp_adjst_tail(ctx, (__u16)(*port));
+ case XDP_MODE_HEAD_ADJST:
+ return xdp_head_adjst(ctx, (__u16)(*port));
+ }
+
+ /* Default action is to simple pass */
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_prog(struct xdp_md *ctx)
+{
+ return xdp_prog_common(ctx);
+}
+
+SEC("xdp.frags")
+int xdp_prog_frags(struct xdp_md *ctx)
+{
+ return xdp_prog_common(ctx);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/link_netns.py b/tools/testing/selftests/net/link_netns.py
new file mode 100755
index 000000000000..aab043c59d69
--- /dev/null
+++ b/tools/testing/selftests/net/link_netns.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import time
+
+from lib.py import ksft_run, ksft_exit, ksft_true
+from lib.py import ip
+from lib.py import NetNS, NetNSEnter
+from lib.py import RtnlFamily
+
+
+LINK_NETNSID = 100
+
+
+def test_event() -> None:
+ with NetNS() as ns1, NetNS() as ns2:
+ with NetNSEnter(str(ns2)):
+ rtnl = RtnlFamily()
+
+ rtnl.ntf_subscribe("rtnlgrp-link")
+
+ ip(f"netns set {ns2} {LINK_NETNSID}", ns=str(ns1))
+ ip(f"link add netns {ns1} link-netnsid {LINK_NETNSID} dummy1 type dummy")
+ ip(f"link add netns {ns1} dummy2 type dummy", ns=str(ns2))
+
+ ip("link del dummy1", ns=str(ns1))
+ ip("link del dummy2", ns=str(ns1))
+
+ time.sleep(1)
+ rtnl.check_ntf()
+ ksft_true(rtnl.async_msg_queue.empty(),
+ "Received unexpected link notification")
+
+
+def validate_link_netns(netns, ifname, link_netnsid) -> bool:
+ link_info = ip(f"-d link show dev {ifname}", ns=netns, json=True)
+ if not link_info:
+ return False
+ return link_info[0].get("link_netnsid") == link_netnsid
+
+
+def test_link_net() -> None:
+ configs = [
+ # type, common args, type args, fallback to dev_net
+ ("ipvlan", "link dummy1", "", False),
+ ("macsec", "link dummy1", "", False),
+ ("macvlan", "link dummy1", "", False),
+ ("macvtap", "link dummy1", "", False),
+ ("vlan", "link dummy1", "id 100", False),
+ ("gre", "", "local 192.0.2.1", True),
+ ("vti", "", "local 192.0.2.1", True),
+ ("ipip", "", "local 192.0.2.1", True),
+ ("ip6gre", "", "local 2001:db8::1", True),
+ ("ip6tnl", "", "local 2001:db8::1", True),
+ ("vti6", "", "local 2001:db8::1", True),
+ ("sit", "", "local 192.0.2.1", True),
+ ("xfrm", "", "if_id 1", True),
+ ]
+
+ with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3:
+ net1, net2, net3 = str(ns1), str(ns2), str(ns3)
+
+ # prepare link netnsid and a dummy link needed by certain drivers
+ ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2))
+ ip("link add dummy1 type dummy", ns=net3)
+
+ cases = [
+ # source, "netns", "link-netns", expected link-netns
+ (net3, None, None, None, None),
+ (net3, net2, None, None, LINK_NETNSID),
+ (net2, None, net3, LINK_NETNSID, LINK_NETNSID),
+ (net1, net2, net3, LINK_NETNSID, LINK_NETNSID),
+ ]
+
+ for src_net, netns, link_netns, exp1, exp2 in cases:
+ tgt_net = netns or src_net
+ for typ, cargs, targs, fb_dev_net in configs:
+ cmd = "link add"
+ if netns:
+ cmd += f" netns {netns}"
+ if link_netns:
+ cmd += f" link-netns {link_netns}"
+ cmd += f" {cargs} foo type {typ} {targs}"
+ ip(cmd, ns=src_net)
+ if fb_dev_net:
+ ksft_true(validate_link_netns(tgt_net, "foo", exp1),
+ f"{typ} link_netns validation failed")
+ else:
+ ksft_true(validate_link_netns(tgt_net, "foo", exp2),
+ f"{typ} link_netns validation failed")
+ ip(f"link del foo", ns=tgt_net)
+
+
+def test_peer_net() -> None:
+ types = [
+ "vxcan",
+ "netkit",
+ "veth",
+ ]
+
+ with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3, NetNS() as ns4:
+ net1, net2, net3, net4 = str(ns1), str(ns2), str(ns3), str(ns4)
+
+ ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2))
+
+ cases = [
+ # source, "netns", "link-netns", "peer netns", expected
+ (net1, None, None, None, None),
+ (net1, net2, None, None, None),
+ (net2, None, net3, None, LINK_NETNSID),
+ (net1, net2, net3, None, None),
+ (net2, None, None, net3, LINK_NETNSID),
+ (net1, net2, None, net3, LINK_NETNSID),
+ (net2, None, net2, net3, LINK_NETNSID),
+ (net1, net2, net4, net3, LINK_NETNSID),
+ ]
+
+ for src_net, netns, link_netns, peer_netns, exp in cases:
+ tgt_net = netns or src_net
+ for typ in types:
+ cmd = "link add"
+ if netns:
+ cmd += f" netns {netns}"
+ if link_netns:
+ cmd += f" link-netns {link_netns}"
+ cmd += f" foo type {typ}"
+ if peer_netns:
+ cmd += f" peer netns {peer_netns}"
+ ip(cmd, ns=src_net)
+ ksft_true(validate_link_netns(tgt_net, "foo", exp),
+ f"{typ} peer_netns validation failed")
+ ip(f"link del foo", ns=tgt_net)
+
+
+def main() -> None:
+ ksft_run([test_event, test_link_net, test_peer_net])
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
new file mode 100755
index 000000000000..881eb399798f
--- /dev/null
+++ b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# WARNING
+# -------
+# This is just a dummy script that triggers encap cases with possible dst cache
+# reference loops in affected lwt users (see list below). Some cases are
+# pathological configurations for simplicity, others are valid. Overall, we
+# don't want this issue to happen, no matter what. In order to catch any
+# reference loops, kmemleak MUST be used. The results alone are always blindly
+# successful, don't rely on them. Note that the following tests may crash the
+# kernel if the fix to prevent lwtunnel_{input|output|xmit}() reentry loops is
+# not present.
+#
+# Affected lwt users so far (please update accordingly if needed):
+# - ila_lwt (output only)
+# - ioam6_iptunnel (output only)
+# - rpl_iptunnel (both input and output)
+# - seg6_iptunnel (both input and output)
+
+source lib.sh
+
+check_compatibility()
+{
+ setup_ns tmp_node &>/dev/null
+ if [ $? != 0 ]; then
+ echo "SKIP: Cannot create netns."
+ exit $ksft_skip
+ fi
+
+ ip link add name veth0 netns $tmp_node type veth \
+ peer name veth1 netns $tmp_node &>/dev/null
+ local ret=$?
+
+ ip -netns $tmp_node link set veth0 up &>/dev/null
+ ret=$((ret + $?))
+
+ ip -netns $tmp_node link set veth1 up &>/dev/null
+ ret=$((ret + $?))
+
+ if [ $ret != 0 ]; then
+ echo "SKIP: Cannot configure links."
+ cleanup_ns $tmp_node
+ exit $ksft_skip
+ fi
+
+ lsmod 2>/dev/null | grep -q "ila"
+ ila_lsmod=$?
+ [ $ila_lsmod != 0 ] && modprobe ila &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:1::/64 \
+ encap ila 1:2:3:4 csum-mode no-action ident-type luid \
+ hook-type output \
+ dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:2::/64 \
+ encap ioam6 trace prealloc type 0x800000 ns 0 size 4 \
+ dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:3::/64 \
+ encap rpl segs 2001:db8:3::1 dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:4::/64 \
+ encap seg6 mode inline segs 2001:db8:4::1 dev veth0 &>/dev/null
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ila"
+ skip_ila=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ioam6"
+ skip_ioam6=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap rpl"
+ skip_rpl=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap seg6"
+ skip_seg6=$?
+
+ cleanup_ns $tmp_node
+}
+
+setup()
+{
+ setup_ns alpha beta gamma &>/dev/null
+
+ ip link add name veth-alpha netns $alpha type veth \
+ peer name veth-betaL netns $beta &>/dev/null
+
+ ip link add name veth-betaR netns $beta type veth \
+ peer name veth-gamma netns $gamma &>/dev/null
+
+ ip -netns $alpha link set veth-alpha name veth0 &>/dev/null
+ ip -netns $beta link set veth-betaL name veth0 &>/dev/null
+ ip -netns $beta link set veth-betaR name veth1 &>/dev/null
+ ip -netns $gamma link set veth-gamma name veth0 &>/dev/null
+
+ ip -netns $alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null
+ ip -netns $alpha link set veth0 up &>/dev/null
+ ip -netns $alpha link set lo up &>/dev/null
+ ip -netns $alpha route add 2001:db8:2::/64 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ ip -netns $beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null
+ ip -netns $beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null
+ ip -netns $beta link set veth0 up &>/dev/null
+ ip -netns $beta link set veth1 up &>/dev/null
+ ip -netns $beta link set lo up &>/dev/null
+ ip -netns $beta route del 2001:db8:2::/64
+ ip -netns $beta route add 2001:db8:2::/64 dev veth1
+ ip netns exec $beta \
+ sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null
+
+ ip -netns $gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null
+ ip -netns $gamma link set veth0 up &>/dev/null
+ ip -netns $gamma link set lo up &>/dev/null
+ ip -netns $gamma route add 2001:db8:1::/64 \
+ via 2001:db8:2::1 dev veth0 &>/dev/null
+
+ sleep 1
+
+ ip netns exec $alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null
+ if [ $? != 0 ]; then
+ echo "SKIP: Setup failed."
+ exit $ksft_skip
+ fi
+
+ sleep 1
+}
+
+cleanup()
+{
+ cleanup_ns $alpha $beta $gamma
+ [ $ila_lsmod != 0 ] && modprobe -r ila &>/dev/null
+}
+
+run_ila()
+{
+ if [ $skip_ila != 0 ]; then
+ echo "SKIP: ila (output)"
+ return
+ fi
+
+ ip -netns $beta route del 2001:db8:2::/64
+ ip -netns $beta route add 2001:db8:2:0:0:0:0:2/128 \
+ encap ila 2001:db8:2:0 csum-mode no-action ident-type luid \
+ hook-type output \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: ila (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ ip -netns $beta route del 2001:db8:2:0:0:0:0:2/128
+ ip -netns $beta route add 2001:db8:2::/64 dev veth1
+ sleep 1
+}
+
+run_ioam6()
+{
+ if [ $skip_ioam6 != 0 ]; then
+ echo "SKIP: ioam6 (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap ioam6 trace prealloc type 0x800000 ns 1 size 4 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: ioam6 (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run_rpl()
+{
+ if [ $skip_rpl != 0 ]; then
+ echo "SKIP: rpl (input)"
+ echo "SKIP: rpl (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap rpl segs 2001:db8:2::2 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: rpl (input)"
+ ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ echo "TEST: rpl (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run_seg6()
+{
+ if [ $skip_seg6 != 0 ]; then
+ echo "SKIP: seg6 (input)"
+ echo "SKIP: seg6 (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap seg6 mode inline segs 2001:db8:2::2 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: seg6 (input)"
+ ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ echo "TEST: seg6 (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run()
+{
+ run_ila
+ run_ioam6
+ run_rpl
+ run_seg6
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+ echo "SKIP: Need root privileges."
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool."
+ exit $ksft_skip
+fi
+
+check_compatibility
+
+trap cleanup EXIT
+
+setup
+run
+
+exit $ksft_pass
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore
index 49daae73c41e..833279fb34e2 100644
--- a/tools/testing/selftests/net/mptcp/.gitignore
+++ b/tools/testing/selftests/net/mptcp/.gitignore
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
mptcp_connect
+mptcp_diag
mptcp_inq
mptcp_sockopt
pm_nl_ctl
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 7b936a926859..15d144a25d82 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -2,14 +2,35 @@
top_srcdir = ../../../../..
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
- simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
+TEST_PROGS := \
+ diag.sh \
+ mptcp_connect.sh \
+ mptcp_connect_checksum.sh \
+ mptcp_connect_mmap.sh \
+ mptcp_connect_sendfile.sh \
+ mptcp_join.sh \
+ mptcp_sockopt.sh \
+ pm_netlink.sh \
+ simult_flows.sh \
+ userspace_pm.sh \
+# end of TEST_PROGS
-TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
+TEST_GEN_FILES := \
+ mptcp_connect \
+ mptcp_diag \
+ mptcp_inq \
+ mptcp_sockopt \
+ pm_nl_ctl \
+# end of TEST_GEN_FILES
-TEST_FILES := mptcp_lib.sh settings
+TEST_FILES := \
+ mptcp_lib.sh \
+ settings \
+# end of TEST_FILES
+
+TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh)
EXTRA_CLEAN := *.pcap
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 4f80014cae49..59051ee2a986 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -1,34 +1,36 @@
-CONFIG_KALLSYMS=y
-CONFIG_MPTCP=y
-CONFIG_IPV6=y
-CONFIG_MPTCP_IPV6=y
CONFIG_INET_DIAG=m
CONFIG_INET_MPTCP_DIAG=m
-CONFIG_VETH=y
-CONFIG_NET_SCH_NETEM=m
-CONFIG_SYN_COOKIES=y
-CONFIG_NETFILTER=y
-CONFIG_NETFILTER_ADVANCED=y
-CONFIG_NETFILTER_NETLINK=m
-CONFIG_NF_TABLES=m
-CONFIG_NFT_COMPAT=m
-CONFIG_NETFILTER_XTABLES=m
-CONFIG_NETFILTER_XT_MATCH_BPF=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NF_TABLES_INET=y
-CONFIG_NFT_TPROXY=m
-CONFIG_NFT_SOCKET=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IPV6=y
CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_IP6_NF_FILTER=m
+CONFIG_KALLSYMS=y
+CONFIG_MPTCP=y
+CONFIG_MPTCP_IPV6=y
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_CLS_FW=m
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_SOCKET=m
+CONFIG_NFT_TPROXY=m
+CONFIG_SYN_COOKIES=y
+CONFIG_VETH=y
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 776d43a6922d..d847ff1737c3 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -28,7 +28,7 @@ flush_pids()
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null
@@ -200,6 +200,62 @@ chk_msk_cestab()
"${expected}" "${msg}" ""
}
+chk_dump_one()
+{
+ local ss_token
+ local token
+ local msg
+
+ ss_token="$(ss -inmHMN $ns |
+ mptcp_lib_get_info_value "token" "token")"
+
+ token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\
+ awk -F':[ \t]+' '/^token/ {print $2}')"
+
+ msg="....chk dump_one"
+
+ mptcp_lib_print_title "$msg"
+ if [ -n "$ss_token" ] && [ "$ss_token" = "$token" ]; then
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ else
+ mptcp_lib_pr_fail "expected $ss_token found $token"
+ mptcp_lib_result_fail "${msg}"
+ ret=${KSFT_FAIL}
+ fi
+}
+
+chk_dump_subflow()
+{
+ local inet_diag_token
+ local subflow_line
+ local ss_output
+ local ss_token
+ local msg
+
+ ss_output=$(ss -tniN $ns)
+
+ subflow_line=$(echo "$ss_output" | \
+ grep -m1 -Eo '[0-9.]+:[0-9].+ +[0-9.]+:[0-9.]+')
+
+ ss_token=$(echo "$ss_output" | grep -m1 -Eo 'token:[^ ]+')
+
+ inet_diag_token=$(ip netns exec $ns ./mptcp_diag -s "$subflow_line" | \
+ grep -Eo 'token:[^ ]+')
+
+ msg="....chk dump_subflow"
+
+ mptcp_lib_print_title "$msg"
+ if [ -n "$ss_token" ] && [ "$ss_token" = "$inet_diag_token" ]; then
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ else
+ mptcp_lib_pr_fail "expected $ss_token found $inet_diag_token"
+ mptcp_lib_result_fail "${msg}"
+ ret=${KSFT_FAIL}
+ fi
+}
+
msk_info_get_value()
{
local port="${1}"
@@ -284,12 +340,14 @@ echo "b" | \
./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \
127.0.0.1 >/dev/null &
wait_connected $ns 10000
-chk_msk_nr 2 "after MPC handshake "
+chk_msk_nr 2 "after MPC handshake"
chk_last_time_info 10000
chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
chk_msk_inuse 2
chk_msk_cestab 2
+chk_dump_one
+chk_dump_subflow
flush_pids
chk_msk_inuse 0 "2->0"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index d2043ec3bf6d..404a77bf366a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -25,6 +25,8 @@
#include <sys/types.h>
#include <sys/mman.h>
+#include <arpa/inet.h>
+
#include <netdb.h>
#include <netinet/in.h>
@@ -178,13 +180,27 @@ static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen,
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
- int err = getaddrinfo(node, service, hints, res);
+ int err;
+again:
+ err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ /* glibc starts to support MPTCP since v2.42.
+ * For older versions, use IPPROTO_TCP to resolve,
+ * and use TCP/MPTCP to create socket.
+ * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82
+ */
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -290,7 +306,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -354,7 +370,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
int infd, struct wstate *winfo)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
@@ -694,8 +710,14 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd,
bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len);
if (bw < 0) {
- if (cfg_rcv_trunc)
- return 0;
+ /* expected reset, continue to read */
+ if (cfg_rcv_trunc &&
+ (errno == ECONNRESET ||
+ errno == EPIPE)) {
+ fds.events &= ~POLLOUT;
+ continue;
+ }
+
perror("write");
return 111;
}
@@ -721,8 +743,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd,
}
if (fds.revents & (POLLERR | POLLNVAL)) {
- if (cfg_rcv_trunc)
- return 0;
+ if (cfg_rcv_trunc) {
+ fds.events &= ~(POLLERR | POLLNVAL);
+ continue;
+ }
fprintf(stderr, "Unexpected revents: "
"POLLERR/POLLNVAL(%x)\n", fds.revents);
return 5;
@@ -1048,6 +1072,8 @@ static void check_getpeername_connect(int fd)
socklen_t salen = sizeof(ss);
char a[INET6_ADDRSTRLEN];
char b[INET6_ADDRSTRLEN];
+ const char *iface;
+ size_t len;
if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) {
perror("getpeername");
@@ -1057,7 +1083,13 @@ static void check_getpeername_connect(int fd)
xgetnameinfo((struct sockaddr *)&ss, salen,
a, sizeof(a), b, sizeof(b));
- if (strcmp(cfg_host, a) || strcmp(cfg_port, b))
+ iface = strchr(cfg_host, '%');
+ if (iface)
+ len = iface - cfg_host;
+ else
+ len = strlen(cfg_host) + 1;
+
+ if (strncmp(cfg_host, a, len) || strcmp(cfg_port, b))
fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__,
cfg_host, a, cfg_port, b);
}
@@ -1077,6 +1109,7 @@ int main_loop_s(int listensock)
struct pollfd polls;
socklen_t salen;
int remotesock;
+ int err = 0;
int fd = 0;
again:
@@ -1109,19 +1142,19 @@ again:
SOCK_TEST_TCPULP(remotesock, 0);
memset(&winfo, 0, sizeof(winfo));
- copyfd_io(fd, remotesock, 1, true, &winfo);
+ err = copyfd_io(fd, remotesock, 1, true, &winfo);
} else {
perror("accept");
return 1;
}
- if (--cfg_repeat > 0) {
- if (cfg_input)
- close(fd);
+ if (cfg_input)
+ close(fd);
+
+ if (!err && --cfg_repeat > 0)
goto again;
- }
- return 0;
+ return err;
}
static void init_rng(void)
@@ -1211,23 +1244,42 @@ static void parse_setsock_options(const char *name)
exit(1);
}
-void xdisconnect(int fd, int addrlen)
+void xdisconnect(int fd)
{
- struct sockaddr_storage empty;
+ socklen_t addrlen = sizeof(struct sockaddr_storage);
+ struct sockaddr_storage addr, empty;
int msec_sleep = 10;
- int queued = 1;
- int i;
+ void *raw_addr;
+ int i, cmdlen;
+ char cmd[128];
+
+ /* get the local address and convert it to string */
+ if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0)
+ xerror("getsockname");
+
+ if (addr.ss_family == AF_INET)
+ raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr);
+ else if (addr.ss_family == AF_INET6)
+ raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr);
+ else
+ xerror("bad family");
+
+ strcpy(cmd, "ss -Mnt | grep -q ");
+ cmdlen = strlen(cmd);
+ if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen],
+ sizeof(cmd) - cmdlen))
+ xerror("inet_ntop");
shutdown(fd, SHUT_WR);
- /* while until the pending data is completely flushed, the later
+ /*
+ * wait until the pending data is completely flushed and all
+ * the sockets reached the closed status.
* disconnect will bypass/ignore/drop any pending data.
*/
for (i = 0; ; i += msec_sleep) {
- if (ioctl(fd, SIOCOUTQ, &queued) < 0)
- xerror("can't query out socket queue: %d", errno);
-
- if (!queued)
+ /* closed socket are not listed by 'ss' */
+ if (system(cmd) != 0)
break;
if (i > poll_timeout)
@@ -1249,7 +1301,7 @@ int main_loop(void)
if (cfg_input && cfg_sockopt_types.mptfo) {
fd_in = open(cfg_input, O_RDONLY);
- if (fd < 0)
+ if (fd_in < 0)
xerror("can't open %s:%d", cfg_input, errno);
}
@@ -1272,18 +1324,18 @@ again:
if (cfg_input && !cfg_sockopt_types.mptfo) {
fd_in = open(cfg_input, O_RDONLY);
- if (fd < 0)
+ if (fd_in < 0)
xerror("can't open %s:%d", cfg_input, errno);
}
ret = copyfd_io(fd_in, fd, 1, 0, &winfo);
if (ret)
- return ret;
+ goto out;
if (cfg_truncate > 0) {
- xdisconnect(fd, peer->ai_addrlen);
+ shutdown(fd, SHUT_WR);
} else if (--cfg_repeat > 0) {
- xdisconnect(fd, peer->ai_addrlen);
+ xdisconnect(fd);
/* the socket could be unblocking at this point, we need the
* connect to be blocking
@@ -1299,7 +1351,10 @@ again:
close(fd);
}
- return 0;
+out:
+ if (cfg_input)
+ close(fd_in);
+ return ret;
}
int parse_proto(const char *proto)
@@ -1394,7 +1449,7 @@ static void parse_opts(int argc, char **argv)
*/
if (cfg_truncate < 0) {
cfg_rcv_trunc = true;
- signal(SIGPIPE, handle_signal);
+ signal(SIGPIPE, SIG_IGN);
}
break;
case 'j':
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index b77fb7065bfb..a6447f7a31fe 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -134,10 +134,10 @@ ns4=""
TEST_GROUP=""
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
- rm -f "$cin_disconnect" "$cout_disconnect"
+ rm -f "$cin_disconnect"
rm -f "$cin" "$cout"
rm -f "$sin" "$sout"
rm -f "$capout"
@@ -155,7 +155,6 @@ cin=$(mktemp)
cout=$(mktemp)
capout=$(mktemp)
cin_disconnect="$cin".disconnect
-cout_disconnect="$cout".disconnect
trap cleanup EXIT
mptcp_lib_ns_init ns1 ns2 ns3 ns4
@@ -212,6 +211,11 @@ if $checksum; then
done
fi
+if $capture; then
+ rndh="${ns1:4}"
+ mptcp_lib_pr_info "Packet capture files will have this prefix: ${rndh}-"
+fi
+
set_ethtool_flags() {
local ns="$1"
local dev="$2"
@@ -259,6 +263,15 @@ check_mptcp_disabled()
mptcp_lib_ns_init disabled_ns
print_larger_title "New MPTCP socket can be blocked via sysctl"
+
+ # mainly to cover more code
+ if ! ip netns exec ${disabled_ns} sysctl net.mptcp >/dev/null; then
+ mptcp_lib_pr_fail "not able to list net.mptcp sysctl knobs"
+ mptcp_lib_result_fail "not able to list net.mptcp sysctl knobs"
+ ret=${KSFT_FAIL}
+ return 1
+ fi
+
# net.mptcp.enabled should be enabled by default
if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then
mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default"
@@ -345,13 +358,14 @@ do_transfer()
local addr_port
addr_port=$(printf "%s:%d" ${connect_addr} ${port})
- local result_msg
- result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
- mptcp_lib_print_title "${result_msg}"
+ local pretty_title
+ pretty_title="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
+ mptcp_lib_print_title "${pretty_title}"
+
+ local tap_title="${connector_ns:0:3} ${cl_proto} -> ${listener_ns:0:3} (${addr_port}) ${srv_proto}"
if $capture; then
local capuser
- local rndh="${connector_ns:4}"
if [ -z $SUDO_USER ] ; then
capuser=""
else
@@ -361,90 +375,79 @@ do_transfer()
local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}"
local capopt="-i any -s 65535 -B 32768 ${capuser}"
- ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
+ ip netns exec ${listener_ns} tcpdump ${capopt} \
+ -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
local cappid_listener=$!
- ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
- local cappid_connector=$!
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ ip netns exec ${connector_ns} tcpdump ${capopt} \
+ -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+ local cappid_connector=$!
+ fi
sleep 1
fi
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat -n
+ mptcp_lib_nstat_init "${listener_ns}"
if [ ${listener_ns} != ${connector_ns} ]; then
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat -n
- fi
-
- local stat_synrx_last_l
- local stat_ackrx_last_l
- local stat_cookietx_last
- local stat_cookierx_last
- local stat_csum_err_s
- local stat_csum_err_c
- local stat_tcpfb_last_l
- stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
- stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
- stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
- stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
-
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- $extra_args $local_addr < "$sin" > "$sout" &
+ mptcp_lib_nstat_init "${connector_ns}"
+ fi
+
+ ip netns exec ${listener_ns} \
+ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ $extra_args $local_addr < "$sin" > "$sout" &
local spid=$!
mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
local start
start=$(date +%s%3N)
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
- $extra_args $connect_addr < "$cin" > "$cout" &
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_args $connect_addr < "$cin" > "$cout" &
local cpid=$!
+ mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+ "${connector_ns}" "${port}" "${cpid}" "${spid}" &
+ local timeout_pid=$!
+
wait $cpid
local retc=$?
wait $spid
local rets=$?
+ if kill -0 $timeout_pid; then
+ # Finished before the timeout: kill the background job
+ mptcp_lib_kill_group_wait $timeout_pid
+ timeout_pid=0
+ fi
+
local stop
stop=$(date +%s%3N)
if $capture; then
sleep 1
kill ${cappid_listener}
- kill ${cappid_connector}
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ kill ${cappid_connector}
+ fi
fi
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat | grep Tcp > /tmp/${listener_ns}.out
+ mptcp_lib_nstat_get "${listener_ns}"
if [ ${listener_ns} != ${connector_ns} ]; then
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat | grep Tcp > /tmp/${connector_ns}.out
+ mptcp_lib_nstat_get "${connector_ns}"
fi
local duration
duration=$((stop-start))
- result_msg+=" # time=${duration}ms"
printf "(duration %05sms) " "${duration}"
- if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
mptcp_lib_pr_fail "client exit code $retc, server $rets"
- echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
- cat /tmp/${listener_ns}.out
- echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
- [ ${listener_ns} != ${connector_ns} ] && cat /tmp/${connector_ns}.out
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
echo
cat "$capout"
- mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
+ mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}"
return 1
fi
@@ -454,38 +457,38 @@ do_transfer()
rets=$?
local extra=""
- local stat_synrx_now_l
- local stat_ackrx_now_l
- local stat_cookietx_now
- local stat_cookierx_now
- local stat_ooo_now
- local stat_tcpfb_now_l
- stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
- stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
- stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
-
- expect_synrx=$((stat_synrx_last_l))
- expect_ackrx=$((stat_ackrx_last_l))
+ local stat_synrx
+ local stat_ackrx
+ local stat_cookietx
+ local stat_cookierx
+ local stat_ooo
+ local stat_tcpfb
+ stat_synrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ stat_ackrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ stat_cookietx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ stat_cookierx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ stat_ooo=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ stat_tcpfb=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
+
+ expect_synrx=0
+ expect_ackrx=0
cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies)
cookies=${cookies##*=}
if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
- expect_synrx=$((stat_synrx_last_l+connect_per_transfer))
- expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer))
+ expect_synrx=${connect_per_transfer}
+ expect_ackrx=${connect_per_transfer}
fi
- if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
- mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \
+ if [ ${stat_synrx} -lt ${expect_synrx} ]; then
+ mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx})" \
"than expected (${expect_synrx})"
retc=1
fi
- if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then
- if [ ${stat_ooo_now} -eq 0 ]; then
- mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \
+ if [ ${stat_ackrx} -lt ${expect_ackrx} ]; then
+ if [ ${stat_ooo} -eq 0 ]; then
+ mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx})" \
"than expected (${expect_ackrx})"
rets=1
else
@@ -499,57 +502,55 @@ do_transfer()
csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
- local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
- if [ $csum_err_s_nr -gt 0 ]; then
- mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]"
+ if [ $csum_err_s -gt 0 ]; then
+ mptcp_lib_pr_fail "server got ${csum_err_s} data checksum error[s]"
rets=1
fi
- local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
- if [ $csum_err_c_nr -gt 0 ]; then
- mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]"
+ if [ $csum_err_c -gt 0 ]; then
+ mptcp_lib_pr_fail "client got ${csum_err_c} data checksum error[s]"
retc=1
fi
fi
- if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
+ if [ ${stat_ooo} -eq 0 ] && [ ${stat_tcpfb} -gt 0 ]; then
mptcp_lib_pr_fail "unexpected fallback to TCP"
rets=1
fi
if [ $cookies -eq 2 ];then
- if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
+ if [ $stat_cookietx -eq 0 ] ;then
extra+=" WARN: CookieSent: did not advance"
fi
- if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
+ if [ $stat_cookierx -eq 0 ] ;then
extra+=" WARN: CookieRecv: did not advance"
fi
else
- if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
+ if [ $stat_cookietx -gt 0 ] ;then
extra+=" WARN: CookieSent: changed"
fi
- if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
+ if [ $stat_cookierx -gt 0 ] ;then
extra+=" WARN: CookieRecv: changed"
fi
fi
- if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then
+ if [ ${stat_synrx} -gt ${expect_synrx} ]; then
extra+=" WARN: SYNRX: expect ${expect_synrx},"
- extra+=" got ${stat_synrx_now_l} (probably retransmissions)"
+ extra+=" got ${stat_synrx} (probably retransmissions)"
fi
- if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then
+ if [ ${stat_ackrx} -gt ${expect_ackrx} ]; then
extra+=" WARN: ACKRX: expect ${expect_ackrx},"
- extra+=" got ${stat_ackrx_now_l} (probably retransmissions)"
+ extra+=" got ${stat_ackrx} (probably retransmissions)"
fi
if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
mptcp_lib_pr_ok "${extra:1}"
- mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
+ mptcp_lib_result_pass "${TEST_GROUP}: ${tap_title}"
else
if [ -n "${extra}" ]; then
mptcp_lib_print_warn "${extra:1}"
fi
- mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
+ mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}"
fi
cat "$capout"
@@ -577,7 +578,7 @@ make_file()
mptcp_lib_make_file $name 1024 $ksize
dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null
- echo "Created $name (size $(du -b "$name")) containing data sent by $who"
+ echo "Created $name (size $(stat -c "%s" "$name") B) containing data sent by $who"
}
run_tests_lo()
@@ -848,6 +849,8 @@ stop_if_error()
make_file "$cin" "client"
make_file "$sin" "server"
+mptcp_lib_subtests_last_ts_reset
+
check_mptcp_disabled
stop_if_error "The kernel configuration is not valid for MPTCP"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh
new file mode 100755
index 000000000000..ce93ec2f107f
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -C "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh
new file mode 100755
index 000000000000..5dd30f9394af
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -m mmap "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh
new file mode 100755
index 000000000000..1d16fb1cc9bb
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -m sendfile "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
new file mode 100644
index 000000000000..e084796e804d
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -0,0 +1,435 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025, Kylin Software */
+
+#include <linux/sock_diag.h>
+#include <linux/rtnetlink.h>
+#include <linux/inet_diag.h>
+#include <linux/netlink.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <linux/tcp.h>
+#include <arpa/inet.h>
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
+#define parse_rtattr_nested(tb, max, rta) \
+ (parse_rtattr_flags((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta), \
+ NLA_F_NESTED))
+
+struct params {
+ __u32 target_token;
+ char subflow_addrs[1024];
+};
+
+struct mptcp_info {
+ __u8 mptcpi_subflows;
+ __u8 mptcpi_add_addr_signal;
+ __u8 mptcpi_add_addr_accepted;
+ __u8 mptcpi_subflows_max;
+ __u8 mptcpi_add_addr_signal_max;
+ __u8 mptcpi_add_addr_accepted_max;
+ __u32 mptcpi_flags;
+ __u32 mptcpi_token;
+ __u64 mptcpi_write_seq;
+ __u64 mptcpi_snd_una;
+ __u64 mptcpi_rcv_nxt;
+ __u8 mptcpi_local_addr_used;
+ __u8 mptcpi_local_addr_max;
+ __u8 mptcpi_csum_enabled;
+ __u32 mptcpi_retransmits;
+ __u64 mptcpi_bytes_retrans;
+ __u64 mptcpi_bytes_sent;
+ __u64 mptcpi_bytes_received;
+ __u64 mptcpi_bytes_acked;
+ __u8 mptcpi_subflows_total;
+ __u8 reserved[3];
+ __u32 mptcpi_last_data_sent;
+ __u32 mptcpi_last_data_recv;
+ __u32 mptcpi_last_ack_recv;
+};
+
+enum {
+ MPTCP_SUBFLOW_ATTR_UNSPEC,
+ MPTCP_SUBFLOW_ATTR_TOKEN_REM,
+ MPTCP_SUBFLOW_ATTR_TOKEN_LOC,
+ MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ,
+ MPTCP_SUBFLOW_ATTR_MAP_SEQ,
+ MPTCP_SUBFLOW_ATTR_MAP_SFSEQ,
+ MPTCP_SUBFLOW_ATTR_SSN_OFFSET,
+ MPTCP_SUBFLOW_ATTR_MAP_DATALEN,
+ MPTCP_SUBFLOW_ATTR_FLAGS,
+ MPTCP_SUBFLOW_ATTR_ID_REM,
+ MPTCP_SUBFLOW_ATTR_ID_LOC,
+ MPTCP_SUBFLOW_ATTR_PAD,
+
+ __MPTCP_SUBFLOW_ATTR_MAX
+};
+
+#define MPTCP_SUBFLOW_ATTR_MAX (__MPTCP_SUBFLOW_ATTR_MAX - 1)
+
+#define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0)
+#define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1)
+#define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2)
+#define MPTCP_SUBFLOW_FLAG_JOIN_LOC _BITUL(3)
+#define MPTCP_SUBFLOW_FLAG_BKUP_REM _BITUL(4)
+#define MPTCP_SUBFLOW_FLAG_BKUP_LOC _BITUL(5)
+#define MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED _BITUL(6)
+#define MPTCP_SUBFLOW_FLAG_CONNECTED _BITUL(7)
+#define MPTCP_SUBFLOW_FLAG_MAPVALID _BITUL(8)
+
+#define rta_getattr(type, value) (*(type *)RTA_DATA(value))
+
+static void die_perror(const char *msg)
+{
+ perror(msg);
+ exit(1);
+}
+
+static void die_usage(int r)
+{
+ fprintf(stderr, "Usage:\n"
+ "mptcp_diag -t <token>\n"
+ "mptcp_diag -s \"<saddr>:<sport> <daddr>:<dport>\"\n");
+ exit(r);
+}
+
+static void send_query(int fd, struct inet_diag_req_v2 *r, __u32 proto)
+{
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK
+ };
+ struct {
+ struct nlmsghdr nlh;
+ struct inet_diag_req_v2 r;
+ } req = {
+ .nlh = {
+ .nlmsg_len = sizeof(req),
+ .nlmsg_type = SOCK_DIAG_BY_FAMILY,
+ .nlmsg_flags = NLM_F_REQUEST
+ },
+ .r = *r
+ };
+ struct rtattr rta_proto;
+ struct iovec iov[6];
+ int iovlen = 0;
+
+ iov[iovlen++] = (struct iovec) {
+ .iov_base = &req,
+ .iov_len = sizeof(req)
+ };
+
+ if (proto == IPPROTO_MPTCP) {
+ rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL;
+ rta_proto.rta_len = RTA_LENGTH(sizeof(proto));
+
+ iov[iovlen++] = (struct iovec){ &rta_proto, sizeof(rta_proto)};
+ iov[iovlen++] = (struct iovec){ &proto, sizeof(proto)};
+ req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto));
+ }
+
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = iov,
+ .msg_iovlen = iovlen
+ };
+
+ for (;;) {
+ if (sendmsg(fd, &msg, 0) < 0) {
+ if (errno == EINTR)
+ continue;
+ die_perror("sendmsg");
+ }
+ break;
+ }
+}
+
+static void parse_rtattr_flags(struct rtattr *tb[], int max, struct rtattr *rta,
+ int len, unsigned short flags)
+{
+ unsigned short type;
+
+ memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+ while (RTA_OK(rta, len)) {
+ type = rta->rta_type & ~flags;
+ if (type <= max && !tb[type])
+ tb[type] = rta;
+ rta = RTA_NEXT(rta, len);
+ }
+}
+
+static void print_info_msg(struct mptcp_info *info)
+{
+ printf("Token & Flags\n");
+ printf("token: %x\n", info->mptcpi_token);
+ printf("flags: %x\n", info->mptcpi_flags);
+ printf("csum_enabled: %u\n", info->mptcpi_csum_enabled);
+
+ printf("\nBasic Info\n");
+ printf("subflows: %u\n", info->mptcpi_subflows);
+ printf("subflows_max: %u\n", info->mptcpi_subflows_max);
+ printf("subflows_total: %u\n", info->mptcpi_subflows_total);
+ printf("local_addr_used: %u\n", info->mptcpi_local_addr_used);
+ printf("local_addr_max: %u\n", info->mptcpi_local_addr_max);
+ printf("add_addr_signal: %u\n", info->mptcpi_add_addr_signal);
+ printf("add_addr_accepted: %u\n", info->mptcpi_add_addr_accepted);
+ printf("add_addr_signal_max: %u\n", info->mptcpi_add_addr_signal_max);
+ printf("add_addr_accepted_max: %u\n", info->mptcpi_add_addr_accepted_max);
+
+ printf("\nTransmission Info\n");
+ printf("write_seq: %llu\n", info->mptcpi_write_seq);
+ printf("snd_una: %llu\n", info->mptcpi_snd_una);
+ printf("rcv_nxt: %llu\n", info->mptcpi_rcv_nxt);
+ printf("last_data_sent: %u\n", info->mptcpi_last_data_sent);
+ printf("last_data_recv: %u\n", info->mptcpi_last_data_recv);
+ printf("last_ack_recv: %u\n", info->mptcpi_last_ack_recv);
+ printf("retransmits: %u\n", info->mptcpi_retransmits);
+ printf("retransmit bytes: %llu\n", info->mptcpi_bytes_retrans);
+ printf("bytes_sent: %llu\n", info->mptcpi_bytes_sent);
+ printf("bytes_received: %llu\n", info->mptcpi_bytes_received);
+ printf("bytes_acked: %llu\n", info->mptcpi_bytes_acked);
+}
+
+/*
+ * 'print_subflow_info' is from 'mptcp_subflow_info'
+ * which is a function in 'misc/ss.c' of iproute2.
+ */
+static void print_subflow_info(struct rtattr *tb[])
+{
+ u_int32_t flags = 0;
+
+ printf("It's a mptcp subflow, the subflow info:\n");
+ if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) {
+ char caps[32 + 1] = { 0 }, *cap = &caps[0];
+
+ flags = rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_FLAGS]);
+
+ if (flags & MPTCP_SUBFLOW_FLAG_MCAP_REM)
+ *cap++ = 'M';
+ if (flags & MPTCP_SUBFLOW_FLAG_MCAP_LOC)
+ *cap++ = 'm';
+ if (flags & MPTCP_SUBFLOW_FLAG_JOIN_REM)
+ *cap++ = 'J';
+ if (flags & MPTCP_SUBFLOW_FLAG_JOIN_LOC)
+ *cap++ = 'j';
+ if (flags & MPTCP_SUBFLOW_FLAG_BKUP_REM)
+ *cap++ = 'B';
+ if (flags & MPTCP_SUBFLOW_FLAG_BKUP_LOC)
+ *cap++ = 'b';
+ if (flags & MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED)
+ *cap++ = 'e';
+ if (flags & MPTCP_SUBFLOW_FLAG_CONNECTED)
+ *cap++ = 'c';
+ if (flags & MPTCP_SUBFLOW_FLAG_MAPVALID)
+ *cap++ = 'v';
+
+ if (flags)
+ printf(" flags:%s", caps);
+ }
+ if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] &&
+ tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] &&
+ tb[MPTCP_SUBFLOW_ATTR_ID_REM] &&
+ tb[MPTCP_SUBFLOW_ATTR_ID_LOC])
+ printf(" token:%04x(id:%u)/%04x(id:%u)",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]),
+ rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_REM]),
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]),
+ rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_LOC]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])
+ printf(" seq:%llu",
+ rta_getattr(__u64, tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])
+ printf(" sfseq:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]));
+ if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])
+ printf(" ssnoff:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])
+ printf(" maplen:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]));
+ printf("\n");
+}
+
+static void parse_nlmsg(struct nlmsghdr *nlh, __u32 proto)
+{
+ struct inet_diag_msg *r = NLMSG_DATA(nlh);
+ struct rtattr *tb[INET_DIAG_MAX + 1];
+
+ parse_rtattr_flags(tb, INET_DIAG_MAX, (struct rtattr *)(r + 1),
+ nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)),
+ NLA_F_NESTED);
+
+ if (proto == IPPROTO_MPTCP && tb[INET_DIAG_INFO]) {
+ int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]);
+ struct mptcp_info *info;
+
+ /* workaround fort older kernels with less fields */
+ if (len < sizeof(*info)) {
+ info = alloca(sizeof(*info));
+ memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len);
+ memset((char *)info + len, 0, sizeof(*info) - len);
+ } else {
+ info = RTA_DATA(tb[INET_DIAG_INFO]);
+ }
+ print_info_msg(info);
+ }
+ if (proto == IPPROTO_TCP && tb[INET_DIAG_ULP_INFO]) {
+ struct rtattr *ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 };
+
+ parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX,
+ tb[INET_DIAG_ULP_INFO]);
+
+ if (ulpinfo[INET_ULP_INFO_MPTCP]) {
+ struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] = { 0 };
+
+ parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX,
+ ulpinfo[INET_ULP_INFO_MPTCP]);
+ print_subflow_info(sfinfo);
+ } else {
+ printf("It's a normal TCP!\n");
+ }
+ }
+}
+
+static void recv_nlmsg(int fd, __u32 proto)
+{
+ char rcv_buff[8192];
+ struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buff;
+ struct sockaddr_nl rcv_nladdr = {
+ .nl_family = AF_NETLINK
+ };
+ struct iovec rcv_iov = {
+ .iov_base = rcv_buff,
+ .iov_len = sizeof(rcv_buff)
+ };
+ struct msghdr rcv_msg = {
+ .msg_name = &rcv_nladdr,
+ .msg_namelen = sizeof(rcv_nladdr),
+ .msg_iov = &rcv_iov,
+ .msg_iovlen = 1
+ };
+ int len;
+
+ len = recvmsg(fd, &rcv_msg, 0);
+
+ while (NLMSG_OK(nlh, len)) {
+ if (nlh->nlmsg_type == NLMSG_DONE) {
+ printf("NLMSG_DONE\n");
+ break;
+ } else if (nlh->nlmsg_type == NLMSG_ERROR) {
+ struct nlmsgerr *err;
+
+ err = (struct nlmsgerr *)NLMSG_DATA(nlh);
+ printf("Error %d:%s\n",
+ -(err->error), strerror(-(err->error)));
+ break;
+ }
+ parse_nlmsg(nlh, proto);
+ nlh = NLMSG_NEXT(nlh, len);
+ }
+}
+
+static void get_mptcpinfo(__u32 token)
+{
+ struct inet_diag_req_v2 r = {
+ .sdiag_family = AF_INET,
+ /* Real proto is set via INET_DIAG_REQ_PROTOCOL */
+ .sdiag_protocol = IPPROTO_TCP,
+ .idiag_ext = 1 << (INET_DIAG_INFO - 1),
+ .id.idiag_cookie[0] = token,
+ };
+ __u32 proto = IPPROTO_MPTCP;
+ int fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ if (fd < 0)
+ die_perror("Netlink socket");
+
+ send_query(fd, &r, proto);
+ recv_nlmsg(fd, proto);
+
+ close(fd);
+}
+
+static void get_subflow_info(char *subflow_addrs)
+{
+ struct inet_diag_req_v2 r = {
+ .sdiag_family = AF_INET,
+ .sdiag_protocol = IPPROTO_TCP,
+ .idiag_ext = 1 << (INET_DIAG_INFO - 1),
+ .id.idiag_cookie[0] = INET_DIAG_NOCOOKIE,
+ .id.idiag_cookie[1] = INET_DIAG_NOCOOKIE,
+ };
+ char saddr[64], daddr[64];
+ int sport, dport;
+ int ret;
+ int fd;
+
+ ret = sscanf(subflow_addrs, "%[^:]:%d %[^:]:%d", saddr, &sport, daddr, &dport);
+ if (ret != 4)
+ die_perror("IP PORT Pairs has style problems!");
+
+ printf("%s:%d -> %s:%d\n", saddr, sport, daddr, dport);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ if (fd < 0)
+ die_perror("Netlink socket");
+
+ r.id.idiag_sport = htons(sport);
+ r.id.idiag_dport = htons(dport);
+
+ inet_pton(AF_INET, saddr, &r.id.idiag_src);
+ inet_pton(AF_INET, daddr, &r.id.idiag_dst);
+ send_query(fd, &r, IPPROTO_TCP);
+ recv_nlmsg(fd, IPPROTO_TCP);
+}
+
+static void parse_opts(int argc, char **argv, struct params *p)
+{
+ int c;
+
+ if (argc < 2)
+ die_usage(1);
+
+ while ((c = getopt(argc, argv, "ht:s:")) != -1) {
+ switch (c) {
+ case 'h':
+ die_usage(0);
+ break;
+ case 't':
+ sscanf(optarg, "%x", &p->target_token);
+ break;
+ case 's':
+ strncpy(p->subflow_addrs, optarg,
+ sizeof(p->subflow_addrs) - 1);
+ break;
+ default:
+ die_usage(1);
+ break;
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct params p = { 0 };
+
+ parse_opts(argc, argv, &p);
+
+ if (p.target_token)
+ get_mptcpinfo(p.target_token);
+
+ if (p.subflow_addrs[0] != '\0')
+ get_subflow_info(p.subflow_addrs);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
index 218aac467321..8e8f6441ad8b 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -72,13 +72,22 @@ static const char *getxinfo_strerr(int err)
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
- int err = getaddrinfo(node, service, hints, res);
+ int err;
+again:
+ err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -91,7 +100,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -136,7 +145,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
@@ -493,6 +502,7 @@ static int server(int unixfd)
process_one_client(r, unixfd);
+ close(fd);
return 0;
}
@@ -571,8 +581,12 @@ int main(int argc, char *argv[])
die_perror("pipe");
s = xfork();
- if (s == 0)
- return server(unixfds[1]);
+ if (s == 0) {
+ close(unixfds[0]);
+ ret = server(unixfds[1]);
+ close(unixfds[1]);
+ return ret;
+ }
close(unixfds[1]);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 108aeeb84ef1..b2e6e548f796 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -8,7 +8,7 @@
# ShellCheck incorrectly believes that most of the code here is unreachable
# because it's invoked by variable name, see how the "tests" array is used
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
. "$(dirname "${0}")/mptcp_lib.sh"
@@ -23,6 +23,7 @@ tmpfile=""
cout=""
err=""
capout=""
+cappid=""
ns1=""
ns2=""
iptables="iptables"
@@ -61,6 +62,29 @@ unset sflags
unset fastclose
unset fullmesh
unset speed
+unset bind_addr
+unset join_syn_rej
+unset join_csum_ns1
+unset join_csum_ns2
+unset join_fail_nr
+unset join_rst_nr
+unset join_infi_nr
+unset join_corrupted_pkts
+unset join_syn_tx
+unset join_create_err
+unset join_bind_err
+unset join_connect_err
+
+unset fb_ns1
+unset fb_ns2
+unset fb_infinite_map_tx
+unset fb_dss_corruption
+unset fb_simult_conn
+unset fb_mpc_passive
+unset fb_mpc_active
+unset fb_mpc_data
+unset fb_md5_sig
+unset fb_dss
# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
# (ip6 && (ip6[74] & 0xf0) == 0x30)'"
@@ -196,6 +220,22 @@ print_skip()
mptcp_lib_pr_skip "${@}"
}
+# $1: check name; $2: rc
+print_results()
+{
+ local check="${1}"
+ local rc=${2}
+
+ print_check "${check}"
+ if [ ${rc} = ${KSFT_PASS} ]; then
+ print_ok
+ elif [ ${rc} = ${KSFT_SKIP} ]; then
+ print_skip
+ else
+ fail_test "see above"
+ fi
+}
+
# [ $1: fail msg ]
mark_as_skipped()
{
@@ -319,6 +359,7 @@ reset_with_add_addr_timeout()
tables="${ip6tables}"
fi
+ # set a maximum, to avoid too long timeout with exponential backoff
ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \
@@ -337,7 +378,7 @@ reset_with_checksum()
local ns1_enable=$1
local ns2_enable=$2
- reset "checksum test ${1} ${2}" || return 1
+ reset "checksum test ${ns1_enable} ${ns2_enable}" || return 1
ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
@@ -420,12 +461,17 @@ reset_with_fail()
fi
}
+start_events()
+{
+ mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid
+ mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid
+}
+
reset_with_events()
{
reset "${1}" || return 1
- mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid
- mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid
+ start_events
}
reset_with_tcp_filter()
@@ -436,9 +482,10 @@ reset_with_tcp_filter()
local ns="${!1}"
local src="${2}"
local target="${3}"
+ local chain="${4:-INPUT}"
if ! ip netns exec "${ns}" ${iptables} \
- -A INPUT \
+ -A "${chain}" \
-s "${src}" \
-p tcp \
-j "${target}"; then
@@ -599,6 +646,27 @@ wait_mpj()
done
}
+wait_ll_ready()
+{
+ local ns="${1}"
+
+ local i
+ for i in $(seq 50); do
+ ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" |
+ grep -qw "tentative" || break
+ sleep 0.1
+ done
+}
+
+get_ll_addr()
+{
+ local ns="${1}"
+ local iface="${2}"
+
+ ip -n "${ns}" -6 addr show dev "${iface}" scope link |
+ grep "inet6 fe80" | sed 's#.*\(fe80::.*\)/.*#\1#'
+}
+
kill_events_pids()
{
mptcp_lib_kill_wait $evts_ns1_pid
@@ -661,7 +729,7 @@ pm_nl_check_endpoint()
done
if [ -z "${id}" ]; then
- test_fail "bad test - missing endpoint id"
+ fail_test "bad test - missing endpoint id"
return
fi
@@ -833,7 +901,7 @@ chk_cestab_nr()
local cestab=$2
local count
- print_check "cestab $cestab"
+ print_check "currently established: $cestab"
count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab")
if [ -z "$count" ]; then
print_skip
@@ -855,45 +923,68 @@ check_cestab()
fi
}
-do_transfer()
+cond_start_capture()
{
- local listener_ns="$1"
- local connector_ns="$2"
- local cl_proto="$3"
- local srv_proto="$4"
- local connect_addr="$5"
-
- local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1))
- local cappid
- local FAILING_LINKS=${FAILING_LINKS:-""}
- local fastclose=${fastclose:-""}
- local speed=${speed:-"fast"}
+ local ns="$1"
- :> "$cout"
- :> "$sout"
:> "$capout"
if $capture; then
- local capuser
- if [ -z $SUDO_USER ] ; then
+ local capuser capfile
+ if [ -z $SUDO_USER ]; then
capuser=""
else
capuser="-Z $SUDO_USER"
fi
- capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}")
+ capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "$ns")
echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile"
- ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
+ ip netns exec "$ns" tcpdump -i any -s 65535 -B 32768 $capuser -w "$capfile" > "$capout" 2>&1 &
cappid=$!
sleep 1
fi
+}
+
+cond_stop_capture()
+{
+ if $capture; then
+ sleep 1
+ kill $cappid
+ cat "$capout"
+ fi
+}
+
+get_port()
+{
+ echo "$((10000 + MPTCP_LIB_TEST_COUNTER - 1))"
+}
+
+do_transfer()
+{
+ local listener_ns="$1"
+ local connector_ns="$2"
+ local cl_proto="$3"
+ local srv_proto="$4"
+ local connect_addr="$5"
+ local port
+
+ local FAILING_LINKS=${FAILING_LINKS:-""}
+ local fastclose=${fastclose:-""}
+ local speed=${speed:-"fast"}
+ local bind_addr=${bind_addr:-"::"}
+ local listener_in="${sin}"
+ local connector_in="${cin}"
+ port=$(get_port)
+
+ :> "$cout"
+ :> "$sout"
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat -n
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat -n
+ cond_start_capture ${listener_ns}
+
+ mptcp_lib_nstat_init "${listener_ns}"
+ mptcp_lib_nstat_init "${connector_ns}"
local extra_args
if [ $speed = "fast" ]; then
@@ -931,42 +1022,40 @@ do_transfer()
extra_srv_args="$extra_args $extra_srv_args"
if [ "$test_linkfail" -gt 1 ];then
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- $extra_srv_args "::" < "$sinfail" > "$sout" &
- else
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- $extra_srv_args "::" < "$sin" > "$sout" &
+ listener_in="${sinfail}"
fi
+ ip netns exec ${listener_ns} \
+ ./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \
+ ${extra_srv_args} "${bind_addr}" < "${listener_in}" > "${sout}" &
local spid=$!
mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
extra_cl_args="$extra_args $extra_cl_args"
if [ "$test_linkfail" -eq 0 ];then
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
- $extra_cl_args $connect_addr < "$cin" > "$cout" &
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_cl_args $connect_addr < "$cin" > "$cout" &
elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then
+ connector_in="${cinsent}"
( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
tee "$cinsent" | \
- timeout ${timeout_test} \
ip netns exec ${connector_ns} \
./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
$extra_cl_args $connect_addr > "$cout" &
else
+ connector_in="${cinsent}"
tee "$cinsent" < "$cinfail" | \
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
- $extra_cl_args $connect_addr > "$cout" &
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_cl_args $connect_addr > "$cout" &
fi
local cpid=$!
+ mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+ "${connector_ns}" "${port}" "${cpid}" "${spid}" &
+ local timeout_pid=$!
+
pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr
check_cestab $listener_ns $connector_ns
@@ -975,49 +1064,29 @@ do_transfer()
wait $spid
local rets=$?
- if $capture; then
- sleep 1
- kill $cappid
+ if kill -0 $timeout_pid; then
+ # Finished before the timeout: kill the background job
+ mptcp_lib_kill_group_wait $timeout_pid
+ timeout_pid=0
fi
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat | grep Tcp > /tmp/${listener_ns}.out
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat | grep Tcp > /tmp/${connector_ns}.out
+ cond_stop_capture
- if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
- fail_test "client exit code $retc, server $rets"
- echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
- cat /tmp/${listener_ns}.out
- echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
- cat /tmp/${connector_ns}.out
+ mptcp_lib_nstat_get "${listener_ns}"
+ mptcp_lib_nstat_get "${connector_ns}"
- cat "$capout"
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
+ fail_test "client exit code $retc, server $rets"
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
return 1
fi
- if [ "$test_linkfail" -gt 1 ];then
- check_transfer $sinfail $cout "file received by client" $trunc_size
- else
- check_transfer $sin $cout "file received by client" $trunc_size
- fi
+ check_transfer $listener_in $cout "file received by client" $trunc_size
retc=$?
- if [ "$test_linkfail" -eq 0 ];then
- check_transfer $cin $sout "file received by server" $trunc_size
- else
- check_transfer $cinsent $sout "file received by server" $trunc_size
- fi
+ check_transfer $connector_in $sout "file received by server" $trunc_size
rets=$?
- if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
- cat "$capout"
- return 0
- fi
-
- cat "$capout"
- return 1
+ [ $retc -eq 0 ] && [ $rets -eq 0 ]
}
make_file()
@@ -1083,12 +1152,20 @@ run_tests()
do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
}
+_dump_stats()
+{
+ local ns="${1}"
+ local side="${2}"
+
+ mptcp_lib_print_err "${side} ns stats (${ns2})"
+ mptcp_lib_pr_nstat "${ns}"
+ echo
+}
+
dump_stats()
{
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep Tcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep Tcp
+ _dump_stats "${ns1}" "Server"
+ _dump_stats "${ns2}" "Client"
}
chk_csum_nr()
@@ -1109,28 +1186,29 @@ chk_csum_nr()
csum_ns2=${csum_ns2:1}
fi
- print_check "sum"
+ print_check "checksum server"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr")
- if [ "$count" != "$csum_ns1" ]; then
+ if [ -n "$count" ] && [ "$count" != "$csum_ns1" ]; then
extra_msg+=" ns1=$count"
fi
if [ -z "$count" ]; then
print_skip
elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } ||
- { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
+ { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
fail_test "got $count data checksum error[s] expected $csum_ns1"
else
print_ok
fi
- print_check "csum"
+
+ print_check "checksum client"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr")
- if [ "$count" != "$csum_ns2" ]; then
+ if [ -n "$count" ] && [ "$count" != "$csum_ns2" ]; then
extra_msg+=" ns2=$count"
fi
if [ -z "$count" ]; then
print_skip
elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } ||
- { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
+ { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
fail_test "got $count data checksum error[s] expected $csum_ns2"
else
print_ok
@@ -1147,6 +1225,8 @@ chk_fail_nr()
local count
local ns_tx=$ns1
local ns_rx=$ns2
+ local tx="server"
+ local rx="client"
local extra_msg=""
local allow_tx_lost=0
local allow_rx_lost=0
@@ -1154,7 +1234,8 @@ chk_fail_nr()
if [[ $ns_invert = "invert" ]]; then
ns_tx=$ns2
ns_rx=$ns1
- extra_msg="invert"
+ tx="client"
+ rx="server"
fi
if [[ "${fail_tx}" = "-"* ]]; then
@@ -1166,29 +1247,29 @@ chk_fail_nr()
fail_rx=${fail_rx:1}
fi
- print_check "ftx"
+ print_check "fail tx ${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx")
- if [ "$count" != "$fail_tx" ]; then
- extra_msg+=",tx=$count"
+ if [ -n "$count" ] && [ "$count" != "$fail_tx" ]; then
+ extra_msg+=" tx=$count"
fi
if [ -z "$count" ]; then
print_skip
elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } ||
- { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
+ { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
fail_test "got $count MP_FAIL[s] TX expected $fail_tx"
else
print_ok
fi
- print_check "failrx"
+ print_check "fail rx ${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx")
- if [ "$count" != "$fail_rx" ]; then
- extra_msg+=",rx=$count"
+ if [ -n "$count" ] && [ "$count" != "$fail_rx" ]; then
+ extra_msg+=" rx=$count"
fi
if [ -z "$count" ]; then
print_skip
elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } ||
- { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
+ { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
fail_test "got $count MP_FAIL[s] RX expected $fail_rx"
else
print_ok
@@ -1205,37 +1286,35 @@ chk_fclose_nr()
local count
local ns_tx=$ns2
local ns_rx=$ns1
- local extra_msg=""
+ local tx="client"
+ local rx="server"
if [[ $ns_invert = "invert" ]]; then
ns_tx=$ns1
ns_rx=$ns2
- extra_msg="invert"
+ tx="server"
+ rx="client"
fi
- print_check "ctx"
+ print_check "fast close tx ${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_tx" ]; then
- extra_msg+=",tx=$count"
fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
else
print_ok
fi
- print_check "fclzrx"
+ print_check "fast close rx ${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_rx" ]; then
- extra_msg+=",rx=$count"
fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
else
print_ok
fi
-
- print_info "$extra_msg"
}
chk_rst_nr()
@@ -1246,15 +1325,17 @@ chk_rst_nr()
local count
local ns_tx=$ns1
local ns_rx=$ns2
- local extra_msg=""
+ local tx="server"
+ local rx="client"
if [[ $ns_invert = "invert" ]]; then
ns_tx=$ns2
ns_rx=$ns1
- extra_msg="invert"
+ tx="client"
+ rx="server"
fi
- print_check "rtx"
+ print_check "reset tx ${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx")
if [ -z "$count" ]; then
print_skip
@@ -1266,7 +1347,7 @@ chk_rst_nr()
print_ok
fi
- print_check "rstrx"
+ print_check "reset rx ${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx")
if [ -z "$count" ]; then
print_skip
@@ -1277,8 +1358,6 @@ chk_rst_nr()
else
print_ok
fi
-
- print_info "$extra_msg"
}
chk_infi_nr()
@@ -1287,7 +1366,7 @@ chk_infi_nr()
local infi_rx=$2
local count
- print_check "itx"
+ print_check "infi tx client"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx")
if [ -z "$count" ]; then
print_skip
@@ -1297,7 +1376,7 @@ chk_infi_nr()
print_ok
fi
- print_check "infirx"
+ print_check "infi rx server"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx")
if [ -z "$count" ]; then
print_skip
@@ -1308,17 +1387,176 @@ chk_infi_nr()
fi
}
+chk_join_tx_nr()
+{
+ local syn_tx=${join_syn_tx:-0}
+ local create=${join_create_err:-0}
+ local bind=${join_bind_err:-0}
+ local connect=${join_connect_err:-0}
+ local rc=${KSFT_PASS}
+ local count
+
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTx")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$syn_tx" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn tx"
+ fail_test "got $count JOIN[s] syn tx expected $syn_tx"
+ fi
+
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxCreatSkErr")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$create" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn tx create socket error"
+ fail_test "got $count JOIN[s] syn tx create socket error expected $create"
+ fi
+
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$bind" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn tx bind error"
+ fail_test "got $count JOIN[s] syn tx bind error expected $bind"
+ fi
+
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$connect" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn tx connect error"
+ fail_test "got $count JOIN[s] syn tx connect error expected $connect"
+ fi
+
+ print_results "join Tx" ${rc}
+}
+
+chk_fallback_nr()
+{
+ local infinite_map_tx=${fb_infinite_map_tx:-0}
+ local dss_corruption=${fb_dss_corruption:-0}
+ local simult_conn=${fb_simult_conn:-0}
+ local mpc_passive=${fb_mpc_passive:-0}
+ local mpc_active=${fb_mpc_active:-0}
+ local mpc_data=${fb_mpc_data:-0}
+ local md5_sig=${fb_md5_sig:-0}
+ local dss=${fb_dss:-0}
+ local rc=${KSFT_PASS}
+ local ns=$1
+ local count
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$infinite_map_tx" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns infinite map tx fallback"
+ fail_test "got $count infinite map tx fallback[s] in $ns expected $infinite_map_tx"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$dss_corruption" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns dss corruption fallback"
+ fail_test "got $count dss corruption fallback[s] in $ns expected $dss_corruption"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$simult_conn" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns simult conn fallback"
+ fail_test "got $count simult conn fallback[s] in $ns expected $simult_conn"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$mpc_passive" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns mpc passive fallback"
+ fail_test "got $count mpc passive fallback[s] in $ns expected $mpc_passive"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$mpc_active" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns mpc active fallback"
+ fail_test "got $count mpc active fallback[s] in $ns expected $mpc_active"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$mpc_data" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns mpc data fallback"
+ fail_test "got $count mpc data fallback[s] in $ns expected $mpc_data"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$md5_sig" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns MD5 Sig fallback"
+ fail_test "got $count MD5 Sig fallback[s] in $ns expected $md5_sig"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$dss" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns dss fallback"
+ fail_test "got $count dss fallback[s] in $ns expected $dss"
+ fi
+
+ return $rc
+}
+
+chk_fallback_nr_all()
+{
+ local netns=("ns1" "ns2")
+ local fb_ns=("fb_ns1" "fb_ns2")
+ local rc=${KSFT_PASS}
+
+ for i in 0 1; do
+ if [ -n "${!fb_ns[i]}" ]; then
+ eval "${!fb_ns[i]}" \
+ chk_fallback_nr ${netns[i]} || rc=${?}
+ else
+ chk_fallback_nr ${netns[i]} || rc=${?}
+ fi
+ done
+
+ if [ "${rc}" != "${KSFT_PASS}" ]; then
+ print_results "fallback" ${rc}
+ fi
+}
+
chk_join_nr()
{
local syn_nr=$1
local syn_ack_nr=$2
local ack_nr=$3
- local csum_ns1=${4:-0}
- local csum_ns2=${5:-0}
- local fail_nr=${6:-0}
- local rst_nr=${7:-0}
- local infi_nr=${8:-0}
- local corrupted_pkts=${9:-0}
+ local syn_rej=${join_syn_rej:-0}
+ local csum_ns1=${join_csum_ns1:-0}
+ local csum_ns2=${join_csum_ns2:-0}
+ local fail_nr=${join_fail_nr:-0}
+ local rst_nr=${join_rst_nr:-0}
+ local infi_nr=${join_infi_nr:-0}
+ local corrupted_pkts=${join_corrupted_pkts:-0}
+ local rc=${KSFT_PASS}
local count
local with_cookie
@@ -1326,43 +1564,73 @@ chk_join_nr()
print_info "${corrupted_pkts} corrupted pkts"
fi
- print_check "syn"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx")
if [ -z "$count" ]; then
- print_skip
+ rc=${KSFT_SKIP}
elif [ "$count" != "$syn_nr" ]; then
- fail_test "got $count JOIN[s] syn expected $syn_nr"
- else
- print_ok
+ rc=${KSFT_FAIL}
+ print_check "syn rx"
+ fail_test "got $count JOIN[s] syn rx expected $syn_nr"
fi
- print_check "synack"
with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies)
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
if [ -z "$count" ]; then
- print_skip
+ rc=${KSFT_SKIP}
elif [ "$count" != "$syn_ack_nr" ]; then
# simult connections exceeding the limit with cookie enabled could go up to
# synack validation as the conn limit can be enforced reliably only after
# the subflow creation
- if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then
- print_ok
- else
- fail_test "got $count JOIN[s] synack expected $syn_ack_nr"
+ if [ "$with_cookie" != 2 ] || [ "$count" -le "$syn_ack_nr" ] || [ "$count" -gt "$syn_nr" ]; then
+ rc=${KSFT_FAIL}
+ print_check "synack rx"
+ fail_test "got $count JOIN[s] synack rx expected $syn_ack_nr"
fi
- else
- print_ok
fi
- print_check "ack"
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "0" ]; then
+ rc=${KSFT_FAIL}
+ print_check "synack HMAC"
+ fail_test "got $count JOIN[s] synack HMAC failure expected 0"
+ fi
+
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
if [ -z "$count" ]; then
- print_skip
+ rc=${KSFT_SKIP}
elif [ "$count" != "$ack_nr" ]; then
- fail_test "got $count JOIN[s] ack expected $ack_nr"
- else
- print_ok
+ rc=${KSFT_FAIL}
+ print_check "ack rx"
+ fail_test "got $count JOIN[s] ack rx expected $ack_nr"
fi
+
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "0" ]; then
+ rc=${KSFT_FAIL}
+ print_check "ack HMAC"
+ fail_test "got $count JOIN[s] ack HMAC failure expected 0"
+ fi
+
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$syn_rej" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn rejected"
+ fail_test "got $count JOIN[s] syn rejected expected $syn_rej"
+ fi
+
+ print_results "join Rx" ${rc}
+
+ join_syn_tx="${join_syn_tx:-${syn_nr}}" \
+ chk_join_tx_nr
+
+ chk_fallback_nr_all
+
if $validate_checksum; then
chk_csum_nr $csum_ns1 $csum_ns2
chk_fail_nr $fail_nr $fail_nr
@@ -1415,30 +1683,39 @@ chk_add_nr()
local add_nr=$1
local echo_nr=$2
local port_nr=${3:-0}
- local syn_nr=${4:-$port_nr}
- local syn_ack_nr=${5:-$port_nr}
- local ack_nr=${6:-$port_nr}
- local mis_syn_nr=${7:-0}
- local mis_ack_nr=${8:-0}
+ local ns_invert=${4:-""}
+ local syn_nr=$port_nr
+ local syn_ack_nr=$port_nr
+ local ack_nr=$port_nr
+ local mis_syn_nr=0
+ local mis_ack_nr=0
+ local ns_tx=$ns1
+ local ns_rx=$ns2
+ local tx=""
+ local rx=""
local count
- local timeout
- timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
+ if [[ $ns_invert = "invert" ]]; then
+ ns_tx=$ns2
+ ns_rx=$ns1
+ tx=" client"
+ rx=" server"
+ fi
- print_check "add"
- count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr")
+ print_check "add addr rx${rx}"
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr")
if [ -z "$count" ]; then
print_skip
- # if the test configured a short timeout tolerate greater then expected
- # add addrs options, due to retransmissions
- elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then
+ # Tolerate more ADD_ADDR then expected (if any), due to retransmissions
+ elif [ "$count" != "$add_nr" ] &&
+ { [ "$add_nr" -eq 0 ] || [ "$count" -lt "$add_nr" ]; }; then
fail_test "got $count ADD_ADDR[s] expected $add_nr"
else
print_ok
fi
- print_check "echo"
- count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd")
+ print_check "add addr echo rx${tx}"
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$echo_nr" ]; then
@@ -1448,8 +1725,8 @@ chk_add_nr()
fi
if [ $port_nr -gt 0 ]; then
- print_check "pt"
- count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd")
+ print_check "add addr rx with port${rx}"
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$port_nr" ]; then
@@ -1458,8 +1735,8 @@ chk_add_nr()
print_ok
fi
- print_check "syn"
- count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx")
+ print_check "syn rx port${tx}"
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_nr" ]; then
@@ -1469,8 +1746,8 @@ chk_add_nr()
print_ok
fi
- print_check "synack"
- count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx")
+ print_check "synack rx port${rx}"
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_ack_nr" ]; then
@@ -1480,8 +1757,8 @@ chk_add_nr()
print_ok
fi
- print_check "ack"
- count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx")
+ print_check "ack rx port${tx}"
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$ack_nr" ]; then
@@ -1491,8 +1768,8 @@ chk_add_nr()
print_ok
fi
- print_check "syn"
- count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx")
+ print_check "syn rx port mismatch${tx}"
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mis_syn_nr" ]; then
@@ -1502,8 +1779,8 @@ chk_add_nr()
print_ok
fi
- print_check "ack"
- count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx")
+ print_check "ack rx port mismatch${tx}"
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mis_ack_nr" ]; then
@@ -1519,24 +1796,21 @@ chk_add_tx_nr()
{
local add_tx_nr=$1
local echo_tx_nr=$2
- local timeout
local count
- timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
-
- print_check "add TX"
+ print_check "add addr tx"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx")
if [ -z "$count" ]; then
print_skip
- # if the test configured a short timeout tolerate greater then expected
- # add addrs options, due to retransmissions
- elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
+ # Tolerate more ADD_ADDR then expected (if any), due to retransmissions
+ elif [ "$count" != "$add_tx_nr" ] &&
+ { [ "$add_tx_nr" -eq 0 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr"
else
print_ok
fi
- print_check "echo TX"
+ print_check "add addr echo tx"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx")
if [ -z "$count" ]; then
print_skip
@@ -1556,6 +1830,8 @@ chk_rm_nr()
local count
local addr_ns=$ns1
local subflow_ns=$ns2
+ local addr="server"
+ local subflow="client"
local extra_msg=""
shift 2
@@ -1565,16 +1841,14 @@ chk_rm_nr()
shift
done
- if [ -z $invert ]; then
- addr_ns=$ns1
- subflow_ns=$ns2
- elif [ $invert = "true" ]; then
+ if [ "$invert" = "true" ]; then
addr_ns=$ns2
subflow_ns=$ns1
- extra_msg="invert"
+ addr="client"
+ subflow="server"
fi
- print_check "rm"
+ print_check "rm addr rx ${addr}"
count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr")
if [ -z "$count" ]; then
print_skip
@@ -1584,7 +1858,7 @@ chk_rm_nr()
print_ok
fi
- print_check "rmsf"
+ print_check "rm subflow ${subflow}"
count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow")
if [ -z "$count" ]; then
print_skip
@@ -1598,7 +1872,7 @@ chk_rm_nr()
count=$((count + cnt))
if [ "$count" != "$rm_subflow_nr" ]; then
suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
- extra_msg+=" simult"
+ extra_msg="simult"
fi
if [ $count -ge "$rm_subflow_nr" ] && \
[ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
@@ -1619,7 +1893,7 @@ chk_rm_tx_nr()
{
local rm_addr_tx_nr=$1
- print_check "rm TX"
+ print_check "rm addr tx client"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx")
if [ -z "$count" ]; then
print_skip
@@ -1634,9 +1908,11 @@ chk_prio_nr()
{
local mp_prio_nr_tx=$1
local mp_prio_nr_rx=$2
+ local mpj_syn=$3
+ local mpj_syn_ack=$4
local count
- print_check "ptx"
+ print_check "mp_prio tx server"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx")
if [ -z "$count" ]; then
print_skip
@@ -1646,7 +1922,7 @@ chk_prio_nr()
print_ok
fi
- print_check "prx"
+ print_check "mp_prio rx client"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx")
if [ -z "$count" ]; then
print_skip
@@ -1655,6 +1931,26 @@ chk_prio_nr()
else
print_ok
fi
+
+ print_check "syn backup"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynBackupRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$mpj_syn" ]; then
+ fail_test "got $count JOIN[s] syn with Backup expected $mpj_syn"
+ else
+ print_ok
+ fi
+
+ print_check "synack backup"
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckBackupRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$mpj_syn_ack" ]; then
+ fail_test "got $count JOIN[s] synack with Backup expected $mpj_syn_ack"
+ else
+ print_ok
+ fi
}
chk_subflow_nr()
@@ -1819,7 +2115,8 @@ subflows_tests()
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
fi
# subflow
@@ -1848,7 +2145,8 @@ subflows_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 2 2 1
+ join_syn_rej=1 \
+ chk_join_nr 2 2 1
fi
# single subflow, dev
@@ -1869,9 +2167,11 @@ subflows_error_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.12.2 flags subflow
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
+ join_bind_err=1 \
+ chk_join_nr 0 0 0
fi
# multiple subflows, with subflow creation error
@@ -1883,7 +2183,8 @@ subflows_error_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=2 \
+ chk_join_nr 1 1 1
fi
# multiple subflows, with subflow timeout on MPJ
@@ -1895,7 +2196,8 @@ subflows_error_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=2 \
+ chk_join_nr 1 1 1
fi
# multiple subflows, check that the endpoint corresponding to
@@ -1916,7 +2218,8 @@ subflows_error_tests()
# additional subflow could be created only if the PM select
# the later endpoint, skipping the already used one
- chk_join_nr 1 1 1
+ join_syn_tx=2 \
+ chk_join_nr 1 1 1
fi
}
@@ -1955,6 +2258,21 @@ signal_address_tests()
chk_add_nr 1 1
fi
+ # uncommon: subflow and signal flags on the same endpoint
+ # or because the user wrongly picked both, but still expects the client
+ # to create additional subflows
+ if reset "subflow and signal together"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags signal,subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1 0 invert # only initiated by ns2
+ chk_add_nr 0 0 0 # none initiated by ns1
+ chk_rst_nr 0 0 invert # no RST sent by the client
+ chk_rst_nr 0 0 # no RST sent by the server
+ fi
+
# accept and use add_addr with additional subflows
if reset "multiple subflows and signal"; then
pm_nl_set_limits $ns1 0 3
@@ -1974,7 +2292,8 @@ signal_address_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
pm_nl_set_limits $ns2 3 3
- run_tests $ns1 $ns2 10.0.1.1
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
chk_add_nr 3 3
fi
@@ -1986,8 +2305,10 @@ signal_address_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
pm_nl_set_limits $ns2 3 3
- run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ join_syn_tx=3 \
+ chk_join_nr 1 1 1
chk_add_nr 3 3
fi
@@ -2023,6 +2344,74 @@ signal_address_tests()
fi
}
+laminar_endp_tests()
+{
+ # no laminar endpoints: routing rules are used
+ if reset_with_tcp_filter "without a laminar endpoint" ns1 10.0.2.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ join_syn_tx=1 \
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+ fi
+
+ # laminar endpoints: this endpoint is used
+ if reset_with_tcp_filter "with a laminar endpoint" ns1 10.0.2.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ fi
+
+ # laminar endpoints: these endpoints are used
+ if reset_with_tcp_filter "with multiple laminar endpoints" ns1 10.0.2.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns2 dead:beef:3::2 flags laminar
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags laminar
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_add_nr 2 2
+ fi
+
+ # laminar endpoints: only one endpoint is used
+ if reset_with_tcp_filter "single laminar endpoint" ns1 10.0.2.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 2 2
+ fi
+
+ # laminar endpoints: subflow and laminar flags
+ if reset_with_tcp_filter "sublow + laminar endpoints" ns1 10.0.2.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ pm_nl_set_limits $ns1 0 4
+ pm_nl_set_limits $ns2 2 4
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,laminar
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,laminar
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ fi
+}
+
link_failure_tests()
{
# accept and use add_addr with additional subflows and link loss
@@ -2155,7 +2544,8 @@ add_addr_timeout_tests()
pm_nl_set_limits $ns2 2 2
speed=10 \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=2 \
+ chk_join_nr 1 1 1
chk_add_nr 8 0
fi
}
@@ -2166,7 +2556,7 @@ remove_tests()
if reset "remove single subflow"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
addr_nr_ns2=-1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
@@ -2179,8 +2569,8 @@ remove_tests()
if reset "remove multiple subflows"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
- pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
addr_nr_ns2=-2 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
@@ -2191,7 +2581,7 @@ remove_tests()
# single address, remove
if reset "remove single address"; then
pm_nl_set_limits $ns1 0 1
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
pm_nl_set_limits $ns2 1 1
addr_nr_ns1=-1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
@@ -2204,9 +2594,9 @@ remove_tests()
# subflow and signal, remove
if reset "remove subflow and signal"; then
pm_nl_set_limits $ns1 0 2
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
pm_nl_set_limits $ns2 1 2
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
@@ -2218,10 +2608,10 @@ remove_tests()
# subflows and signal, remove
if reset "remove subflows and signal"; then
pm_nl_set_limits $ns1 0 3
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
pm_nl_set_limits $ns2 1 3
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
@@ -2233,9 +2623,9 @@ remove_tests()
# addresses remove
if reset "remove addresses"; then
pm_nl_set_limits $ns1 3 3
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
- pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup
pm_nl_set_limits $ns2 3 3
addr_nr_ns1=-3 speed=10 \
run_tests $ns1 $ns2 10.0.1.1
@@ -2248,14 +2638,15 @@ remove_tests()
# invalid addresses remove
if reset "remove invalid addresses"; then
pm_nl_set_limits $ns1 3 3
- pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup
# broadcast IP: no packet for this address will be received on ns1
- pm_nl_add_endpoint $ns1 224.0.0.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 224.0.0.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
pm_nl_set_limits $ns2 2 2
addr_nr_ns1=-3 speed=10 \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=2 join_connect_err=1 \
+ chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
chk_rst_nr 0 0
@@ -2264,10 +2655,10 @@ remove_tests()
# subflows and signal, flush
if reset "flush subflows and signal"; then
pm_nl_set_limits $ns1 0 3
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
pm_nl_set_limits $ns2 1 3
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
@@ -2280,9 +2671,9 @@ remove_tests()
if reset "flush subflows"; then
pm_nl_set_limits $ns1 3 3
pm_nl_set_limits $ns2 3 3
- pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup id 150
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
@@ -2299,9 +2690,9 @@ remove_tests()
# addresses flush
if reset "flush addresses"; then
pm_nl_set_limits $ns1 3 3
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
- pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup
pm_nl_set_limits $ns2 3 3
addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
@@ -2314,13 +2705,14 @@ remove_tests()
# invalid addresses flush
if reset "flush invalid addresses"; then
pm_nl_set_limits $ns1 3 3
- pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.14.1 flags signal,backup
pm_nl_set_limits $ns2 3 3
addr_nr_ns1=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=3 \
+ chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
chk_rst_nr 0 0
@@ -2584,7 +2976,11 @@ mixed_tests()
pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
speed=slow \
run_tests $ns1 $ns2 dead:beef:2::1
- chk_join_nr 1 1 1
+ if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_fullmesh_max$"; then
+ chk_join_nr 0 0 0
+ else
+ chk_join_nr 1 1 1
+ fi
fi
# fullmesh still tries to create all the possibly subflows with
@@ -2612,33 +3008,46 @@ backup_tests()
sflags=nobackup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 1 0
fi
# single address, backup
if reset "single address, backup" &&
continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
+ pm_nl_set_limits $ns2 1 1
+ sflags=nobackup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_prio_nr 1 0 0 1
+ fi
+
+ # single address, switch to backup
+ if reset "single address, switch to backup" &&
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_set_limits $ns2 1 1
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
- chk_prio_nr 1 1
+ chk_prio_nr 1 1 0 0
fi
# single address with port, backup
if reset "single address with port, backup" &&
continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 1
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup port 10100
pm_nl_set_limits $ns2 1 1
- sflags=backup speed=slow \
+ sflags=nobackup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
- chk_prio_nr 1 1
+ chk_prio_nr 1 0 0 1
fi
if reset "mpc backup" &&
@@ -2647,17 +3056,26 @@ backup_tests()
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 0 0
fi
if reset "mpc backup both sides" &&
continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
- pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns1 10.0.1.1 flags signal,backup
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
+
+ # 10.0.2.2 (non-backup) -> 10.0.1.1 (backup)
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+ # 10.0.1.2 (backup) -> 10.0.2.1 (non-backup)
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ ip -net "$ns2" route add 10.0.2.1 via 10.0.1.1 dev ns2eth1 # force this path
+
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- chk_prio_nr 1 1
+ chk_join_nr 2 2 2
+ chk_prio_nr 1 1 1 1
fi
if reset "mpc switch to backup" &&
@@ -2666,7 +3084,7 @@ backup_tests()
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 0 0
fi
if reset "mpc switch to backup both sides" &&
@@ -2676,7 +3094,7 @@ backup_tests()
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
- chk_prio_nr 1 1
+ chk_prio_nr 1 1 0 0
fi
}
@@ -2709,6 +3127,32 @@ verify_listener_events()
fail_test
}
+chk_mpc_endp_attempt()
+{
+ local retl=$1
+ local attempts=$2
+
+ print_check "Connect"
+
+ if [ ${retl} = 124 ]; then
+ fail_test "timeout on connect"
+ elif [ ${retl} = 0 ]; then
+ fail_test "unexpected successful connect"
+ else
+ print_ok
+
+ print_check "Attempts"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPCapableEndpAttempt")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$attempts" ]; then
+ fail_test "got ${count} MPC attempt[s] on port-based endpoint, expected ${attempts}"
+ else
+ print_ok
+ fi
+ fi
+}
+
add_addr_ports_tests()
{
# signal address with port
@@ -2799,6 +3243,149 @@ add_addr_ports_tests()
chk_join_nr 2 2 2
chk_add_nr 2 2 2
fi
+
+ if reset "port-based signal endpoint must not accept mpc"; then
+ local port retl count
+ port=$(get_port)
+
+ cond_start_capture ${ns1}
+ pm_nl_add_endpoint ${ns1} 10.0.2.1 flags signal port ${port}
+ mptcp_lib_wait_local_port_listen ${ns1} ${port}
+
+ timeout 1 ip netns exec ${ns2} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s MPTCP 10.0.2.1 >/dev/null 2>&1
+ retl=$?
+ cond_stop_capture
+
+ chk_mpc_endp_attempt ${retl} 1
+ fi
+}
+
+bind_tests()
+{
+ # bind to one address should not allow extra subflows to other addresses
+ if reset "bind main address v4, no join v4"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ bind_addr="10.0.1.1" \
+ run_tests $ns1 $ns2 10.0.1.1
+ join_syn_tx=1 \
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+ fi
+
+ # bind to one address should not allow extra subflows to other addresses
+ if reset "bind main address v6, no join v6"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+ bind_addr="dead:beef:1::1" \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ join_syn_tx=1 \
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+ fi
+
+ # multiple binds to allow extra subflows to other addresses
+ if reset "multiple bind to allow joins v4"; then
+ local extra_bind
+
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+
+ # Launching another app listening on a different address
+ # Note: it could be a totally different app, e.g. nc, socat, ...
+ ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+ -s MPTCP 10.0.2.1 &
+ extra_bind=$!
+
+ bind_addr="10.0.1.1" \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+
+ kill ${extra_bind}
+ fi
+
+ # multiple binds to allow extra subflows to other addresses
+ if reset "multiple bind to allow joins v6"; then
+ local extra_bind
+
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+
+ # Launching another app listening on a different address
+ # Note: it could be a totally different app, e.g. nc, socat, ...
+ ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+ -s MPTCP dead:beef:2::1 &
+ extra_bind=$!
+
+ bind_addr="dead:beef:1::1" \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+
+ kill ${extra_bind}
+ fi
+
+ # multiple binds to allow extra subflows to other addresses: v6 LL case
+ if reset "multiple bind to allow joins v6 link-local routing"; then
+ local extra_bind ns1ll1 ns1ll2
+
+ ns1ll1="$(get_ll_addr $ns1 ns1eth1)"
+ ns1ll2="$(get_ll_addr $ns1 ns1eth2)"
+
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal
+
+ wait_ll_ready $ns1 # to be able to bind
+ wait_ll_ready $ns2 # also needed to bind on the client side
+ ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+ -s MPTCP "${ns1ll2}%ns1eth2" &
+ extra_bind=$!
+
+ bind_addr="${ns1ll1}%ns1eth1" \
+ run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1"
+ # it is not possible to connect to the announced LL addr without
+ # specifying the outgoing interface.
+ join_connect_err=1 \
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+
+ kill ${extra_bind}
+ fi
+
+ # multiple binds to allow extra subflows to v6 LL addresses: laminar
+ if reset "multiple bind to allow joins v6 link-local laminar" &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ local extra_bind ns1ll1 ns1ll2 ns2ll2
+
+ ns1ll1="$(get_ll_addr $ns1 ns1eth1)"
+ ns1ll2="$(get_ll_addr $ns1 ns1eth2)"
+ ns2ll2="$(get_ll_addr $ns2 ns2eth2)"
+
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal
+ pm_nl_add_endpoint $ns2 "${ns2ll2}" flags laminar dev ns2eth2
+
+ wait_ll_ready $ns1 # to be able to bind
+ wait_ll_ready $ns2 # also needed to bind on the client side
+ ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+ -s MPTCP "${ns1ll2}%ns1eth2" &
+ extra_bind=$!
+
+ bind_addr="${ns1ll1}%ns1eth1" \
+ run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1"
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+
+ kill ${extra_bind}
+ fi
}
syncookies_tests()
@@ -2829,7 +3416,8 @@ syncookies_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 2 1 1
+ join_syn_rej=1 \
+ chk_join_nr 2 1 1
fi
# test signal address with cookies
@@ -2868,37 +3456,16 @@ syncookies_tests()
checksum_tests()
{
- # checksum test 0 0
- if reset_with_checksum 0 0; then
- pm_nl_set_limits $ns1 0 1
- pm_nl_set_limits $ns2 0 1
- run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- fi
-
- # checksum test 1 1
- if reset_with_checksum 1 1; then
- pm_nl_set_limits $ns1 0 1
- pm_nl_set_limits $ns2 0 1
- run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- fi
-
- # checksum test 0 1
- if reset_with_checksum 0 1; then
- pm_nl_set_limits $ns1 0 1
- pm_nl_set_limits $ns2 0 1
- run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- fi
-
- # checksum test 1 0
- if reset_with_checksum 1 0; then
- pm_nl_set_limits $ns1 0 1
- pm_nl_set_limits $ns2 0 1
- run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- fi
+ local checksum_enable
+ for checksum_enable in "0 0" "1 1" "0 1" "1 0"; do
+ # checksum test 0 0, 1 1, 0 1, 1 0
+ if reset_with_checksum ${checksum_enable}; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+ done
}
deny_join_id0_tests()
@@ -2962,6 +3529,17 @@ deny_join_id0_tests()
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
fi
+
+ # default limits, server deny join id 0 + signal
+ if reset_with_allow_join_id0 "default limits, server deny join id 0" 0 1; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ fi
}
fullmesh_tests()
@@ -2987,6 +3565,9 @@ fullmesh_tests()
pm_nl_set_limits $ns1 1 3
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh
+ fi
fullmesh=1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
@@ -3053,7 +3634,7 @@ fullmesh_tests()
addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 1 0
chk_rm_nr 0 1
fi
@@ -3066,7 +3647,7 @@ fullmesh_tests()
sflags=nobackup,nofullmesh speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 1 0
chk_rm_nr 0 1
fi
}
@@ -3074,7 +3655,6 @@ fullmesh_tests()
fastclose_tests()
{
if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
- MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=client \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
@@ -3083,10 +3663,10 @@ fastclose_tests()
fi
if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
- MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=server \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0 0 0 0 1
+ join_rst_nr=1 \
+ chk_join_nr 0 0 0
chk_fclose_nr 1 1 invert
chk_rst_nr 1 1
fi
@@ -3105,7 +3685,11 @@ fail_tests()
MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=128 \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
+ join_csum_ns1=+1 join_csum_ns2=+0 \
+ join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \
+ join_corrupted_pkts="$(pedit_action_pkts)" \
+ fb_ns1="fb_dss=1" fb_ns2="fb_infinite_map_tx=1" \
+ chk_join_nr 0 0 0
chk_fail_nr 1 -1 invert
fi
@@ -3118,7 +3702,10 @@ fail_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
test_linkfail=1024 \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)"
+ join_csum_ns1=1 join_csum_ns2=0 \
+ join_fail_nr=1 join_rst_nr=1 join_infi_nr=0 \
+ join_corrupted_pkts="$(pedit_action_pkts)" \
+ chk_join_nr 1 1 1
fi
}
@@ -3260,6 +3847,36 @@ userspace_pm_chk_get_addr()
fi
}
+# $1: ns ; $2: event type ; $3: count
+chk_evt_nr()
+{
+ local ns=${1}
+ local evt_name="${2}"
+ local exp="${3}"
+
+ local evts="${evts_ns1}"
+ local evt="${!evt_name}"
+ local count
+
+ evt_name="${evt_name:16}" # without MPTCP_LIB_EVENT_
+ [ "${ns}" == "ns2" ] && evts="${evts_ns2}"
+
+ print_check "event ${ns} ${evt_name} (${exp})"
+
+ if [[ "${evt_name}" = "LISTENER_"* ]] &&
+ ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then
+ print_skip "event not supported"
+ return
+ fi
+
+ count=$(grep -cw "type:${evt}" "${evts}")
+ if [ "${count}" != "${exp}" ]; then
+ fail_test "got ${count} events, expected ${exp}"
+ else
+ print_ok
+ fi
+}
+
userspace_tests()
{
# userspace pm type prevents add_addr
@@ -3294,7 +3911,8 @@ userspace_tests()
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
fi
# userspace pm type does not send join
@@ -3317,8 +3935,9 @@ userspace_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
- chk_prio_nr 0 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
+ chk_prio_nr 0 0 0 0
fi
# userspace pm type prevents rm_addr
@@ -3340,8 +3959,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 2 2
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { timeout_test=120 test_linkfail=128 speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns1
userspace_pm_add_addr $ns1 10.0.2.1 10
@@ -3356,18 +3975,16 @@ userspace_tests()
"signal"
userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1"
userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1"
- userspace_pm_rm_addr $ns1 10
userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED
userspace_pm_chk_dump_addr "${ns1}" \
- "id 20 flags signal 10.0.3.1" "after rm_addr 10"
+ "id 20 flags signal 10.0.3.1" "after rm_sf 10"
userspace_pm_rm_addr $ns1 20
- userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20"
- chk_rm_nr 2 2 invert
+ chk_rm_nr 1 1 invert
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
# userspace pm create destroy subflow
@@ -3375,8 +3992,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { timeout_test=120 test_linkfail=128 speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
userspace_pm_add_sf $ns2 10.0.3.2 20
@@ -3387,16 +4004,15 @@ userspace_tests()
"id 20 flags subflow 10.0.3.2" \
"subflow"
userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2"
- userspace_pm_rm_addr $ns2 20
userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
userspace_pm_chk_dump_addr "${ns2}" \
"" \
- "after rm_addr 20"
- chk_rm_nr 1 1
+ "after rm_sf 20"
+ chk_rm_nr 0 1
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
# userspace pm create id 0 subflow
@@ -3404,8 +4020,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { timeout_test=120 test_linkfail=128 speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
chk_mptcp_info subflows 0 subflows 0
@@ -3417,7 +4033,7 @@ userspace_tests()
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
# userspace pm remove initial subflow
@@ -3425,8 +4041,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { timeout_test=120 test_linkfail=128 speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
userspace_pm_add_sf $ns2 10.0.3.2 20
@@ -3441,7 +4057,7 @@ userspace_tests()
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 1 1
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
# userspace pm send RM_ADDR for ID 0
@@ -3449,8 +4065,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 1 1
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { timeout_test=120 test_linkfail=128 speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns1
userspace_pm_add_addr $ns1 10.0.2.1 10
@@ -3467,7 +4083,7 @@ userspace_tests()
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 1 1
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
}
@@ -3476,12 +4092,12 @@ endpoint_tests()
# subflow_rebuild_header is needed to support the implicit flag
# userspace pm type prevents add_addr
if reset "implicit EP" &&
- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- speed=slow \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { timeout_test=120 test_linkfail=128 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns1
@@ -3497,34 +4113,196 @@ endpoint_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
pm_nl_check_endpoint "modif is allowed" \
$ns2 10.0.2.2 id 1 flags signal
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
- if reset "delete and re-add" &&
- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
- pm_nl_set_limits $ns1 1 1
- pm_nl_set_limits $ns2 1 1
+ if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT &&
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ start_events
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_set_limits $ns2 0 3
+ pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
- test_linkfail=4 speed=20 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { timeout_test=120 test_linkfail=128 speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
pm_nl_check_endpoint "creation" \
$ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
- chk_subflow_nr "before delete" 2
+ chk_subflow_nr "before delete id 2" 2
chk_mptcp_info subflows 1 subflows 1
pm_nl_del_endpoint $ns2 2 10.0.2.2
sleep 0.5
- chk_subflow_nr "after delete" 1
+ chk_subflow_nr "after delete id 2" 1
chk_mptcp_info subflows 0 subflows 0
- pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
wait_mpj $ns2
- chk_subflow_nr "after re-add" 2
+ chk_subflow_nr "after re-add id 2" 2
chk_mptcp_info subflows 1 subflows 1
- mptcp_lib_kill_wait $tests_pid
+
+ pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
+ wait_attempt_fail $ns2
+ chk_subflow_nr "after new reject" 2
+ chk_mptcp_info subflows 1 subflows 1
+
+ ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
+ pm_nl_del_endpoint $ns2 3 10.0.3.2
+ pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
+ wait_mpj $ns2
+ chk_subflow_nr "after no reject" 3
+ chk_mptcp_info subflows 2 subflows 2
+
+ local i
+ for i in $(seq 3); do
+ pm_nl_del_endpoint $ns2 1 10.0.1.2
+ sleep 0.5
+ chk_subflow_nr "after delete id 0 ($i)" 2
+ chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf
+
+ pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
+ wait_mpj $ns2
+ chk_subflow_nr "after re-add id 0 ($i)" 3
+ chk_mptcp_info subflows 3 subflows 3
+ done
+
+ mptcp_lib_kill_group_wait $tests_pid
+
+ kill_events_pids
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 4
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 4
+
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 0
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 0
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 5 # one has been closed before estab
+
+ join_syn_tx=7 \
+ chk_join_nr 6 6 6
+ chk_rm_nr 4 4
+ fi
+
+ # remove and re-add
+ if reset_with_events "delete re-add signal" &&
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_set_limits $ns2 3 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
+ # broadcast IP: no packet for this address will be received on ns1
+ pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
+ pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal
+ { timeout_test=120 test_linkfail=128 speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
+ local tests_pid=$!
+
+ wait_mpj $ns2
+ pm_nl_check_endpoint "creation" \
+ $ns1 10.0.2.1 id 1 flags signal
+ chk_subflow_nr "before delete" 2
+ chk_mptcp_info subflows 1 subflows 1
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 1
+
+ pm_nl_del_endpoint $ns1 1 10.0.2.1
+ pm_nl_del_endpoint $ns1 2 224.0.0.1
+ sleep 0.5
+ chk_subflow_nr "after delete" 1
+ chk_mptcp_info subflows 0 subflows 0
+ chk_mptcp_info add_addr_signal 0 add_addr_accepted 0
+
+ pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
+ wait_mpj $ns2
+ chk_subflow_nr "after re-add" 3
+ chk_mptcp_info subflows 2 subflows 2
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
+
+ pm_nl_del_endpoint $ns1 42 10.0.1.1
+ sleep 0.5
+ chk_subflow_nr "after delete ID 0" 2
+ chk_mptcp_info subflows 2 subflows 2
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
+
+ pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
+ wait_mpj $ns2
+ chk_subflow_nr "after re-add ID 0" 3
+ chk_mptcp_info subflows 3 subflows 3
+ chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
+
+ pm_nl_del_endpoint $ns1 99 10.0.1.1
+ sleep 0.5
+ chk_subflow_nr "after re-delete ID 0" 2
+ chk_mptcp_info subflows 2 subflows 2
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
+
+ pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal
+ wait_mpj $ns2
+ chk_subflow_nr "after re-re-add ID 0" 3
+ chk_mptcp_info subflows 3 subflows 3
+ chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
+ mptcp_lib_kill_group_wait $tests_pid
+
+ kill_events_pids
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 0
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 3
+
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 6
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 4
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 3
+
+ join_connect_err=1 \
+ chk_join_nr 5 5 5
+ chk_add_nr 6 6
+ chk_rm_nr 4 3 invert
+ fi
+
+ # flush and re-add
+ if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT &&
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 1 2
+ # broadcast IP: no packet for this address will be received on ns1
+ pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
+ { timeout_test=120 test_linkfail=128 speed=20 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
+ local tests_pid=$!
+
+ wait_attempt_fail $ns2
+ chk_subflow_nr "before flush" 1
+ chk_mptcp_info subflows 0 subflows 0
+
+ pm_nl_flush_endpoint $ns2
+ pm_nl_flush_endpoint $ns1
+ wait_rm_addr $ns2 0
+ ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
+ pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
+ wait_mpj $ns2
+ pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
+ wait_mpj $ns2
+ mptcp_lib_kill_group_wait $tests_pid
+
+ join_syn_tx=3 join_connect_err=1 \
+ chk_join_nr 2 2 2
+ chk_add_nr 2 2
+ chk_rm_nr 1 0 invert
fi
}
@@ -3559,6 +4337,7 @@ all_tests_sorted=(
f@subflows_tests
e@subflows_error_tests
s@signal_address_tests
+ L@laminar_endp_tests
l@link_failure_tests
t@add_addr_timeout_tests
r@remove_tests
@@ -3568,6 +4347,7 @@ all_tests_sorted=(
M@mixed_tests
b@backup_tests
p@add_addr_ports_tests
+ B@bind_tests
k@syncookies_tests
S@checksum_tests
d@deny_join_id0_tests
@@ -3627,9 +4407,11 @@ if [ ${#tests[@]} -eq 0 ]; then
tests=("${all_tests_names[@]}")
fi
+mptcp_lib_subtests_last_ts_reset
for subtests in "${tests[@]}"; do
"${subtests}"
done
+append_prev_results
if [ ${ret} -ne 0 ]; then
echo
@@ -3640,7 +4422,6 @@ if [ ${ret} -ne 0 ]; then
echo
fi
-append_prev_results
mptcp_lib_result_print_all_tap
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 438280e68434..5fea7e7df628 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
. "$(dirname "${0}")/../lib.sh"
-. "$(dirname "${0}")/../net_helper.sh"
readonly KSFT_PASS=0
readonly KSFT_FAIL=1
@@ -12,10 +11,14 @@ readonly KSFT_SKIP=4
readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}"
# These variables are used in some selftests, read-only
+declare -rx MPTCP_LIB_EVENT_CREATED=1 # MPTCP_EVENT_CREATED
+declare -rx MPTCP_LIB_EVENT_ESTABLISHED=2 # MPTCP_EVENT_ESTABLISHED
+declare -rx MPTCP_LIB_EVENT_CLOSED=3 # MPTCP_EVENT_CLOSED
declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED
declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED
declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED
+declare -rx MPTCP_LIB_EVENT_SUB_PRIORITY=13 # MPTCP_EVENT_SUB_PRIORITY
declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED
declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED
@@ -25,6 +28,7 @@ declare -rx MPTCP_LIB_AF_INET6=10
MPTCP_LIB_SUBTESTS=()
MPTCP_LIB_SUBTESTS_DUPLICATED=0
MPTCP_LIB_SUBTEST_FLAKY=0
+MPTCP_LIB_SUBTESTS_LAST_TS_MS=
MPTCP_LIB_TEST_COUNTER=0
MPTCP_LIB_TEST_FORMAT="%02u %-50s"
MPTCP_LIB_IP_MPTCP=0
@@ -102,6 +106,36 @@ mptcp_lib_pr_info() {
mptcp_lib_print_info "INFO: ${*}"
}
+mptcp_lib_pr_nstat() {
+ local ns="${1}"
+ local hist="/tmp/${ns}.out"
+
+ if [ -f "${hist}" ]; then
+ awk '$2 != 0 { print " "$0 }' "${hist}"
+ else
+ ip netns exec "${ns}" nstat -as | grep Tcp
+ fi
+}
+
+# $1-2: listener/connector ns ; $3 port
+mptcp_lib_pr_err_stats() {
+ local lns="${1}"
+ local cns="${2}"
+ local port="${3}"
+
+ echo -en "${MPTCP_LIB_COLOR_RED}"
+ {
+ printf "\nnetns %s (listener) socket stat for %d:\n" "${lns}" "${port}"
+ ip netns exec "${lns}" ss -Menitam -o "sport = :${port}"
+ mptcp_lib_pr_nstat "${lns}"
+
+ printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}"
+ ip netns exec "${cns}" ss -Menitam -o "dport = :${port}"
+ [ "${lns}" != "${cns}" ] && mptcp_lib_pr_nstat "${cns}"
+ } 1>&2
+ echo -en "${MPTCP_LIB_COLOR_RESET}"
+}
+
# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all
# features using the last version of the kernel and the selftests to make sure
# a test is not being skipped by mistake.
@@ -201,6 +235,11 @@ mptcp_lib_kversion_ge() {
mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}"
}
+mptcp_lib_subtests_last_ts_reset() {
+ MPTCP_LIB_SUBTESTS_LAST_TS_MS="$(date +%s%3N)"
+}
+mptcp_lib_subtests_last_ts_reset
+
__mptcp_lib_result_check_duplicated() {
local subtest
@@ -215,13 +254,22 @@ __mptcp_lib_result_check_duplicated() {
__mptcp_lib_result_add() {
local result="${1}"
+ local time="time="
+ local ts_prev_ms
shift
local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1))
__mptcp_lib_result_check_duplicated "${*}"
- MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}")
+ # not to add two '#'
+ [[ "${*}" != *"#"* ]] && time="# ${time}"
+
+ ts_prev_ms="${MPTCP_LIB_SUBTESTS_LAST_TS_MS}"
+ mptcp_lib_subtests_last_ts_reset
+ time+="$((MPTCP_LIB_SUBTESTS_LAST_TS_MS - ts_prev_ms))ms"
+
+ MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*} ${time}")
}
# $1: test name
@@ -291,12 +339,28 @@ mptcp_lib_result_print_all_tap() {
# get the value of keyword $1 in the line marked by keyword $2
mptcp_lib_get_info_value() {
- grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+ grep "${2}" 2>/dev/null |
+ sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+ # the ';q' at the end limits to the first matched entry.
}
# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
mptcp_lib_evts_get_info() {
- grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
+ grep "${4:-}" "${2}" 2>/dev/null |
+ mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
+}
+
+mptcp_lib_wait_timeout() {
+ local timeout_test="${1}"
+ local listener_ns="${2}"
+ local connector_ns="${3}"
+ local port="${4}"
+ shift 4 # rest are PIDs
+
+ sleep "${timeout_test}"
+ mptcp_lib_print_err "timeout"
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
+ kill "${@}" 2>/dev/null
}
# $1: PID
@@ -308,19 +372,62 @@ mptcp_lib_kill_wait() {
wait "${1}" 2>/dev/null
}
+# $1: PID
+mptcp_lib_pid_list_children() {
+ local curr="${1}"
+ # evoke 'ps' only once
+ local pids="${2:-"$(ps o pid,ppid)"}"
+
+ echo "${curr}"
+
+ local pid
+ for pid in $(echo "${pids}" | awk "\$2 == ${curr} { print \$1 }"); do
+ mptcp_lib_pid_list_children "${pid}" "${pids}"
+ done
+}
+
+# $1: PID
+mptcp_lib_kill_group_wait() {
+ # Some users might not have procps-ng: cannot use "kill -- -PID"
+ mptcp_lib_pid_list_children "${1}" | xargs -r kill &>/dev/null
+ wait "${1}" 2>/dev/null
+}
+
# $1: IP address
mptcp_lib_is_v6() {
[ -z "${1##*:*}" ]
}
+mptcp_lib_nstat_init() {
+ local ns="${1}"
+
+ rm -f "/tmp/${ns}."{nstat,out}
+ NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -n
+}
+
+mptcp_lib_nstat_get() {
+ local ns="${1}"
+
+ # filter out non-*TCP stats, and the rate (last column)
+ NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -sz |
+ grep -o ".*Tcp\S\+\s\+[0-9]\+" > "/tmp/${ns}.out"
+}
+
# $1: ns, $2: MIB counter
+# Get the counter from the history (mptcp_lib_nstat_{init,get}()) if available.
+# If not, get the counter from nstat ignoring any history.
mptcp_lib_get_counter() {
local ns="${1}"
local counter="${2}"
+ local hist="/tmp/${ns}.out"
local count
- count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
- awk 'NR==1 {next} {print $2}')
+ if [[ -s "${hist}" && "${counter}" == *"Tcp"* ]]; then
+ count=$(awk "/^${counter} / {print \$2; exit}" "${hist}")
+ else
+ count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
+ awk 'NR==1 {next} {print $2}')
+ fi
if [ -z "${count}" ]; then
mptcp_lib_fail_if_expected_feature "${counter} counter"
return 1
@@ -342,7 +449,7 @@ mptcp_lib_make_file() {
mptcp_lib_print_file_err() {
ls -l "${1}" 1>&2
echo "Trailing bytes are: "
- tail -c 27 "${1}"
+ tail -c 32 "${1}" | od -x | head -n2
}
# $1: input file ; $2: output file ; $3: what kind of file
@@ -436,8 +543,6 @@ mptcp_lib_ns_init() {
local netns
for netns in "${@}"; do
ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1
- ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0
done
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index 926b0be87c99..286164f7246e 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -159,13 +159,22 @@ static const char *getxinfo_strerr(int err)
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
- int err = getaddrinfo(node, service, hints, res);
+ int err;
+again:
+ err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -178,7 +187,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -223,7 +232,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
@@ -658,22 +667,26 @@ static void process_one_client(int fd, int pipefd)
do_getsockopts(&s, fd, ret, ret2);
if (s.mptcpi_rcv_delta != (uint64_t)ret + 1)
- xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret);
+ xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64 ", diff %" PRId64,
+ s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - (ret + 1));
/* be nice when running on top of older kernel */
if (s.pkt_stats_avail) {
if (s.last_sample.mptcpi_bytes_sent != ret2)
- xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64,
+ xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64
+ ", diff %" PRId64,
s.last_sample.mptcpi_bytes_sent, ret2,
s.last_sample.mptcpi_bytes_sent - ret2);
if (s.last_sample.mptcpi_bytes_received != ret)
- xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64,
+ xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64
+ ", diff %" PRId64,
s.last_sample.mptcpi_bytes_received, ret,
s.last_sample.mptcpi_bytes_received - ret);
if (s.last_sample.mptcpi_bytes_acked != ret)
- xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64,
- s.last_sample.mptcpi_bytes_acked, ret2,
- s.last_sample.mptcpi_bytes_acked - ret2);
+ xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64
+ ", diff %" PRId64,
+ s.last_sample.mptcpi_bytes_acked, ret,
+ s.last_sample.mptcpi_bytes_acked - ret);
}
close(fd);
@@ -713,6 +726,7 @@ static int server(int pipefd)
process_one_client(r, pipefd);
+ close(fd);
return 0;
}
@@ -838,8 +852,12 @@ int main(int argc, char *argv[])
die_perror("pipe");
s = xfork();
- if (s == 0)
- return server(pipefds[1]);
+ if (s == 0) {
+ close(pipefds[0]);
+ ret = server(pipefds[1]);
+ close(pipefds[1]);
+ return ret;
+ }
close(pipefds[1]);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 68899a303a1a..ab8bce06b262 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -95,7 +95,7 @@ init()
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}"
@@ -169,34 +169,44 @@ do_transfer()
cmsg+=",TCPINQ"
fi
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
- ${local_addr} < "$sin" > "$sout" &
+ mptcp_lib_nstat_init "${listener_ns}"
+ mptcp_lib_nstat_init "${connector_ns}"
+
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
+ ${local_addr} < "$sin" > "$sout" &
local spid=$!
- sleep 1
+ mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \
- $connect_addr < "$cin" > "$cout" &
+ ip netns exec ${connector_ns} \
+ $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \
+ $connect_addr < "$cin" > "$cout" &
local cpid=$!
+ mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+ "${connector_ns}" "${port}" "${cpid}" "${spid}" &
+ local timeout_pid=$!
+
wait $cpid
local retc=$?
wait $spid
local rets=$?
+ if kill -0 $timeout_pid; then
+ # Finished before the timeout: kill the background job
+ mptcp_lib_kill_group_wait $timeout_pid
+ timeout_pid=0
+ fi
+
+ mptcp_lib_nstat_get "${listener_ns}"
+ mptcp_lib_nstat_get "${connector_ns}"
+
print_title "Transfer ${ip:2}"
- if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
mptcp_lib_pr_fail "client exit code $retc, server $rets"
- echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
-
- echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
mptcp_lib_result_fail "transfer ${ip}"
@@ -349,6 +359,7 @@ init
make_file "$cin" "client" 1
make_file "$sin" "server" 1
trap cleanup EXIT
+mptcp_lib_subtests_last_ts_reset
run_tests $ns1 $ns2 10.0.1.1
run_tests $ns1 $ns2 dead:beef:1::1
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 2757378b1b13..ec6a87588191 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -32,7 +32,7 @@ ns1=""
err=$(mktemp)
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
rm -f "${err}"
@@ -70,8 +70,9 @@ format_endpoints() {
mptcp_lib_pm_nl_format_endpoints "${@}"
}
+# This function is invoked indirectly
+#shellcheck disable=SC2317,SC2329
get_endpoint() {
- # shellcheck disable=SC2317 # invoked indirectly
mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}"
}
@@ -137,6 +138,8 @@ check()
fi
}
+mptcp_lib_subtests_last_ts_reset
+
check "show_endpoints" "" "defaults addr list"
default_limits="$(get_limits)"
@@ -196,6 +199,7 @@ set_limits 1 9 2>/dev/null
check "get_limits" "${default_limits}" "subflows above hard limit"
set_limits 8 8
+flush_endpoint ## to make sure it doesn't affect the limits
check "get_limits" "$(format_limits 8 8)" "set limits"
flush_endpoint
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 7ad5a59adff2..65b374232ff5 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -2,6 +2,7 @@
#include <errno.h>
#include <error.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -19,12 +20,6 @@
#include "linux/mptcp.h"
-#ifndef MPTCP_PM_NAME
-#define MPTCP_PM_NAME "mptcp_pm"
-#endif
-#ifndef MPTCP_PM_EVENTS
-#define MPTCP_PM_EVENTS "mptcp_pm_events"
-#endif
#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif
@@ -116,9 +111,11 @@ static int capture_events(int fd, int event_group)
if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
&event_group, sizeof(event_group)) < 0)
- error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group");
+ error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group");
do {
+ bool server_side = false;
+
FD_ZERO(&rfds);
FD_SET(fd, &rfds);
res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
@@ -193,11 +190,22 @@ static int capture_events(int fd, int event_group)
else if (attrs->rta_type == MPTCP_ATTR_ERROR)
fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs));
else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE)
- fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs));
+ server_side = !!*(__u8 *)RTA_DATA(attrs);
+ else if (attrs->rta_type == MPTCP_ATTR_FLAGS) {
+ __u16 flags = *(__u16 *)RTA_DATA(attrs);
+
+ /* only print when present, easier */
+ if (flags & MPTCP_PM_EV_FLAG_DENY_JOIN_ID0)
+ fprintf(stderr, ",deny_join_id0:1");
+ if (flags & MPTCP_PM_EV_FLAG_SERVER_SIDE)
+ server_side = true;
+ }
attrs = RTA_NEXT(attrs, msg_len);
}
}
+ if (server_side)
+ fprintf(stderr, ",server_side:1");
fprintf(stderr, "\n");
} while (1);
@@ -288,7 +296,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family,
if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID)
*events_mcast_grp = *(__u32 *)RTA_DATA(grp);
else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME &&
- !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS))
+ !strcmp(RTA_DATA(grp), MPTCP_PM_EV_GRP_NAME))
got_events_grp = 1;
grp = RTA_NEXT(grp, grp_len);
@@ -822,6 +830,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
else if (!strcmp(tok, "signal"))
flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
+ else if (!strcmp(tok, "laminar"))
+ flags |= MPTCP_PM_ADDR_FLAG_LAMINAR;
else if (!strcmp(tok, "backup"))
flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
else if (!strcmp(tok, "fullmesh"))
@@ -1010,6 +1020,13 @@ static void print_addr(struct rtattr *attrs, int len)
printf(",");
}
+ if (flags & MPTCP_PM_ADDR_FLAG_LAMINAR) {
+ printf("laminar");
+ flags &= ~MPTCP_PM_ADDR_FLAG_LAMINAR;
+ if (flags)
+ printf(",");
+ }
+
if (flags & MPTCP_PM_ADDR_FLAG_BACKUP) {
printf("backup");
flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index f74e1c3c126d..806aaa7d2d61 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -28,14 +28,14 @@ size=0
usage() {
echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]"
- echo -e "\t-b: bail out after first error, otherwise runs al testcases"
+ echo -e "\t-b: bail out after first error, otherwise runs all testcases"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
echo -e "\t-d: debug this script"
echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
rm -f "$cout" "$sout"
@@ -155,50 +155,61 @@ do_transfer()
sleep 1
fi
- timeout ${timeout_test} \
- ip netns exec ${ns3} \
- ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
- 0.0.0.0 < "$sin" > "$sout" &
+ mptcp_lib_nstat_init "${ns3}"
+ mptcp_lib_nstat_init "${ns1}"
+
+ ip netns exec ${ns3} \
+ ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
+ 0.0.0.0 < "$sin" > "$sout" &
local spid=$!
mptcp_lib_wait_local_port_listen "${ns3}" "${port}"
- timeout ${timeout_test} \
- ip netns exec ${ns1} \
- ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \
- 10.0.3.3 < "$cin" > "$cout" &
+ ip netns exec ${ns1} \
+ ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \
+ 10.0.3.3 < "$cin" > "$cout" &
local cpid=$!
+ mptcp_lib_wait_timeout "${timeout_test}" "${ns3}" "${ns1}" "${port}" \
+ "${cpid}" "${spid}" &
+ local timeout_pid=$!
+
wait $cpid
local retc=$?
wait $spid
local rets=$?
+ if kill -0 $timeout_pid; then
+ # Finished before the timeout: kill the background job
+ mptcp_lib_kill_group_wait $timeout_pid
+ timeout_pid=0
+ fi
+
if $capture; then
sleep 1
kill ${cappid_listener}
kill ${cappid_connector}
fi
+ mptcp_lib_nstat_get "${ns3}"
+ mptcp_lib_nstat_get "${ns1}"
+
cmp $sin $cout > /dev/null 2>&1
local cmps=$?
cmp $cin $sout > /dev/null 2>&1
local cmpc=$?
- printf "%-16s" " max $max_time "
- if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
- [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ] &&
+ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] &&
+ [ $timeout_pid -eq 0 ]; then
+ printf "%-16s" " max $max_time "
mptcp_lib_pr_ok
cat "$capout"
return 0
fi
- mptcp_lib_pr_fail
- echo "client exit code $retc, server $rets" 1>&2
- echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
- ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
- echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2
- ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port"
+ mptcp_lib_pr_fail "client exit code $retc, server $rets"
+ mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}"
ls -l $sin $cout
ls -l $cin $sout
@@ -286,6 +297,7 @@ while getopts "bcdhi" option;do
done
setup
+mptcp_lib_subtests_last_ts_reset
run_test 10 10 0 0 "balanced bwidth"
run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 9cb05978269d..e9ae1806ab07 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -94,7 +94,7 @@ test_fail()
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
print_title "Cleanup"
@@ -117,7 +117,36 @@ cleanup()
trap cleanup EXIT
# Create and configure network namespaces for testing
+print_title "Init"
mptcp_lib_ns_init ns1 ns2
+
+# check path_manager and pm_type sysctl mapping
+if [ -f /proc/sys/net/mptcp/path_manager ]; then
+ ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=userspace
+ pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)"
+ if [ "${pm_type}" != "1" ]; then
+ test_fail "unexpected pm_type: ${pm_type}"
+ mptcp_lib_result_print_all_tap
+ exit ${KSFT_FAIL}
+ fi
+
+ ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=error 2>/dev/null
+ pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)"
+ if [ "${pm_type}" != "1" ]; then
+ test_fail "unexpected pm_type after error: ${pm_type}"
+ mptcp_lib_result_print_all_tap
+ exit ${KSFT_FAIL}
+ fi
+
+ ip netns exec "$ns1" sysctl -q net.mptcp.pm_type=0
+ pm_name="$(ip netns exec "$ns1" sysctl -n net.mptcp.path_manager)"
+ if [ "${pm_name}" != "kernel" ]; then
+ test_fail "unexpected path-manager: ${pm_name}"
+ mptcp_lib_result_print_all_tap
+ exit ${KSFT_FAIL}
+ fi
+fi
+
for i in "$ns1" "$ns2" ;do
ip netns exec "$i" sysctl -q net.mptcp.pm_type=1
done
@@ -150,8 +179,8 @@ mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid
server_evts=$(mktemp)
mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid
sleep 0.5
+mptcp_lib_subtests_last_ts_reset
-print_title "Init"
print_test "Created network namespaces ns1, ns2"
test_pass
@@ -172,6 +201,9 @@ make_connection()
is_v6="v4"
fi
+ # set this on the client side only: will not affect the rest
+ ip netns exec "$ns2" sysctl -q net.mptcp.allow_join_initial_addr_port=0
+
:>"$client_evts"
:>"$server_evts"
@@ -179,7 +211,8 @@ make_connection()
ip netns exec "$ns1" \
./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 &
local server_pid=$!
- sleep 0.5
+
+ mptcp_lib_wait_local_port_listen "${ns1}" "${port}"
# Run the client, transfer $file and stay connected to the server
# to conduct tests
@@ -194,23 +227,28 @@ make_connection()
local client_token
local client_port
local client_serverside
+ local client_nojoin
local server_token
local server_serverside
+ local server_nojoin
client_token=$(mptcp_lib_evts_get_info token "$client_evts")
client_port=$(mptcp_lib_evts_get_info sport "$client_evts")
client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts")
+ client_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$client_evts")
server_token=$(mptcp_lib_evts_get_info token "$server_evts")
server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts")
+ server_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$server_evts")
print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1"
- if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
- [ "$server_serverside" = 1 ]
+ if [ "${client_token}" != "" ] && [ "${server_token}" != "" ] &&
+ [ "${client_serverside:-0}" = 0 ] && [ "${server_serverside:-0}" = 1 ] &&
+ [ "${client_nojoin:-0}" = 0 ] && [ "${server_nojoin:-0}" = 1 ]
then
test_pass
print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}"
else
- test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
+ test_fail "Expected tokens (c:${client_token} - s:${server_token}), server (c:${client_serverside} - s:${server_serverside}), nojoin (c:${client_nojoin} - s:${server_nojoin})"
mptcp_lib_result_print_all_tap
exit ${KSFT_FAIL}
fi
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 7ea5fb28c93d..1d5d3c4e7e87 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -77,6 +77,7 @@
static int cfg_cork;
static bool cfg_cork_mixed;
static int cfg_cpu = -1; /* default: pin to last cpu */
+static int cfg_expect_zerocopy = -1;
static int cfg_family = PF_UNSPEC;
static int cfg_ifindex = 1;
static int cfg_payload_len;
@@ -92,9 +93,9 @@ static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;
static struct sockaddr_storage cfg_src_addr;
+static int exitcode;
static char payload[IP_MAXPACKET];
static long packets, bytes, completions, expected_completions;
-static int zerocopied = -1;
static uint32_t next_completion;
static uint32_t sends_since_notify;
@@ -444,11 +445,13 @@ static bool do_recv_completion(int fd, int domain)
next_completion = hi + 1;
zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
- if (zerocopied == -1)
- zerocopied = zerocopy;
- else if (zerocopied != zerocopy) {
- fprintf(stderr, "serr: inconsistent\n");
- zerocopied = zerocopy;
+ if (cfg_expect_zerocopy != -1 &&
+ cfg_expect_zerocopy != zerocopy) {
+ fprintf(stderr, "serr: ee_code: %u != expected %u\n",
+ zerocopy, cfg_expect_zerocopy);
+ exitcode = 1;
+ /* suppress repeated messages */
+ cfg_expect_zerocopy = zerocopy;
}
if (cfg_verbose >= 2)
@@ -571,7 +574,7 @@ static void do_tx(int domain, int type, int protocol)
fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
packets, bytes >> 20, completions,
- zerocopied == 1 ? 'y' : 'n');
+ cfg_zerocopy && cfg_expect_zerocopy == 1 ? 'y' : 'n');
}
static int do_setup_rx(int domain, int type, int protocol)
@@ -715,7 +718,7 @@ static void parse_opts(int argc, char **argv)
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
+ while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
@@ -770,6 +773,9 @@ static void parse_opts(int argc, char **argv)
case 'z':
cfg_zerocopy = true;
break;
+ case 'Z':
+ cfg_expect_zerocopy = !!atoi(optarg);
+ break;
}
}
@@ -817,5 +823,5 @@ int main(int argc, char **argv)
else
error(1, 0, "unknown cfg_test %s", cfg_test);
- return 0;
+ return exitcode;
}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index 89c22f5320e0..28178a38a4e7 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -6,6 +6,7 @@
set -e
readonly DEV="veth0"
+readonly DUMMY_DEV="dummy0"
readonly DEV_MTU=65535
readonly BIN="./msg_zerocopy"
@@ -14,21 +15,25 @@ readonly NSPREFIX="ns-${RAND}"
readonly NS1="${NSPREFIX}1"
readonly NS2="${NSPREFIX}2"
-readonly SADDR4='192.168.1.1'
-readonly DADDR4='192.168.1.2'
-readonly SADDR6='fd::1'
-readonly DADDR6='fd::2'
+readonly LPREFIX4='192.168.1'
+readonly RPREFIX4='192.168.2'
+readonly LPREFIX6='fd'
+readonly RPREFIX6='fc'
+
readonly path_sysctl_mem="net.core.optmem_max"
# No arguments: automated test
if [[ "$#" -eq "0" ]]; then
- $0 4 tcp -t 1
- $0 6 tcp -t 1
- $0 4 udp -t 1
- $0 6 udp -t 1
- echo "OK. All tests passed"
- exit 0
+ ret=0
+
+ $0 4 tcp -t 1 || ret=1
+ $0 6 tcp -t 1 || ret=1
+ $0 4 udp -t 1 || ret=1
+ $0 6 udp -t 1 || ret=1
+
+ [[ "$ret" == "0" ]] && echo "OK. All tests passed"
+ exit $ret
fi
# Argument parsing
@@ -45,11 +50,18 @@ readonly EXTRA_ARGS="$@"
# Argument parsing: configure addresses
if [[ "${IP}" == "4" ]]; then
- readonly SADDR="${SADDR4}"
- readonly DADDR="${DADDR4}"
+ readonly SADDR="${LPREFIX4}.1"
+ readonly DADDR="${LPREFIX4}.2"
+ readonly DUMMY_ADDR="${RPREFIX4}.1"
+ readonly DADDR_TXONLY="${RPREFIX4}.2"
+ readonly MASK="24"
elif [[ "${IP}" == "6" ]]; then
- readonly SADDR="${SADDR6}"
- readonly DADDR="${DADDR6}"
+ readonly SADDR="${LPREFIX6}::1"
+ readonly DADDR="${LPREFIX6}::2"
+ readonly DUMMY_ADDR="${RPREFIX6}::1"
+ readonly DADDR_TXONLY="${RPREFIX6}::2"
+ readonly MASK="64"
+ readonly NODAD="nodad"
else
echo "Invalid IP version ${IP}"
exit 1
@@ -89,33 +101,61 @@ ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+ip link add "${DUMMY_DEV}" mtu "${DEV_MTU}" netns "${NS2}" type dummy
+
# Bring the devices up
ip -netns "${NS1}" link set "${DEV}" up
ip -netns "${NS2}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DUMMY_DEV}" up
# Set fixed MAC addresses on the devices
ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
# Add fixed IP addresses to the devices
-ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
-ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
-ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
-ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+ip -netns "${NS1}" addr add "${SADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DUMMY_ADDR}/${MASK}" dev "${DUMMY_DEV}" ${NODAD}
+
+ip -netns "${NS1}" route add default via "${DADDR}" dev "${DEV}"
+ip -netns "${NS2}" route add default via "${DADDR_TXONLY}" dev "${DUMMY_DEV}"
+
+ip netns exec "${NS2}" sysctl -wq net.ipv4.ip_forward=1
+ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1
# Optionally disable sg or csum offload to test edge cases
# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+ret=0
+
do_test() {
local readonly ARGS="$1"
- echo "ipv${IP} ${TXMODE} ${ARGS}"
- ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
+ # tx-rx test
+ # packets queued to a local socket are copied,
+ # sender notification has SO_EE_CODE_ZEROCOPY_COPIED.
+
+ echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-rx\n"
+ ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 \
+ -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
sleep 0.2
- ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+ -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" -Z 0 || ret=1
wait
+
+ # next test is unconnected tx to dummy0, cannot exercise with tcp
+ [[ "${TXMODE}" == "tcp" ]] && return
+
+ # tx-only test: send out dummy0
+ # packets leaving the host are not copied,
+ # sender notification does not have SO_EE_CODE_ZEROCOPY_COPIED.
+
+ echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-only\n"
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+ -S "${SADDR}" -D "${DADDR_TXONLY}" ${ARGS} "${TXMODE}" -Z 1 || ret=1
}
do_test "${EXTRA_ARGS}"
do_test "-z ${EXTRA_ARGS}"
-echo ok
+
+[[ "$ret" == "0" ]] && echo "OK"
diff --git a/tools/testing/selftests/net/nat6to4.sh b/tools/testing/selftests/net/nat6to4.sh
new file mode 100755
index 000000000000..0ee859b622a4
--- /dev/null
+++ b/tools/testing/selftests/net/nat6to4.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NS="ns-peer-$(mktemp -u XXXXXX)"
+
+ip netns add "${NS}"
+ip -netns "${NS}" link set lo up
+ip -netns "${NS}" route add default via 127.0.0.2 dev lo
+
+tc -n "${NS}" qdisc add dev lo ingress
+tc -n "${NS}" filter add dev lo ingress prio 4 protocol ip \
+ bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action
+
+ip netns exec "${NS}" \
+ bash -c 'echo 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abc | socat - UDP4-DATAGRAM:224.1.0.1:6666,ip-multicast-loop=1'
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
deleted file mode 100644
index 6596fe03c77f..000000000000
--- a/tools/testing/selftests/net/net_helper.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Helper functions
-
-wait_local_port_listen()
-{
- local listener_ns="${1}"
- local port="${2}"
- local protocol="${3}"
- local pattern
- local i
-
- pattern=":$(printf "%04X" "${port}") "
-
- # for tcp protocol additionally check the socket state
- [ ${protocol} = "tcp" ] && pattern="${pattern}0A"
- for i in $(seq 10); do
- if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
- /proc/net/"${protocol}"* | grep -q "${pattern}"; then
- break
- fi
- sleep 0.1
- done
-}
diff --git a/tools/testing/selftests/net/netdev-l2addr.sh b/tools/testing/selftests/net/netdev-l2addr.sh
new file mode 100755
index 000000000000..18509da293e5
--- /dev/null
+++ b/tools/testing/selftests/net/netdev-l2addr.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+set -o pipefail
+
+NSIM_ADDR=2025
+TEST_ADDR="d0:be:d0:be:d0:00"
+
+RET_CODE=0
+
+cleanup() {
+ cleanup_netdevsim "$NSIM_ADDR"
+ cleanup_ns "$NS"
+}
+
+trap cleanup EXIT
+
+fail() {
+ echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+ RET_CODE=1
+}
+
+get_addr()
+{
+ local type="$1"
+ local dev="$2"
+ local ns="$3"
+
+ ip -j -n "$ns" link show dev "$dev" | jq -er ".[0].$type"
+}
+
+setup_ns NS
+
+nsim=$(create_netdevsim $NSIM_ADDR "$NS")
+
+get_addr address "$nsim" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim" "$NS" >/dev/null || fail "Couldn't get brd addr"
+get_addr permaddr "$nsim" "$NS" >/dev/null && fail "Found perm_addr without setting it"
+
+ip -n "$NS" link set dev "$nsim" address "$TEST_ADDR"
+ip -n "$NS" link set dev "$nsim" brd "$TEST_ADDR"
+
+[[ "$(get_addr address "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set ether addr"
+[[ "$(get_addr broadcast "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set brd addr"
+
+if create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "FF:FF:FF:FF:FF:FF" 2>/dev/null; then
+ fail "Created netdevsim with broadcast permaddr"
+fi
+
+nsim_port=$(create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "$TEST_ADDR")
+
+get_addr address "$nsim_port" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim_port" "$NS" >/dev/null || fail "Couldn't get brd addr"
+[[ "$(get_addr permaddr "$nsim_port" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't get permaddr"
+
+cleanup_netdevsim "$NSIM_ADDR" "$NS"
+
+exit $RET_CODE
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
index e3afcb424710..438f7b2acc5f 100755
--- a/tools/testing/selftests/net/netdevice.sh
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -67,8 +67,12 @@ kci_net_setup()
return $ksft_skip
fi
- # TODO what ipaddr to set ? DHCP ?
- echo "SKIP: $netdev: set IP address"
+ if [ "$veth_created" ]; then
+ echo "XFAIL: $netdev: set IP address unsupported for veth*"
+ else
+ # TODO what ipaddr to set ? DHCP ?
+ echo "SKIP: $netdev: set IP address"
+ fi
return $ksft_skip
}
@@ -86,7 +90,7 @@ kci_netdev_ethtool_test()
ret=$?
if [ $ret -ne 0 ];then
if [ $ret -eq "$1" ];then
- echo "SKIP: $netdev: ethtool $2 not supported"
+ echo "XFAIL: $netdev: ethtool $2 not supported"
return $ksft_skip
else
echo "FAIL: $netdev: ethtool $2"
@@ -124,11 +128,45 @@ kci_netdev_ethtool()
return 1
fi
echo "PASS: $netdev: ethtool list features"
- #TODO for each non fixed features, try to turn them on/off
+
+ while read -r FEATURE VALUE FIXED; do
+ [ "$FEATURE" != "Features" ] || continue # Skip "Features"
+ [ "$FIXED" != "[fixed]" ] || continue # Skip fixed features
+ feature="${FEATURE%:*}"
+
+ ethtool --offload "$netdev" "$feature" off
+ if [ $? -eq 0 ]; then
+ echo "PASS: $netdev: Turned off feature: $feature"
+ else
+ echo "FAIL: $netdev: Failed to turn off feature:" \
+ "$feature"
+ fi
+
+ ethtool --offload "$netdev" "$feature" on
+ if [ $? -eq 0 ]; then
+ echo "PASS: $netdev: Turned on feature: $feature"
+ else
+ echo "FAIL: $netdev: Failed to turn on feature:" \
+ "$feature"
+ fi
+
+ #restore the feature to its initial state
+ ethtool --offload "$netdev" "$feature" "$VALUE"
+ if [ $? -eq 0 ]; then
+ echo "PASS: $netdev: Restore feature $feature" \
+ "to initial state $VALUE"
+ else
+ echo "FAIL: $netdev: Failed to restore feature" \
+ "$feature to initial state $VALUE"
+ fi
+
+ done < "$TMP_ETHTOOL_FEATURES"
+
rm "$TMP_ETHTOOL_FEATURES"
kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev"
kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev"
+
return 0
}
@@ -196,10 +234,24 @@ if [ ! -e "$TMP_LIST_NETDEV" ];then
fi
ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\ -f2 | cut -d: -f1> "$TMP_LIST_NETDEV"
+
+if [ ! -s "$TMP_LIST_NETDEV" ]; then
+ echo "No valid network device found, creating veth pair"
+ ip link add veth0 type veth peer name veth1
+ echo "veth0" > "$TMP_LIST_NETDEV"
+ veth_created=1
+fi
+
while read netdev
do
kci_test_netdev "$netdev"
done < "$TMP_LIST_NETDEV"
+#clean up veth interface pair if it was created
+if [ "$veth_created" ]; then
+ ip link delete veth0
+ echo "Removed veth pair"
+fi
+
rm "$TMP_LIST_NETDEV"
exit 0
diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore
index 0a64d6d0e29a..5d2be9a00627 100644
--- a/tools/testing/selftests/net/netfilter/.gitignore
+++ b/tools/testing/selftests/net/netfilter/.gitignore
@@ -2,5 +2,7 @@
audit_logread
connect_close
conntrack_dump_flush
+conntrack_reverse_clash
sctp_collision
nf_queue
+udpclash
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index 47945b2b3f92..ee2d1a5254f8 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -6,36 +6,52 @@ HOSTPKG_CONFIG := pkg-config
MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
-TEST_PROGS := br_netfilter.sh bridge_brouter.sh
-TEST_PROGS += conntrack_icmp_related.sh
-TEST_PROGS += conntrack_ipip_mtu.sh
-TEST_PROGS += conntrack_tcp_unreplied.sh
-TEST_PROGS += conntrack_sctp_collision.sh
-TEST_PROGS += conntrack_vrf.sh
-TEST_PROGS += ipvs.sh
-TEST_PROGS += nf_conntrack_packetdrill.sh
-TEST_PROGS += nf_nat_edemux.sh
-TEST_PROGS += nft_audit.sh
-TEST_PROGS += nft_concat_range.sh
-TEST_PROGS += nft_conntrack_helper.sh
-TEST_PROGS += nft_fib.sh
-TEST_PROGS += nft_flowtable.sh
-TEST_PROGS += nft_meta.sh
-TEST_PROGS += nft_nat.sh
-TEST_PROGS += nft_nat_zones.sh
-TEST_PROGS += nft_queue.sh
-TEST_PROGS += nft_synproxy.sh
-TEST_PROGS += nft_zones_many.sh
-TEST_PROGS += rpath.sh
-TEST_PROGS += xt_string.sh
+TEST_PROGS := \
+ br_netfilter.sh \
+ br_netfilter_queue.sh \
+ bridge_brouter.sh \
+ conntrack_clash.sh \
+ conntrack_dump_flush.sh \
+ conntrack_icmp_related.sh \
+ conntrack_ipip_mtu.sh \
+ conntrack_resize.sh \
+ conntrack_reverse_clash.sh \
+ conntrack_sctp_collision.sh \
+ conntrack_tcp_unreplied.sh \
+ conntrack_vrf.sh \
+ ipvs.sh \
+ nf_conntrack_packetdrill.sh \
+ nf_nat_edemux.sh \
+ nft_audit.sh \
+ nft_concat_range.sh \
+ nft_conntrack_helper.sh \
+ nft_fib.sh \
+ nft_flowtable.sh \
+ nft_interface_stress.sh \
+ nft_meta.sh \
+ nft_nat.sh \
+ nft_nat_zones.sh \
+ nft_queue.sh \
+ nft_synproxy.sh \
+ nft_tproxy_tcp.sh \
+ nft_tproxy_udp.sh \
+ nft_zones_many.sh \
+ rpath.sh \
+ vxlan_mtu_frag.sh \
+ xt_string.sh \
+# end of TEST_PROGS
TEST_PROGS_EXTENDED = nft_concat_range_perf.sh
-TEST_GEN_PROGS = conntrack_dump_flush
-
-TEST_GEN_FILES = audit_logread
-TEST_GEN_FILES += connect_close nf_queue
-TEST_GEN_FILES += sctp_collision
+TEST_GEN_FILES = \
+ audit_logread \
+ connect_close \
+ conntrack_dump_flush \
+ conntrack_reverse_clash \
+ nf_queue \
+ sctp_collision \
+ udpclash \
+# end of TEST_GEN_FILES
include ../../lib.mk
@@ -44,9 +60,14 @@ $(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS)
$(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS)
$(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS)
+$(OUTPUT)/udpclash: LDLIBS += -lpthread
-TEST_FILES := lib.sh
-TEST_FILES += packetdrill
+TEST_FILES := \
+ lib.sh \
+ packetdrill \
+# end of TEST_FILES
TEST_INCLUDES := \
- ../lib.sh
+ $(wildcard ../lib/sh/*.sh) \
+ ../lib.sh \
+# end of TEST_INCLUDES
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
index c28379a965d8..011de8763094 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -13,6 +13,12 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+ echo SKIP: kernel is tainted
+ exit $ksft_skip
+fi
+
cleanup() {
cleanup_all_ns
}
@@ -54,9 +60,6 @@ bcast_ping()
done
}
-ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0
-
if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then
echo "SKIP: Can't create veth device"
exit $ksft_skip
@@ -165,6 +168,7 @@ if [ "$t" -eq 0 ];then
echo PASS: kernel not tainted
else
echo ERROR: kernel is tainted
+ dmesg
ret=1
fi
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
new file mode 100755
index 000000000000..4788641717d9
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+ echo SKIP: kernel is tainted
+ exit $ksft_skip
+fi
+
+cleanup() {
+ cleanup_all_ns
+}
+
+setup_ns c1 c2 c3 sender
+
+trap cleanup EXIT
+
+nf_queue_wait()
+{
+ grep -q "^ *$1 " "/proc/self/net/netfilter/nfnetlink_queue"
+}
+
+port_add() {
+ ns="$1"
+ dev="$2"
+ a="$3"
+
+ ip link add name "$dev" type veth peer name "$dev" netns "$ns"
+
+ ip -net "$ns" addr add 192.168.1."$a"/24 dev "$dev"
+ ip -net "$ns" link set "$dev" up
+
+ ip link set "$dev" master br0
+ ip link set "$dev" up
+}
+
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+
+ip link add br0 type bridge
+ip addr add 192.168.1.254/24 dev br0
+
+port_add "$c1" "c1" 1
+port_add "$c2" "c2" 2
+port_add "$c3" "c3" 3
+port_add "$sender" "sender" 253
+
+ip link set br0 up
+
+modprobe -q br_netfilter
+
+sysctl net.bridge.bridge-nf-call-iptables=1 || exit 1
+
+ip netns exec "$sender" ping -I sender -c1 192.168.1.1 || exit 1
+ip netns exec "$sender" ping -I sender -c1 192.168.1.2 || exit 2
+ip netns exec "$sender" ping -I sender -c1 192.168.1.3 || exit 3
+
+nft -f /dev/stdin <<EOF
+table ip filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ ct state new counter
+ ip protocol icmp counter queue num 0 bypass
+ }
+}
+EOF
+./nf_queue -t 5 > /dev/null &
+
+busywait 5000 nf_queue_wait
+
+for i in $(seq 1 5); do conntrack -F > /dev/null 2> /dev/null; sleep 0.1 ; done &
+ip netns exec "$sender" ping -I sender -f -c 50 -b 192.168.1.255
+
+read t < /proc/sys/kernel/tainted
+if [ "$t" -eq 0 ];then
+ echo PASS: kernel not tainted
+else
+ echo ERROR: kernel is tainted
+ dmesg
+ exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
index 2549b6590693..ea76f2bc2f59 100755
--- a/tools/testing/selftests/net/netfilter/bridge_brouter.sh
+++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
@@ -22,8 +22,6 @@ trap cleanup EXIT
setup_ns nsbr ns1 ns2
-ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0
-ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0
if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then
echo "SKIP: Can't create veth device"
exit $ksft_skip
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 63ef80ef47a4..12ce61fa15a8 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -7,63 +7,74 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_T_FILTER=m
CONFIG_BRIDGE_NETFILTER=m
CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_CGROUP_BPF=y
+CONFIG_CRYPTO_SHA1=m
CONFIG_DUMMY=m
+CONFIG_INET_DIAG=m
CONFIG_INET_ESP=m
-CONFIG_IP_NF_MATCH_RPFILTER=m
-CONFIG_IP6_NF_MATCH_RPFILTER=m
-CONFIG_IP_NF_IPTABLES=m
+CONFIG_INET_SCTP_DIAG=m
+CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_RAW=m
CONFIG_IP_NF_FILTER=m
-CONFIG_IP6_NF_FILTER=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_RAW=m
-CONFIG_IP6_NF_RAW=m
CONFIG_IP_SCTP=m
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IP_VS=m
CONFIG_IP_VS_PROTO_TCP=y
CONFIG_IP_VS_RR=m
-CONFIG_IPV6=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_MACVLAN=m
CONFIG_NAMESPACES=y
CONFIG_NET_CLS_U32=m
-CONFIG_NET_L3_MASTER_DEV=y
-CONFIG_NET_NS=y
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_IPIP=m
-CONFIG_NET_VRF=y
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
CONFIG_NETFILTER_SYNPROXY=m
CONFIG_NETFILTER_XTABLES=m
-CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_NAT=m
CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_VRF=y
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_FLOW_TABLE_INET=m
CONFIG_NF_LOG_IPV4=m
CONFIG_NF_LOG_IPV6=m
CONFIG_NF_NAT=m
-CONFIG_NF_NAT_REDIRECT=y
CONFIG_NF_NAT_MASQUERADE=y
+CONFIG_NF_NAT_REDIRECT=y
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_BRIDGE=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_NETDEV=y
-CONFIG_NF_FLOW_TABLE_INET=m
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_CT=m
@@ -81,9 +92,10 @@ CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_SYNPROXY=m
+CONFIG_NFT_TPROXY=m
+CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VLAN_8021Q=m
-CONFIG_XFRM_USER=m
+CONFIG_VXLAN=m
CONFIG_XFRM_STATISTICS=y
-CONFIG_NET_PKTGEN=m
-CONFIG_TUN=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
new file mode 100755
index 000000000000..7fc6c5dbd551
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
@@ -0,0 +1,174 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+clash_resolution_active=0
+dport=22111
+ret=0
+
+cleanup()
+{
+ # netns cleanup also zaps any remaining socat echo server.
+ cleanup_all_ns
+}
+
+checktool "nft --version" "run test without nft"
+checktool "conntrack --version" "run test without conntrack"
+checktool "socat -h" "run test without socat"
+
+trap cleanup EXIT
+
+setup_ns nsclient1 nsclient2 nsrouter
+
+ip netns exec "$nsrouter" nft -f -<<EOF
+table ip t {
+ chain lb {
+ meta l4proto udp dnat to numgen random mod 3 map { 0 : 10.0.2.1 . 9000, 1 : 10.0.2.1 . 9001, 2 : 10.0.2.1 . 9002 }
+ }
+
+ chain prerouting {
+ type nat hook prerouting priority dstnat
+
+ udp dport $dport counter jump lb
+ }
+
+ chain output {
+ type nat hook output priority dstnat
+
+ udp dport $dport counter jump lb
+ }
+}
+EOF
+
+load_simple_ruleset()
+{
+ip netns exec "$1" nft -f -<<EOF
+table ip t {
+ chain forward {
+ type filter hook forward priority 0
+
+ ct state new counter
+ }
+}
+EOF
+}
+
+spawn_servers()
+{
+ local ns="$1"
+ local ports="9000 9001 9002"
+
+ for port in $ports; do
+ ip netns exec "$ns" socat UDP-RECVFROM:$port,fork PIPE 2>/dev/null &
+ done
+
+ for port in $ports; do
+ wait_local_port_listen "$ns" $port udp
+ done
+}
+
+add_addr()
+{
+ local ns="$1"
+ local dev="$2"
+ local i="$3"
+ local j="$4"
+
+ ip -net "$ns" link set "$dev" up
+ ip -net "$ns" addr add "10.0.$i.$j/24" dev "$dev"
+}
+
+ping_test()
+{
+ local ns="$1"
+ local daddr="$2"
+
+ if ! ip netns exec "$ns" ping -q -c 1 $daddr > /dev/null;then
+ echo "FAIL: ping from $ns to $daddr"
+ exit 1
+ fi
+}
+
+run_one_clash_test()
+{
+ local ns="$1"
+ local ctns="$2"
+ local daddr="$3"
+ local dport="$4"
+ local entries
+ local cre
+
+ if ! ip netns exec "$ns" timeout 30 ./udpclash $daddr $dport;then
+ echo "INFO: did not receive expected number of replies for $daddr:$dport"
+ ip netns exec "$ctns" conntrack -S
+ # don't fail: check if clash resolution triggered after all.
+ fi
+
+ entries=$(ip netns exec "$ctns" conntrack -S | wc -l)
+ cre=$(ip netns exec "$ctns" conntrack -S | grep "clash_resolve=0" | wc -l)
+
+ if [ "$cre" -ne "$entries" ];then
+ clash_resolution_active=1
+ return 0
+ fi
+
+ # not a failure: clash resolution logic did not trigger.
+ # With right timing, xmit completed sequentially and
+ # no parallel insertion occurs.
+ return $ksft_skip
+}
+
+run_clash_test()
+{
+ local ns="$1"
+ local ctns="$2"
+ local daddr="$3"
+ local dport="$4"
+ local softerr=0
+
+ for i in $(seq 1 10);do
+ run_one_clash_test "$ns" "$ctns" "$daddr" "$dport"
+ local rv=$?
+ if [ $rv -eq 0 ];then
+ echo "PASS: clash resolution test for $daddr:$dport on attempt $i"
+ return 0
+ elif [ $rv -eq $ksft_skip ]; then
+ softerr=1
+ fi
+ done
+
+ [ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger"
+}
+
+ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter"
+ip link add veth0 netns "$nsclient2" type veth peer name veth1 netns "$nsrouter"
+add_addr "$nsclient1" veth0 1 1
+add_addr "$nsclient2" veth0 2 1
+add_addr "$nsrouter" veth0 1 99
+add_addr "$nsrouter" veth1 2 99
+
+ip -net "$nsclient1" route add default via 10.0.1.99
+ip -net "$nsclient2" route add default via 10.0.2.99
+ip netns exec "$nsrouter" sysctl -q net.ipv4.ip_forward=1
+
+ping_test "$nsclient1" 10.0.1.99
+ping_test "$nsclient1" 10.0.2.1
+ping_test "$nsclient2" 10.0.1.1
+
+spawn_servers "$nsclient2"
+
+# exercise clash resolution with nat:
+# nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2,3}.
+run_clash_test "$nsclient1" "$nsrouter" 10.0.1.99 "$dport"
+
+# exercise clash resolution without nat.
+load_simple_ruleset "$nsclient2"
+run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001
+
+if [ $clash_resolution_active -eq 0 ];then
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
+ echo "SKIP: Clash resolution did not trigger"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
index bd9317bf5ada..5cecb8a1bc94 100644
--- a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
+++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
@@ -10,7 +10,7 @@
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define TEST_ZONE_ID 123
#define NF_CT_DEFAULT_ZONE_ID 0
@@ -43,6 +43,8 @@ static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type,
mnl_attr_nest_end(nlh, nest_proto);
mnl_attr_nest_end(nlh, nest);
+
+ return 0;
}
static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type,
@@ -71,6 +73,8 @@ static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type,
mnl_attr_nest_end(nlh, nest_proto);
mnl_attr_nest_end(nlh, nest);
+
+ return 0;
}
static int build_cta_proto(struct nlmsghdr *nlh)
@@ -90,6 +94,8 @@ static int build_cta_proto(struct nlmsghdr *nlh)
mnl_attr_nest_end(nlh, nest_proto);
mnl_attr_nest_end(nlh, nest);
+
+ return 0;
}
static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh,
@@ -98,7 +104,7 @@ static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh,
char buf[MNL_SOCKET_BUFFER_SIZE];
struct nlmsghdr *rplnlh;
unsigned int portid;
- int err, ret;
+ int ret;
portid = mnl_socket_get_portid(sock);
@@ -207,6 +213,7 @@ static int conntrack_data_generate_v6(struct mnl_socket *sock,
static int count_entries(const struct nlmsghdr *nlh, void *data)
{
reply_counter++;
+ return MNL_CB_OK;
}
static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone)
@@ -216,7 +223,7 @@ static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone)
struct nfgenmsg *nfh;
struct nlattr *nest;
unsigned int portid;
- int err, ret;
+ int ret;
portid = mnl_socket_get_portid(sock);
@@ -263,7 +270,7 @@ static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone)
struct nfgenmsg *nfh;
struct nlattr *nest;
unsigned int portid;
- int err, ret;
+ int ret;
portid = mnl_socket_get_portid(sock);
diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh
new file mode 100755
index 000000000000..8b0935385849
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+exec unshare -n ./conntrack_dump_flush
diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
new file mode 100755
index 000000000000..615fe3c6f405
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
@@ -0,0 +1,515 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+checktool "conntrack --version" "run test without conntrack"
+checktool "nft --version" "run test without nft tool"
+
+init_net_max=0
+ct_buckets=0
+tmpfile=""
+tmpfile_proc=""
+tmpfile_uniq=""
+ret=0
+have_socat=0
+
+socat -h > /dev/null && have_socat=1
+
+insert_count=2000
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400
+
+modprobe -q nf_conntrack
+if ! sysctl -q net.netfilter.nf_conntrack_max >/dev/null;then
+ echo "SKIP: conntrack sysctls not available"
+ exit $KSFT_SKIP
+fi
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1
+ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1
+
+cleanup() {
+ cleanup_all_ns
+
+ rm -f "$tmpfile" "$tmpfile_proc" "$tmpfile_uniq"
+
+ # restore original sysctl setting
+ sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+ sysctl -q net.netfilter.nf_conntrack_buckets=$ct_buckets
+}
+trap cleanup EXIT
+
+check_max_alias()
+{
+ local expected="$1"
+ # old name, expected to alias to the first, i.e. changing one
+ # changes the other as well.
+ local lv=$(sysctl -n net.nf_conntrack_max)
+
+ if [ $expected -ne "$lv" ];then
+ echo "nf_conntrack_max sysctls should have identical values"
+ exit 1
+ fi
+}
+
+insert_ctnetlink() {
+ local ns="$1"
+ local count="$2"
+ local i=0
+ local bulk=16
+
+ while [ $i -lt $count ] ;do
+ ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \
+ if ! conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+ -d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+ --protonum 17 --timeout 3600 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \
+ return;\
+ fi & \
+ done ; wait" 2>/dev/null
+
+ i=$((i+bulk))
+ done
+}
+
+check_ctcount() {
+ local ns="$1"
+ local count="$2"
+ local msg="$3"
+
+ local now=$(ip netns exec "$ns" conntrack -C)
+
+ if [ $now -ne "$count" ] ;then
+ echo "expected $count entries in $ns, not $now: $msg"
+ exit 1
+ fi
+
+ echo "PASS: got $count connections: $msg"
+}
+
+ctresize() {
+ local duration="$1"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ while [ $now -lt $end ]; do
+ sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM
+ now=$(date +%s)
+ done
+}
+
+do_rsleep() {
+ local limit="$1"
+ local r=$RANDOM
+
+ r=$((r%limit))
+ sleep "$r"
+}
+
+ct_flush_once() {
+ local ns="$1"
+
+ ip netns exec "$ns" conntrack -F 2>/dev/null
+}
+
+ctflush() {
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ do_rsleep "$duration"
+
+ while [ $now -lt $end ]; do
+ ct_flush_once "$ns"
+ do_rsleep "$duration"
+ now=$(date +%s)
+ done
+}
+
+ct_pingflood()
+{
+ local ns="$1"
+ local duration="$2"
+ local msg="$3"
+ local now=$(date +%s)
+ local end=$((now + duration))
+ local j=0
+ local k=0
+
+ while [ $now -lt $end ]; do
+ j=$((j%256))
+ k=$((k%256))
+
+ ip netns exec "$ns" bash -c \
+ "j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1
+
+ j=$((j+1))
+
+ if [ $j -eq 256 ];then
+ k=$((k+1))
+ fi
+
+ now=$(date +%s)
+ done
+
+ wait
+}
+
+ct_udpflood()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ [ $have_socat -ne "1" ] && return
+
+ while [ $now -lt $end ]; do
+ip netns exec "$ns" bash<<"EOF"
+ for i in $(seq 1 100);do
+ dport=$(((RANDOM%65536)+1))
+
+ echo bar | socat -u STDIN UDP:"127.0.0.1:$dport" &
+ done > /dev/null 2>&1
+ wait
+EOF
+ now=$(date +%s)
+ done
+}
+
+ct_udpclash()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ [ -x udpclash ] || return
+
+ while [ $now -lt $end ]; do
+ ip netns exec "$ns" timeout 30 ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1
+
+ now=$(date +%s)
+ done
+}
+
+# dump to /dev/null. We don't want dumps to cause infinite loops
+# or use-after-free even when conntrack table is altered while dumps
+# are in progress.
+ct_nulldump()
+{
+ local ns="$1"
+
+ ip netns exec "$ns" conntrack -L > /dev/null 2>&1 &
+
+ # Don't require /proc support in conntrack
+ if [ -r /proc/self/net/nf_conntrack ] ; then
+ ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null &
+ fi
+
+ wait
+}
+
+ct_nulldump_loop()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ while [ $now -lt $end ]; do
+ ct_nulldump "$ns"
+ sleep $((RANDOM%2))
+ now=$(date +%s)
+ done
+}
+
+change_timeouts()
+{
+ local ns="$1"
+ local r1=$((RANDOM%2))
+ local r2=$((RANDOM%2))
+
+ [ "$r1" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=$((RANDOM%5))
+ [ "$r2" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=$((RANDOM%5))
+}
+
+ct_change_timeouts_loop()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ while [ $now -lt $end ]; do
+ change_timeouts "$ns"
+ sleep $((RANDOM%2))
+ now=$(date +%s)
+ done
+
+ # restore defaults
+ ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=30
+ ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=30
+}
+
+check_taint()
+{
+ local tainted_then="$1"
+ local msg="$2"
+
+ local tainted_now=0
+
+ if [ "$tainted_then" -ne 0 ];then
+ return
+ fi
+
+ read tainted_now < /proc/sys/kernel/tainted
+
+ if [ "$tainted_now" -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "TAINT: $msg"
+ dmesg
+ exit 1
+ fi
+}
+
+insert_flood()
+{
+ local n="$1"
+ local timeout="$2"
+ local r=0
+
+ r=$((RANDOM%$insert_count))
+
+ ct_pingflood "$n" "$timeout" "floodresize" &
+ ct_udpflood "$n" "$timeout" &
+ ct_udpclash "$n" "$timeout" &
+
+ insert_ctnetlink "$n" "$r" &
+ ctflush "$n" "$timeout" &
+ ct_nulldump_loop "$n" "$timeout" &
+ ct_change_timeouts_loop "$n" "$timeout" &
+
+ wait
+}
+
+test_floodresize_all()
+{
+ local timeout=20
+ local n=""
+ local tainted_then=""
+
+ read tainted_then < /proc/sys/kernel/tainted
+
+ for n in "$nsclient1" "$nsclient2";do
+ insert_flood "$n" "$timeout" &
+ done
+
+ # resize table constantly while flood/insert/dump/flushs
+ # are happening in parallel.
+ ctresize "$timeout"
+
+ # wait for subshells to complete, everything is limited
+ # by $timeout.
+ wait
+
+ check_taint "$tainted_then" "resize+flood"
+}
+
+check_dump()
+{
+ local ns="$1"
+ local protoname="$2"
+ local c=0
+ local proto=0
+ local proc=0
+ local unique=""
+ local lret=0
+
+ # NOTE: assumes timeouts are large enough to not have
+ # expirations in all following tests.
+ l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | sort | tee "$tmpfile" | wc -l)
+ c=$(ip netns exec "$ns" conntrack -C)
+
+ if [ "$c" -eq 0 ]; then
+ echo "FAIL: conntrack count for $ns is 0"
+ lret=1
+ fi
+
+ if [ "$c" -ne "$l" ]; then
+ echo "FAIL: conntrack count inconsistency for $ns -L: $c != $l"
+ lret=1
+ fi
+
+ # check the dump we retrieved is free of duplicated entries.
+ unique=$(uniq "$tmpfile" | tee "$tmpfile_uniq" | wc -l)
+ if [ "$l" -ne "$unique" ]; then
+ echo "FAIL: listing contained redundant entries for $ns: $l != $unique"
+ diff -u "$tmpfile" "$tmpfile_uniq"
+ lret=1
+ fi
+
+ # we either inserted icmp or only udp, hence, --proto should return same entry count as without filter.
+ proto=$(ip netns exec "$ns" conntrack -L --proto $protoname 2>/dev/null | sort | uniq | tee "$tmpfile_uniq" | wc -l)
+ if [ "$l" -ne "$proto" ]; then
+ echo "FAIL: dump inconsistency for $ns -L --proto $protoname: $l != $proto"
+ diff -u "$tmpfile" "$tmpfile_uniq"
+ lret=1
+ fi
+
+ if [ -r /proc/self/net/nf_conntrack ] ; then
+ proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | tee \"$tmpfile_proc\" | wc -l")
+
+ if [ "$l" -ne "$proc" ]; then
+ echo "FAIL: proc inconsistency for $ns: $l != $proc"
+ lret=1
+ fi
+
+ proc=$(uniq "$tmpfile_proc" | tee "$tmpfile_uniq" | wc -l)
+ if [ "$l" -ne "$proc" ]; then
+ echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc"
+ diff -u "$tmpfile_proc" "$tmpfile_uniq"
+ lret=1
+ fi
+ fi
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: dump in netns $ns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)"
+ else
+ echo "FAIL: dump in netns $ns had different entry count (-C $c, -L $l, -p $proto, /proc $proc)"
+ ret=1
+ fi
+}
+
+test_dump_all()
+{
+ local timeout=3
+ local tainted_then=""
+
+ read tainted_then < /proc/sys/kernel/tainted
+
+ ct_flush_once "$nsclient1"
+ ct_flush_once "$nsclient2"
+
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600
+
+ ct_pingflood "$nsclient1" $timeout "dumpall" &
+ insert_ctnetlink "$nsclient2" $insert_count
+
+ wait
+
+ check_dump "$nsclient1" "icmp"
+ check_dump "$nsclient2" "udp"
+
+ check_taint "$tainted_then" "test parallel conntrack dumps"
+}
+
+check_sysctl_immutable()
+{
+ local ns="$1"
+ local name="$2"
+ local failhard="$3"
+ local o=0
+ local n=0
+
+ o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+ n=$((o+1))
+
+ # return value isn't reliable, need to read it back
+ ip netns exec "$ns" sysctl -q "$name"=$n 2>/dev/null >/dev/null
+
+ n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+
+ [ -z "$n" ] && return 1
+
+ if [ $o -ne $n ]; then
+ if [ $failhard -gt 0 ] ;then
+ echo "FAIL: net.$name should not be changeable from namespace (now $n)"
+ ret=1
+ fi
+ return 0
+ fi
+
+ return 1
+}
+
+test_conntrack_max_limit()
+{
+ sysctl -q net.netfilter.nf_conntrack_max=100
+ insert_ctnetlink "$nsclient1" 101
+
+ # check netns is clamped by init_net, i.e., either netns follows
+ # init_net value, or a higher pernet limit (compared to init_net) is ignored.
+ check_ctcount "$nsclient1" 100 "netns conntrack_max is init_net bound"
+
+ sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+}
+
+test_conntrack_disable()
+{
+ local timeout=2
+
+ # disable conntrack pickups
+ ip netns exec "$nsclient1" nft flush table ip test_ct
+
+ ct_flush_once "$nsclient1"
+ ct_flush_once "$nsclient2"
+
+ ct_pingflood "$nsclient1" "$timeout" "conntrack disable"
+ ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1
+
+ # Disabled, should not have picked up any connection.
+ check_ctcount "$nsclient1" 0 "conntrack disabled"
+
+ # This one is still active, expect 1 connection.
+ check_ctcount "$nsclient2" 1 "conntrack enabled"
+}
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max)
+
+check_max_alias $init_net_max
+
+sysctl -q net.netfilter.nf_conntrack_max="262000"
+check_max_alias 262000
+
+setup_ns nsclient1 nsclient2
+
+# check this only works from init_net
+for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max net.nf_conntrack_max;do
+ check_sysctl_immutable "$nsclient1" "net.$n" 1
+done
+
+# won't work on older kernels. If it works, check that the netns obeys the limit
+if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then
+ # subtest: if pernet is changeable, check that reducing it in pernet
+ # limits the pernet entries. Inverse, pernet clamped by a lower init_net
+ # setting, is already checked by "test_conntrack_max_limit" test.
+
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1
+ insert_ctnetlink "$nsclient1" 2
+ check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound"
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+fi
+
+for n in "$nsclient1" "$nsclient2";do
+# enable conntrack in both namespaces
+ip netns exec "$n" nft -f - <<EOF
+table ip test_ct {
+ chain input {
+ type filter hook input priority 0
+ ct state new counter
+ }
+}
+EOF
+done
+
+tmpfile=$(mktemp)
+tmpfile_proc=$(mktemp)
+tmpfile_uniq=$(mktemp)
+test_conntrack_max_limit
+test_dump_all
+test_floodresize_all
+test_conntrack_disable
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
new file mode 100644
index 000000000000..507930cee8cb
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Needs something like:
+ *
+ * iptables -t nat -A POSTROUTING -o nomatch -j MASQUERADE
+ *
+ * so NAT engine attaches a NAT null-binding to each connection.
+ *
+ * With unmodified kernels, child or parent will exit with
+ * "Port number changed" error, even though no port translation
+ * was requested.
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+
+#define LEN 512
+#define PORT 56789
+#define TEST_TIME 5
+
+static void die(const char *e)
+{
+ perror(e);
+ exit(111);
+}
+
+static void die_port(uint16_t got, uint16_t want)
+{
+ fprintf(stderr, "Port number changed, wanted %d got %d\n", want, ntohs(got));
+ exit(1);
+}
+
+static int udp_socket(void)
+{
+ static const struct timeval tv = {
+ .tv_sec = 1,
+ };
+ int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+
+ if (fd < 0)
+ die("socket");
+
+ setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+ return fd;
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_in sa1 = {
+ .sin_family = AF_INET,
+ };
+ struct sockaddr_in sa2 = {
+ .sin_family = AF_INET,
+ };
+ int s1, s2, status;
+ time_t end, now;
+ socklen_t plen;
+ char buf[LEN];
+ bool child;
+
+ sa1.sin_port = htons(PORT);
+ sa2.sin_port = htons(PORT + 1);
+
+ s1 = udp_socket();
+ s2 = udp_socket();
+
+ inet_pton(AF_INET, "127.0.0.11", &sa1.sin_addr);
+ inet_pton(AF_INET, "127.0.0.12", &sa2.sin_addr);
+
+ if (bind(s1, (struct sockaddr *)&sa1, sizeof(sa1)) < 0)
+ die("bind 1");
+ if (bind(s2, (struct sockaddr *)&sa2, sizeof(sa2)) < 0)
+ die("bind 2");
+
+ child = fork() == 0;
+
+ now = time(NULL);
+ end = now + TEST_TIME;
+
+ while (now < end) {
+ struct sockaddr_in peer;
+ socklen_t plen = sizeof(peer);
+
+ now = time(NULL);
+
+ if (child) {
+ if (sendto(s1, buf, LEN, 0, (struct sockaddr *)&sa2, sizeof(sa2)) != LEN)
+ continue;
+
+ if (recvfrom(s2, buf, LEN, 0, (struct sockaddr *)&peer, &plen) < 0)
+ die("child recvfrom");
+
+ if (peer.sin_port != htons(PORT))
+ die_port(peer.sin_port, PORT);
+ } else {
+ if (sendto(s2, buf, LEN, 0, (struct sockaddr *)&sa1, sizeof(sa1)) != LEN)
+ continue;
+
+ if (recvfrom(s1, buf, LEN, 0, (struct sockaddr *)&peer, &plen) < 0)
+ die("parent recvfrom");
+
+ if (peer.sin_port != htons((PORT + 1)))
+ die_port(peer.sin_port, PORT + 1);
+ }
+ }
+
+ if (child)
+ return 0;
+
+ wait(&status);
+
+ if (WIFEXITED(status))
+ return WEXITSTATUS(status);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
new file mode 100755
index 000000000000..a24c896347a8
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+cleanup()
+{
+ cleanup_all_ns
+}
+
+checktool "nft --version" "run test without nft"
+checktool "conntrack --version" "run test without conntrack"
+
+trap cleanup EXIT
+
+setup_ns ns0
+
+# make loopback connections get nat null bindings assigned
+ip netns exec "$ns0" nft -f - <<EOF
+table ip nat {
+ chain POSTROUTING {
+ type nat hook postrouting priority srcnat; policy accept;
+ oifname "nomatch" counter packets 0 bytes 0 masquerade
+ }
+}
+EOF
+
+do_flush()
+{
+ local end
+ local now
+
+ now=$(date +%s)
+ end=$((now + 5))
+
+ while [ $now -lt $end ];do
+ ip netns exec "$ns0" conntrack -F 2>/dev/null
+ now=$(date +%s)
+ done
+}
+
+do_flush &
+
+if ip netns exec "$ns0" ./conntrack_reverse_clash; then
+ echo "PASS: No SNAT performed for null bindings"
+else
+ echo "ERROR: SNAT performed without any matching snat rule"
+ exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
index 073e8e62d350..207b79932d91 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
@@ -51,10 +51,6 @@ trap cleanup EXIT
setup_ns ns0 ns1
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-
if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
echo "SKIP: Could not add veth device"
exit $ksft_skip
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
index 4ceee9fb3949..9c9d5b38ab71 100755
--- a/tools/testing/selftests/net/netfilter/ipvs.sh
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -97,7 +97,7 @@ cleanup() {
}
server_listen() {
- ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" &
+ ip netns exec "$ns2" timeout 5 socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" &
server_pid=$!
sleep 0.2
}
@@ -129,9 +129,6 @@ test_dr() {
# avoid incorrect arp response
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
- # avoid reverse route lookup
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
test_service
@@ -154,7 +151,7 @@ test_nat() {
test_tun() {
ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
- ip netns exec "${ns1}" modprobe -q ipip
+ modprobe -q ipip
ip netns exec "${ns1}" ip link set tunl0 up
ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0
ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0
@@ -163,13 +160,10 @@ test_tun() {
ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port}
ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1
- ip netns exec "${ns2}" modprobe -q ipip
ip netns exec "${ns2}" ip link set tunl0 up
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
test_service
diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
index 1014551dd769..6731fe1eaf2e 100755
--- a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
+++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
@@ -17,9 +17,31 @@ cleanup()
checktool "socat -h" "run test without socat"
checktool "iptables --version" "run test without iptables"
+checktool "conntrack --version" "run test without conntrack"
trap cleanup EXIT
+connect_done()
+{
+ local ns="$1"
+ local port="$2"
+
+ ip netns exec "$ns" ss -nt -o state established "dport = :$port" | grep -q "$port"
+}
+
+check_ctstate()
+{
+ local ns="$1"
+ local dp="$2"
+
+ if ! ip netns exec "$ns" conntrack --get -s 192.168.1.2 -d 192.168.1.1 -p tcp \
+ --sport 10000 --dport "$dp" --state ESTABLISHED > /dev/null 2>&1;then
+ echo "FAIL: Did not find expected state for dport $2"
+ ip netns exec "$ns" bash -c 'conntrack -L; conntrack -S; ss -nt'
+ ret=1
+ fi
+}
+
setup_ns ns1 ns2
# Connect the namespaces using a veth pair
@@ -44,15 +66,18 @@ socatpid=$!
ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000"
# add a virtual IP using DNAT
-ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 || exit 1
# ... and route it to the other namespace
ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1
-# add a persistent connection from the other namespace
-ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+# listener should be up by now, wait if it isn't yet.
+wait_local_port_listen "$ns1" 5201 tcp
-sleep 1
+# add a persistent connection from the other namespace
+sleep 10 | ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+cpid0=$!
+busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" "5201"
# ip daddr:dport will be rewritten to 192.168.1.1 5201
# NAT must reallocate source port 10000 because
@@ -71,26 +96,25 @@ fi
ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
-sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+cpid1=$!
-# if connect succeeds, client closes instantly due to EOF on stdin.
-# if connect hangs, it will time out after 5s.
-echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
+sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
cpid2=$!
-time_then=$(date +%s)
-wait $cpid2
-rv=$?
-time_now=$(date +%s)
+busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5202
+busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5203
-# Check how much time has elapsed, expectation is for
-# 'cpid2' to connect and then exit (and no connect delay).
-delta=$((time_now - time_then))
+check_ctstate "$ns1" 5202
+check_ctstate "$ns1" 5203
-if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then
+kill $socatpid $cpid0 $cpid1 $cpid2
+socatpid=0
+
+if [ $ret -eq 0 ]; then
echo "PASS: could connect to service via redirected ports"
else
- echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
+ echo "FAIL: socat cannot connect to service via redirect"
ret=1
fi
diff --git a/tools/testing/selftests/net/netfilter/nft_audit.sh b/tools/testing/selftests/net/netfilter/nft_audit.sh
index 902f8114bc80..87f2b4c725aa 100755
--- a/tools/testing/selftests/net/netfilter/nft_audit.sh
+++ b/tools/testing/selftests/net/netfilter/nft_audit.sh
@@ -48,12 +48,31 @@ logread_pid=$!
trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT
exec 3<"$logfile"
+lsplit='s/^\(.*\) entries=\([^ ]*\) \(.*\)$/pfx="\1"\nval="\2"\nsfx="\3"/'
+summarize_logs() {
+ sum=0
+ while read line; do
+ eval $(sed "$lsplit" <<< "$line")
+ [[ $sum -gt 0 ]] && {
+ [[ "$pfx $sfx" == "$tpfx $tsfx" ]] && {
+ let "sum += val"
+ continue
+ }
+ echo "$tpfx entries=$sum $tsfx"
+ }
+ tpfx="$pfx"
+ tsfx="$sfx"
+ sum=$val
+ done
+ echo "$tpfx entries=$sum $tsfx"
+}
+
do_test() { # (cmd, log)
echo -n "testing for cmd: $1 ... "
cat <&3 >/dev/null
$1 >/dev/null || exit 1
sleep 0.1
- res=$(diff -a -u <(echo "$2") - <&3)
+ res=$(diff -a -u <(echo "$2") <(summarize_logs <&3))
[ $? -eq 0 ] && { echo "OK"; return; }
echo "FAIL"
grep -v '^\(---\|+++\|@@\)' <<< "$res"
@@ -152,31 +171,17 @@ do_test 'nft reset rules t1 c2' \
'table=t1 family=2 entries=3 op=nft_reset_rule'
do_test 'nft reset rules table t1' \
-'table=t1 family=2 entries=3 op=nft_reset_rule
-table=t1 family=2 entries=3 op=nft_reset_rule
-table=t1 family=2 entries=3 op=nft_reset_rule'
+'table=t1 family=2 entries=9 op=nft_reset_rule'
do_test 'nft reset rules t2 c3' \
-'table=t2 family=2 entries=189 op=nft_reset_rule
-table=t2 family=2 entries=188 op=nft_reset_rule
-table=t2 family=2 entries=126 op=nft_reset_rule'
+'table=t2 family=2 entries=503 op=nft_reset_rule'
do_test 'nft reset rules t2' \
-'table=t2 family=2 entries=3 op=nft_reset_rule
-table=t2 family=2 entries=3 op=nft_reset_rule
-table=t2 family=2 entries=186 op=nft_reset_rule
-table=t2 family=2 entries=188 op=nft_reset_rule
-table=t2 family=2 entries=129 op=nft_reset_rule'
+'table=t2 family=2 entries=509 op=nft_reset_rule'
do_test 'nft reset rules' \
-'table=t1 family=2 entries=3 op=nft_reset_rule
-table=t1 family=2 entries=3 op=nft_reset_rule
-table=t1 family=2 entries=3 op=nft_reset_rule
-table=t2 family=2 entries=3 op=nft_reset_rule
-table=t2 family=2 entries=3 op=nft_reset_rule
-table=t2 family=2 entries=180 op=nft_reset_rule
-table=t2 family=2 entries=188 op=nft_reset_rule
-table=t2 family=2 entries=135 op=nft_reset_rule'
+'table=t1 family=2 entries=9 op=nft_reset_rule
+table=t2 family=2 entries=509 op=nft_reset_rule'
# resetting sets and elements
@@ -200,13 +205,11 @@ do_test 'nft reset counters t1' \
'table=t1 family=2 entries=1 op=nft_reset_obj'
do_test 'nft reset counters t2' \
-'table=t2 family=2 entries=342 op=nft_reset_obj
-table=t2 family=2 entries=158 op=nft_reset_obj'
+'table=t2 family=2 entries=500 op=nft_reset_obj'
do_test 'nft reset counters' \
'table=t1 family=2 entries=1 op=nft_reset_obj
-table=t2 family=2 entries=341 op=nft_reset_obj
-table=t2 family=2 entries=159 op=nft_reset_obj'
+table=t2 family=2 entries=500 op=nft_reset_obj'
# resetting quotas
@@ -217,13 +220,11 @@ do_test 'nft reset quotas t1' \
'table=t1 family=2 entries=1 op=nft_reset_obj'
do_test 'nft reset quotas t2' \
-'table=t2 family=2 entries=315 op=nft_reset_obj
-table=t2 family=2 entries=185 op=nft_reset_obj'
+'table=t2 family=2 entries=500 op=nft_reset_obj'
do_test 'nft reset quotas' \
'table=t1 family=2 entries=1 op=nft_reset_obj
-table=t2 family=2 entries=314 op=nft_reset_obj
-table=t2 family=2 entries=186 op=nft_reset_obj'
+table=t2 family=2 entries=500 op=nft_reset_obj'
# deleting rules
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 6d66240e149c..ad97c6227f35 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -15,10 +15,12 @@ source lib.sh
# Available test groups:
# - reported_issues: check for issues that were reported in the past
# - correctness: check that packets match given entries, and only those
+# - correctness_large: same but with additional non-matching entries
# - concurrency: attempt races between insertion, deletion and lookup
# - timeout: check that packets match entries until they expire
# - performance: estimate matching rate, compare with rbtree and hash baselines
-TESTS="reported_issues correctness concurrency timeout"
+TESTS="reported_issues correctness correctness_large concurrency timeout"
+
[ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}"
# Set types, defined by TYPE_ variables below
@@ -27,7 +29,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add reload"
+BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
@@ -371,6 +373,53 @@ race_repeat 0
perf_duration 0
"
+TYPE_net_port_proto_match="
+display net,port,proto
+type_spec ipv4_addr . inet_service . inet_proto
+chain_spec ip daddr . udp dport . meta l4proto
+dst addr4 port proto
+src
+start 1
+count 9
+src_delta 9
+tools sendip bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_avx2_mismatch="
+display avx2 false match
+type_spec inet_proto . ipv6_addr
+chain_spec meta l4proto . ip6 daddr
+dst proto addr6
+src
+start 1
+count 1
+src_delta 1
+tools ping
+proto icmp6
+
+race_repeat 0
+
+perf_duration 0
+"
+
+
+TYPE_doublecreate="
+display cannot create same element twice
+type_spec ipv4_addr . ipv4_addr
+chain_spec ip saddr . ip daddr
+dst addr4
+proto icmp
+
+race_repeat 0
+
+perf_duration 0
+"
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -382,6 +431,7 @@ table inet filter {
set test {
type ${type_spec}
+ counter
flags interval,timeout
}
@@ -1121,9 +1171,18 @@ del() {
fi
}
-# Return packet count from 'test' counter in 'inet filter' table
+# Return packet count for elem $1 from 'test' counter in 'inet filter' table
count_packets() {
found=0
+ for token in $(nft reset element inet filter test "${1}" ); do
+ [ ${found} -eq 1 ] && echo "${token}" && return
+ [ "${token}" = "packets" ] && found=1
+ done
+}
+
+# Return packet count from 'test' counter in 'inet filter' table
+count_packets_nomatch() {
+ found=0
for token in $(nft list counter inet filter test); do
[ ${found} -eq 1 ] && echo "${token}" && return
[ "${token}" = "packets" ] && found=1
@@ -1169,6 +1228,10 @@ perf() {
# Set MAC addresses, send single packet, check that it matches, reset counter
send_match() {
+ local elem="$1"
+
+ shift
+
ip link set veth_a address "$(format_mac "${1}")"
ip -n B link set veth_b address "$(format_mac "${2}")"
@@ -1179,7 +1242,7 @@ send_match() {
eval src_"$f"=\$\(format_\$f "${2}"\)
done
eval send_\$proto
- if [ "$(count_packets)" != "1" ]; then
+ if [ "$(count_packets "$elem")" != "1" ]; then
err "${proto} packet to:"
err " $(for f in ${dst}; do
eval format_\$f "${1}"; printf ' '; done)"
@@ -1205,7 +1268,7 @@ send_nomatch() {
eval src_"$f"=\$\(format_\$f "${2}"\)
done
eval send_\$proto
- if [ "$(count_packets)" != "0" ]; then
+ if [ "$(count_packets_nomatch)" != "0" ]; then
err "${proto} packet to:"
err " $(for f in ${dst}; do
eval format_\$f "${1}"; printf ' '; done)"
@@ -1218,15 +1281,54 @@ send_nomatch() {
fi
}
+maybe_send_nomatch() {
+ local elem="$1"
+ local what="$4"
+
+ [ $((RANDOM%20)) -gt 0 ] && return
+
+ dst_addr4="$2"
+ dst_port="$3"
+ send_udp
+
+ if [ "$(count_packets_nomatch)" != "0" ]; then
+ err "Packet to $dst_addr4:$dst_port did match $what"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+maybe_send_match() {
+ local elem="$1"
+ local what="$4"
+
+ [ $((RANDOM%20)) -gt 0 ] && return
+
+ dst_addr4="$2"
+ dst_port="$3"
+ send_udp
+
+ if [ "$(count_packets "{ $elem }")" != "1" ]; then
+ err "Packet to $dst_addr4:$dst_port did not match $what"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+ nft reset counter inet filter test >/dev/null
+ nft reset element inet filter test "{ $elem }" >/dev/null
+}
+
# Correctness test template:
# - add ranged element, check that packets match it
# - check that packets outside range don't match it
# - remove some elements, check that packets don't match anymore
-test_correctness() {
- setup veth send_"${proto}" set || return ${ksft_skip}
-
+test_correctness_main() {
range_size=1
+
+ send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
+
for i in $(seq "${start}" $((start + count))); do
+ local elem=""
+
end=$((start + range_size))
# Avoid negative or zero-sized port ranges
@@ -1237,15 +1339,16 @@ test_correctness() {
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- add "$(format)" || return 1
+ elem="$(format)"
+ add "$elem" || return 1
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$elem" "${j}" $((j + src_delta)) || return 1
done
send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
# Delete elements now and then
if [ $((i % 3)) -eq 0 ]; then
- del "$(format)" || return 1
+ del "$elem" || return 1
for j in $(seq "$start" \
$((range_size / 2 + 1)) ${end}); do
send_nomatch "${j}" $((j + src_delta)) \
@@ -1258,6 +1361,163 @@ test_correctness() {
done
}
+test_correctness() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ test_correctness_main
+}
+
+# Repeat the correctness tests, but add extra non-matching entries.
+# This exercises the more compact '4 bit group' representation that
+# gets picked when the default 8-bit representation exceed
+# NFT_PIPAPO_LT_SIZE_HIGH bytes of memory.
+# See usage of NFT_PIPAPO_LT_SIZE_HIGH in pipapo_lt_bits_adjust().
+#
+# The format() helper is way too slow when generating lots of
+# entries so its not used here.
+test_correctness_large() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+ # number of dummy (filler) entries to add.
+ local dcount=16385
+
+ (
+ echo -n "add element inet filter test { "
+
+ case "$type_spec" in
+ "ether_addr . ipv4_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_mac $((1000000 + i))
+ printf ". 172.%i.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "inet_proto . ipv6_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . " $((RANDOM%256))
+ format_addr6 $((1000000 + i))
+ done
+ ;;
+ "inet_service . inet_proto")
+ # smaller key sizes, need more entries to hit the
+ # 4-bit threshold.
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ local proto=$((RANDOM%256))
+
+ # Test uses UDP to match, as it also fails when matching
+ # an entry that doesn't exist, so skip 'udp' entries
+ # to not trigger a wrong failure.
+ [ $proto -eq 17 ] && proto=18
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . %i " $(((i%65534) + 1)) $((proto))
+ done
+ ;;
+ "inet_service . ipv4_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . 172.%i.%i.%i " $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "ipv4_addr . ether_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ format_mac $((1000000 + i))
+ done
+ ;;
+ "ipv4_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+ done
+ ;;
+ "ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr")
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+ format_mac $((1000000 + i))
+ printf ". %i . 192.168.%i.%i" $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . inet_proto")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . %i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . inet_proto . ipv4_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((RANDOM%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . ipv4_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256))
+ done
+ ;;
+ "ipv6_addr . ether_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . "
+ format_mac $((1000000 + i))
+ done
+ ;;
+ "ipv6_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1))"
+ done
+ ;;
+ "ipv6_addr . inet_service . ether_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_mac $((i + 1000000))
+ done
+ ;;
+ "ipv6_addr . inet_service . ether_addr . inet_proto")
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_mac $((i + 1000000))
+ echo -n " . $((RANDOM%256))"
+ done
+ ;;
+ "ipv6_addr . inet_service . ipv6_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_addr6 $((i + 2123456))
+ echo -n " . $((RANDOM%256))"
+ done
+ ;;
+ *)
+ "Unhandled $type_spec"
+ return 1
+ esac
+ echo -n "}"
+
+ ) | nft -f - || return 1
+
+ test_correctness_main
+}
+
# Concurrency test template:
# - add all the elements
# - start a thread for each physical thread that:
@@ -1380,14 +1640,17 @@ test_timeout() {
range_size=1
for i in $(seq "$start" $((start + count))); do
+ local elem=""
+
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- add "$(format)" || return 1
+ elem="$(format)"
+ add "$elem" || return 1
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$elem" "${j}" $((j + src_delta)) || return 1
done
range_size=$((range_size + 1))
@@ -1545,7 +1808,7 @@ test_bug_reload() {
srcend=$((end + src_delta))
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$(format)" "${j}" $((j + src_delta)) || return 1
done
range_size=$((range_size + 1))
@@ -1555,6 +1818,142 @@ test_bug_reload() {
nft flush ruleset
}
+# - add ranged element, check that packets match it
+# - delete element again, check it is gone
+test_bug_net_port_proto_match() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+ rstart=${start}
+
+ range_size=1
+ for i in $(seq 1 10); do
+ for j in $(seq 1 20) ; do
+ local dport=$j
+
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
+
+ # too slow, do not test all addresses
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "before add" || return 1
+
+ nft "add element inet filter test { $elem }" || return 1
+
+ maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "after add" || return 1
+
+ nft "get element inet filter test { $elem }" | grep -q "$elem"
+ if [ $? -ne 0 ];then
+ local got=$(nft "get element inet filter test { $elem }")
+ err "post-add: should have returned $elem but got $got"
+ return 1
+ fi
+
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "out-of-range" || return 1
+ done
+ done
+
+ # recheck after set was filled
+ for i in $(seq 1 10); do
+ for j in $(seq 1 20) ; do
+ local dport=$j
+
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
+
+ nft "get element inet filter test { $elem }" | grep -q "$elem"
+ if [ $? -ne 0 ];then
+ local got=$(nft "get element inet filter test { $elem }")
+ err "post-fill: should have returned $elem but got $got"
+ return 1
+ fi
+
+ maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "recheck" || return 1
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "recheck out-of-range" || return 1
+ done
+ done
+
+ # random del and re-fetch
+ for i in $(seq 1 10); do
+ for j in $(seq 1 20) ; do
+ local rnd=$((RANDOM%10))
+ local dport=$j
+ local got=""
+
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
+ if [ $rnd -gt 0 ];then
+ continue
+ fi
+
+ nft "delete element inet filter test { $elem }"
+ got=$(nft "get element inet filter test { $elem }" 2>/dev/null)
+ if [ $? -eq 0 ];then
+ err "post-delete: query for $elem returned $got instead of error."
+ return 1
+ fi
+
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "match after deletion" || return 1
+ done
+ done
+
+ nft flush ruleset
+}
+
+test_bug_avx2_mismatch()
+{
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ local a1="fe80:dead:01ff:0a02:0b03:6007:8009:a001"
+ local a2="fe80:dead:01fe:0a02:0b03:6007:8009:a001"
+
+ nft "add element inet filter test { icmpv6 . $a1 }"
+
+ dst_addr6="$a2"
+ send_icmp6
+
+ if [ "$(count_packets "{ icmpv6 . $a1 }")" -gt "0" ]; then
+ err "False match for $a2"
+ return 1
+ fi
+}
+
+test_bug_doublecreate()
+{
+ local elements="1.2.3.4 . 1.2.4.1, 1.2.4.1 . 1.2.3.4"
+ local ret=1
+ local i
+
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ add "{ $elements }" || return 1
+ # expected to work: 'add' on existing should be no-op.
+ add "{ $elements }" || return 1
+
+ # 'create' should return an error.
+ if nft create element inet filter test "{ $elements }" 2>/dev/null; then
+ err "Could create an existing element"
+ return 1
+ fi
+nft -f - <<EOF 2>/dev/null
+flush set inet filter test
+create element inet filter test { $elements }
+create element inet filter test { $elements }
+EOF
+ ret=$?
+ if [ $ret -eq 0 ]; then
+ err "Could create element twice in one transaction"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+
+nft -f - <<EOF 2>/dev/null
+flush set inet filter test
+create element inet filter test { $elements }
+EOF
+ ret=$?
+ if [ $ret -ne 0 ]; then
+ err "Could not flush and re-create element in one transaction"
+ return 1
+ fi
+
+ return 0
+}
+
test_reported_issues() {
eval test_bug_"${subtest}"
}
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
index ce1451c275fd..04544905c216 100755
--- a/tools/testing/selftests/net/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -3,6 +3,10 @@
# This tests the fib expression.
#
# Kselftest framework requirement - SKIP code is 4.
+#
+# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99
+# dead:1::99 dead:1::1 dead:2::1 dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
source lib.sh
@@ -45,6 +49,19 @@ table inet filter {
EOF
}
+load_input_ruleset() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain input {
+ type filter hook input priority 0; policy accept;
+ fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+ }
+}
+EOF
+}
+
load_pbr_ruleset() {
local netns=$1
@@ -59,6 +76,89 @@ table inet filter {
EOF
}
+load_type_ruleset() {
+ local netns=$1
+
+ for family in ip ip6;do
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table $family filter {
+ chain type_match_in {
+ fib daddr type local counter comment "daddr configured on other iface"
+ fib daddr . iif type local counter comment "daddr configured on iif"
+ fib daddr type unicast counter comment "daddr not local"
+ fib daddr . iif type unicast counter comment "daddr not configured on iif"
+ }
+
+ chain type_match_out {
+ fib daddr type unicast counter
+ fib daddr . oif type unicast counter
+ fib daddr type local counter
+ fib daddr . oif type local counter
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain input {
+ type filter hook input priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain forward {
+ type filter hook forward priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain output {
+ type filter hook output priority 0;
+ icmp type echo-request counter jump type_match_out
+ icmpv6 type echo-request counter jump type_match_out
+ }
+
+ chain postrouting {
+ type filter hook postrouting priority 0;
+ icmp type echo-request counter jump type_match_out
+ icmpv6 type echo-request counter jump type_match_out
+ }
+}
+EOF
+done
+}
+
+reload_type_ruleset() {
+ ip netns exec "$1" nft flush table ip filter
+ ip netns exec "$1" nft flush table ip6 filter
+ load_type_ruleset "$1"
+}
+
+check_fib_type_counter_family() {
+ local family="$1"
+ local want="$2"
+ local ns="$3"
+ local chain="$4"
+ local what="$5"
+ local errmsg="$6"
+
+ if ! ip netns exec "$ns" nft list chain "$family" filter "$chain" | grep "$what" | grep -q "packets $want";then
+ echo "Netns $ns $family fib type counter doesn't match expected packet count of $want for $what $errmsg" 1>&2
+ ip netns exec "$ns" nft list chain "$family" filter "$chain"
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+check_fib_type_counter() {
+ check_fib_type_counter_family "ip" "$@" || return 1
+ check_fib_type_counter_family "ip6" "$@" || return 1
+}
+
load_ruleset_count() {
local netns=$1
@@ -77,6 +177,7 @@ check_drops() {
if dmesg | grep -q ' nft_rpfilter: ';then
dmesg | grep ' nft_rpfilter: '
echo "FAIL: rpfilter did drop packets"
+ ret=1
return 1
fi
@@ -151,19 +252,509 @@ test_ping() {
return 0
}
+test_ping_unreachable() {
+ local daddr4=$1
+ local daddr6=$2
+
+ if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr4" > /dev/null; then
+ echo "FAIL: ${ns1} could reach $daddr4" 1>&2
+ return 1
+ fi
+
+ if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr6" > /dev/null; then
+ echo "FAIL: ${ns1} could reach $daddr6" 1>&2
+ return 1
+ fi
+
+ return 0
+}
+
+test_fib_type() {
+ local notice="$1"
+ local errmsg="addr-on-if"
+ local lret=0
+
+ if ! load_type_ruleset "$nsrouter";then
+ echo "SKIP: Could not load fib type ruleset"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return
+ fi
+
+ # makes router receive packet for addresses configured on incoming
+ # interface.
+ test_ping 10.0.1.1 dead:1::1 || return 1
+
+ # expectation: triggers all 'local' in prerouting/input.
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type local" "$errmsg" || lret=1
+
+ reload_type_ruleset "$nsrouter"
+ # makes router receive packet for address configured on a different (but local)
+ # interface.
+ test_ping 10.0.2.1 dead:2::1 || return 1
+
+ # expectation: triggers 'unicast' in prerouting/input for daddr . iif and local for 'daddr'.
+ errmsg="addr-on-host"
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+ reload_type_ruleset "$nsrouter"
+ test_ping 10.0.2.99 dead:2::99 || return 1
+ errmsg="addr-on-otherhost"
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type unicast" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: fib expression address types match ($notice)"
+ else
+ echo "FAIL: fib expression address types match ($notice)"
+ ret=1
+ fi
+}
+
+test_fib_vrf_dev_add_dummy()
+{
+ if ! ip -net "$nsrouter" link add dummy0 type dummy ;then
+ echo "SKIP: VRF tests: dummy device type not supported"
+ return 1
+ fi
+
+ if ! ip -net "$nsrouter" link add tvrf type vrf table 9876;then
+ echo "SKIP: VRF tests: vrf device type not supported"
+ return 1
+ fi
+
+ ip -net "$nsrouter" link set dummy0 master tvrf
+ ip -net "$nsrouter" link set dummy0 up
+ ip -net "$nsrouter" link set tvrf up
+}
+
+load_ruleset_vrf()
+{
+# Due to the many different possible combinations using named counters
+# or one-rule-per-expected-result is complex.
+#
+# Instead, add dynamic sets for the fib modes
+# (fib address type, fib output interface lookup .. ),
+# and then add the obtained fib results to them.
+#
+# The test is successful if the sets contain the expected results
+# and no unexpected extra entries existed.
+ip netns exec "$nsrouter" nft -f - <<EOF
+flush ruleset
+table inet t {
+ set fibif4 {
+ typeof meta iif . ip daddr . fib daddr oif
+ flags dynamic
+ counter
+ }
+
+ set fibif4iif {
+ typeof meta iif . ip daddr . fib daddr . iif oif
+ flags dynamic
+ counter
+ }
+
+ set fibif6 {
+ typeof meta iif . ip6 daddr . fib daddr oif
+ flags dynamic
+ counter
+ }
+
+ set fibif6iif {
+ typeof meta iif . ip6 daddr . fib daddr . iif oif
+ flags dynamic
+ counter
+ }
+
+ set fibtype4 {
+ typeof meta iif . ip daddr . fib daddr type
+ flags dynamic
+ counter
+ }
+
+ set fibtype4iif {
+ typeof meta iif . ip daddr . fib daddr . iif type
+ flags dynamic
+ counter
+ }
+
+ set fibtype6 {
+ typeof meta iif . ip6 daddr . fib daddr type
+ flags dynamic
+ counter
+ }
+
+ set fibtype6iif {
+ typeof meta iif . ip6 daddr . fib daddr . iif type
+ flags dynamic
+ counter
+ }
+
+ chain fib_test {
+ meta nfproto ipv4 jump {
+ add @fibif4 { meta iif . ip daddr . fib daddr oif }
+ add @fibif4iif { meta iif . ip daddr . fib daddr . iif oif }
+ add @fibtype4 { meta iif . ip daddr . fib daddr type }
+ add @fibtype4iif { meta iif . ip daddr . fib daddr . iif type }
+
+ add @fibif4 { meta iif . ip saddr . fib saddr oif }
+ add @fibif4iif { meta iif . ip saddr . fib saddr . iif oif }
+ }
+
+ meta nfproto ipv6 jump {
+ add @fibif6 { meta iif . ip6 daddr . fib daddr oif }
+ add @fibif6iif { meta iif . ip6 daddr . fib daddr . iif oif }
+ add @fibtype6 { meta iif . ip6 daddr . fib daddr type }
+ add @fibtype6iif { meta iif . ip6 daddr . fib daddr . iif type }
+
+ add @fibif6 { meta iif . ip6 saddr . fib saddr oif }
+ add @fibif6iif { meta iif . ip6 saddr . fib saddr . iif oif }
+ }
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority 0;
+ icmp type echo-request counter jump fib_test
+
+ # neighbour discovery to be ignored.
+ icmpv6 type echo-request counter jump fib_test
+ }
+}
+EOF
+
+if [ $? -ne 0 ] ;then
+ echo "SKIP: Could not load ruleset for fib vrf test"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return 1
+fi
+}
+
+check_type()
+{
+ local setname="$1"
+ local iifname="$2"
+ local addr="$3"
+ local type="$4"
+ local count="$5"
+ local lret=0
+
+ [ -z "$count" ] && count=1
+
+ if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -q "counter packets $count";then
+ echo "FAIL: did not find $iifname . $addr . $type in $setname with $count packets"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ # do not fail right away, delete entry if it exists so later test that
+ # checks for unwanted keys don't get confused by this *expected* key.
+ lret=1
+ fi
+
+ # delete the entry, this allows to check if anything unexpected appeared
+ # at the end of the test run: all dynamic sets should be empty by then.
+ if ! ip netns exec "$nsrouter" nft delete element inet t "$setname" { "$iifname" . "$addr" . "$type" } ; then
+ echo "FAIL: can't delete $iifname . $addr . $type in $setname"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ return 1
+ fi
+
+ return $lret
+}
+
+check_local()
+{
+ check_type $@ "local" 1
+}
+
+check_unicast()
+{
+ check_type $@ "unicast" 1
+}
+
+check_rpf()
+{
+ check_type $@
+}
+
+check_fib_vrf_sets_empty()
+{
+ local setname=""
+ local lret=0
+
+ # A non-empty set means that we have seen unexpected packets OR
+ # that a fib lookup provided unexpected results.
+ for setname in "fibif4" "fibif4iif" "fibif6" "fibif6iif" \
+ "fibtype4" "fibtype4iif" "fibtype6" "fibtype6iif";do
+ if ip netns exec "$nsrouter" nft list set inet t "$setname" | grep -q elements;then
+ echo "FAIL: $setname not empty"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ lret=1
+ fi
+ done
+
+ return $lret
+}
+
+check_fib_vrf_type()
+{
+ local msg="$1"
+
+ local addr
+ # the incoming interface is always veth0. As its not linked to a VRF,
+ # the 'tvrf' device should NOT show up anywhere.
+ local ifname="veth0"
+ local lret=0
+
+ # local_veth0, local_veth1
+ for addr in "10.0.1.1" "10.0.2.1"; do
+ check_local fibtype4 "$ifname" "$addr" || lret=1
+ check_type fibif4 "$ifname" "$addr" "0" || lret=1
+ done
+ for addr in "dead:1::1" "dead:2::1";do
+ check_local fibtype6 "$ifname" "$addr" || lret=1
+ check_type fibif6 "$ifname" "$addr" "0" || lret=1
+ done
+
+ # when restricted to the incoming interface, 10.0.1.1 should
+ # be 'local', but 10.0.2.1 unicast.
+ check_local fibtype4iif "$ifname" "10.0.1.1" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.1" || lret=1
+
+ # same for the ipv6 addresses.
+ check_local fibtype6iif "$ifname" "dead:1::1" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::1" || lret=1
+
+ # None of these addresses should find a valid route when restricting
+ # to the incoming interface (we ask for daddr - 10.0.1.1/2.1 are
+ # reachable via 'lo'.
+ for addr in "10.0.1.1" "10.0.2.1" "10.9.9.1" "10.9.9.2";do
+ check_type fibif4iif "$ifname" "$addr" "0" || lret=1
+ done
+
+ # expect default route (veth1), dummy0 is part of VRF but iif isn't.
+ for addr in "10.9.9.1" "10.9.9.2";do
+ check_unicast fibtype4 "$ifname" "$addr" || lret=1
+ check_unicast fibtype4iif "$ifname" "$addr" || lret=1
+ check_type fibif4 "$ifname" "$addr" "veth1" || lret=1
+ done
+ for addr in "dead:9::1" "dead:9::2";do
+ check_unicast fibtype6 "$ifname" "$addr" || lret=1
+ check_unicast fibtype6iif "$ifname" "$addr" || lret=1
+ check_type fibif6 "$ifname" "$addr" "veth1" || lret=1
+ done
+
+ # same for the IPv6 equivalent addresses.
+ for addr in "dead:1::1" "dead:2::1" "dead:9::1" "dead:9::2";do
+ check_type fibif6iif "$ifname" "$addr" "0" || lret=1
+ done
+
+ check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+ check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1
+ check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1
+ check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1
+ check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+ check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 5 || lret=1
+ check_rpf fibif4iif "$ifname" "10.0.1.99" "veth0" 5 || lret=1
+ check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 5 || lret=1
+ check_rpf fibif6iif "$ifname" "dead:1::99" "veth0" 5 || lret=1
+
+ check_fib_vrf_sets_empty || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "FAIL: $msg"
+ ret=1
+ fi
+}
+
+check_fib_veth_vrf_type()
+{
+ local msg="$1"
+
+ local addr
+ local ifname
+ local setname
+ local lret=0
+
+ # as veth0 is now part of tvrf interface, packets will be seen
+ # twice, once with iif veth0, then with iif tvrf.
+
+ for ifname in "veth0" "tvrf"; do
+ for addr in "10.0.1.1" "10.9.9.1"; do
+ check_local fibtype4 "$ifname" "$addr" || lret=1
+ # addr local, but nft_fib doesn't return routes with RTN_LOCAL.
+ check_type fibif4 "$ifname" "$addr" 0 || lret=1
+ check_type fibif4iif "$ifname" "$addr" 0 || lret=1
+ done
+
+ for addr in "dead:1::1" "dead:9::1"; do
+ check_local fibtype6 "$ifname" "$addr" || lret=1
+ # same, address is local but no route is returned for lo.
+ check_type fibif6 "$ifname" "$addr" 0 || lret=1
+ check_type fibif6iif "$ifname" "$addr" 0 || lret=1
+ done
+
+ for t in fibtype4 fibtype4iif; do
+ check_unicast "$t" "$ifname" 10.9.9.2 || lret=1
+ done
+ for t in fibtype6 fibtype6iif; do
+ check_unicast "$t" "$ifname" dead:9::2 || lret=1
+ done
+
+ check_unicast fibtype4iif "$ifname" "10.9.9.1" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:9::1" || lret=1
+
+ check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+
+ check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+ check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1
+ check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1
+ check_type fibif4 "$ifname" "10.9.9.2" "dummy0" || lret=1
+ check_type fibif6 "$ifname" "dead:9::2" "dummy0" || lret=1
+
+ # restricted to iif -- MUST NOT provide result, its != $ifname.
+ check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1
+ check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+ check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 4 || lret=1
+ check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 4 || lret=1
+ check_rpf fibif4iif "$ifname" "10.0.1.99" "$ifname" 4 || lret=1
+ check_rpf fibif6iif "$ifname" "dead:1::99" "$ifname" 4 || lret=1
+ done
+
+ check_local fibtype4iif "veth0" "10.0.1.1" || lret=1
+ check_local fibtype6iif "veth0" "dead:1::1" || lret=1
+
+ check_unicast fibtype4iif "tvrf" "10.0.1.1" || lret=1
+ check_unicast fibtype6iif "tvrf" "dead:1::1" || lret=1
+
+ # 10.9.9.2 should not provide a result for iif veth, but
+ # should when iif is tvrf.
+ # This is because its reachable via dummy0 which is part of
+ # tvrf. iif veth0 MUST conceal the dummy0 result (i.e. return oif 0).
+ check_type fibif4iif "veth0" "10.9.9.2" 0 || lret=1
+ check_type fibif6iif "veth0" "dead:9::2" 0 || lret=1
+
+ check_type fibif4iif "tvrf" "10.9.9.2" "tvrf" || lret=1
+ check_type fibif6iif "tvrf" "dead:9::2" "tvrf" || lret=1
+
+ check_fib_vrf_sets_empty || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "FAIL: $msg"
+ ret=1
+ fi
+}
+
+# Extends nsrouter config by adding dummy0+vrf.
+#
+# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99
+# dead:1::99 dead:1::1 dead:2::1 dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
+# [dummy0]
+# 10.9.9.1
+# dead:9::1
+# [tvrf]
+test_fib_vrf()
+{
+ local cntname=""
+
+ if ! test_fib_vrf_dev_add_dummy; then
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return
+ fi
+
+ ip -net "$nsrouter" addr add "10.9.9.1"/24 dev dummy0
+ ip -net "$nsrouter" addr add "dead:9::1"/64 dev dummy0 nodad
+
+ ip -net "$nsrouter" route add default via 10.0.2.99
+ ip -net "$nsrouter" route add default via dead:2::99
+
+ load_ruleset_vrf || return
+
+ # no echo reply for these addresses: The dummy interface is part of tvrf,
+ # but veth0 (incoming interface) isn't linked to it.
+ test_ping_unreachable "10.9.9.1" "dead:9::1" &
+ test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+ # expect replies from these.
+ test_ping "10.0.1.1" "dead:1::1"
+ test_ping "10.0.2.1" "dead:2::1"
+ test_ping "10.0.2.99" "dead:2::99"
+
+ wait
+
+ check_fib_vrf_type "fib expression address types match (iif not in vrf)"
+
+ # second round: this time, make veth0 (rx interface) part of the vrf.
+ # 10.9.9.1 / dead:9::1 become reachable from ns1, while ns2
+ # becomes unreachable.
+ ip -net "$nsrouter" link set veth0 master tvrf
+ ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ # this reload should not be needed, but in case
+ # there is some error (missing or unexpected entry) this will prevent them
+ # from leaking into round 2.
+ load_ruleset_vrf || return
+
+ test_ping "10.0.1.1" "dead:1::1"
+ test_ping "10.9.9.1" "dead:9::1"
+
+ # ns2 should no longer be reachable (veth1 not in vrf)
+ test_ping_unreachable "10.0.2.99" "dead:2::99" &
+
+ # vrf via dummy0, but host doesn't exist
+ test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+ wait
+
+ check_fib_veth_vrf_type "fib expression address types match (iif in vrf)"
+}
+
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
-ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
-ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
test_ping 10.0.2.1 dead:2::1 || exit 1
-check_drops || exit 1
+check_drops
test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops
+
+[ $ret -eq 0 ] && echo "PASS: fib expression did not cause unwanted packet drops"
+
+load_input_ruleset "$ns1"
+
+test_ping 127.0.0.1 ::1
+check_drops
+
+test_ping 10.0.1.99 dead:1::99
+check_drops
+
+[ $ret -eq 0 ] && echo "PASS: fib expression did not discard loopback packets"
+
+load_input_ruleset "$ns1"
+
+test_ping 127.0.0.1 ::1 || exit 1
check_drops || exit 1
-echo "PASS: fib expression did not cause unwanted packet drops"
+test_ping 10.0.1.99 dead:1::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not discard loopback packets"
ip netns exec "$nsrouter" nft flush table inet filter
@@ -213,7 +804,7 @@ ip -net "$nsrouter" addr del dead:2::1/64 dev veth0
# ... pbr ruleset for the router, check iif+oif.
if ! load_pbr_ruleset "$nsrouter";then
echo "SKIP: Could not load fib forward ruleset"
- exit $ksft_skip
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
fi
ip -net "$nsrouter" rule add from all table 128
@@ -224,11 +815,36 @@ ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1
# drop main ipv4 table
ip -net "$nsrouter" -4 rule delete table main
-if ! test_ping 10.0.2.99 dead:2::99;then
- ip -net "$nsrouter" nft list ruleset
- echo "FAIL: fib mismatch in pbr setup"
- exit 1
+if test_ping 10.0.2.99 dead:2::99;then
+ echo "PASS: fib expression forward check with policy based routing"
+else
+ echo "FAIL: fib expression forward check with policy based routing"
+ ret=1
fi
-echo "PASS: fib expression forward check with policy based routing"
-exit 0
+test_fib_type "policy routing"
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+# Un-do policy routing changes
+ip -net "$nsrouter" rule del from all table 128
+ip -net "$nsrouter" rule del from all iif veth0 table 129
+
+ip -net "$nsrouter" route del table 128 to 10.0.1.0/24 dev veth0
+ip -net "$nsrouter" route del table 129 to 10.0.2.0/24 dev veth1
+
+ip -net "$ns1" -4 route del default
+ip -net "$ns1" -6 route del default
+
+ip -net "$ns1" -4 route add default via 10.0.1.1
+ip -net "$ns1" -6 route add default via dead:1::1
+
+ip -net "$nsrouter" -4 rule add from all table main priority 32766
+
+test_fib_type "default table"
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+test_fib_vrf
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index b3995550856a..a68bc882fa4e 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -20,6 +20,7 @@ ret=0
SOCAT_TIMEOUT=60
nsin=""
+nsin_small=""
ns1out=""
ns2out=""
@@ -36,7 +37,7 @@ cleanup() {
cleanup_all_ns
- rm -f "$nsin" "$ns1out" "$ns2out"
+ rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out"
[ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
}
@@ -71,6 +72,9 @@ omtu=9000
lmtu=1500
rmtu=2000
+filesize=$((2 * 1024 * 1024))
+filesize_small=$((filesize / 16))
+
usage(){
echo "nft_flowtable.sh [OPTIONS]"
echo
@@ -81,12 +85,16 @@ usage(){
exit 1
}
-while getopts "o:l:r:" o
+while getopts "o:l:r:s:" o
do
case $o in
o) omtu=$OPTARG;;
l) lmtu=$OPTARG;;
r) rmtu=$OPTARG;;
+ s)
+ filesize=$OPTARG
+ filesize_small=$((OPTARG / 16))
+ ;;
*) usage;;
esac
done
@@ -119,6 +127,8 @@ ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad
ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad
+ip netns exec "$nsr1" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsr2" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
for i in 0 1; do
ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
@@ -145,7 +155,9 @@ ip -net "$ns1" route add default via dead:1::1
ip -net "$ns2" route add default via dead:2::1
ip -net "$nsr1" route add default via 192.168.10.2
+ip -6 -net "$nsr1" route add default via fee1:2::2
ip -net "$nsr2" route add default via 192.168.10.1
+ip -6 -net "$nsr2" route add default via fee1:2::1
ip netns exec "$nsr1" nft -f - <<EOF
table inet filter {
@@ -212,23 +224,16 @@ if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
fi
nsin=$(mktemp)
+nsin_small=$(mktemp)
ns1out=$(mktemp)
ns2out=$(mktemp)
make_file()
{
- name=$1
-
- SIZE=$((RANDOM % (1024 * 128)))
- SIZE=$((SIZE + (1024 * 8)))
- TSIZE=$((SIZE * 1024))
-
- dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+ name="$1"
+ sz="$2"
- SIZE=$((RANDOM % 1024))
- SIZE=$((SIZE + 128))
- TSIZE=$((TSIZE + SIZE))
- dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
+ head -c "$sz" < /dev/urandom > "$name"
}
check_counters()
@@ -246,18 +251,18 @@ check_counters()
local fs
fs=$(du -sb "$nsin")
local max_orig=${fs%%/*}
- local max_repl=$((max_orig/4))
+ local max_repl=$((max_orig))
# flowtable fastpath should bypass normal routing one, i.e. the counters in forward hook
# should always be lower than the size of the transmitted file (max_orig).
if [ "$orig_cnt" -gt "$max_orig" ];then
- echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2
+ echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig, reply counter $repl_cnt" 1>&2
ret=1
ok=0
fi
if [ "$repl_cnt" -gt $max_repl ];then
- echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2
+ echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl, original counter $orig_cnt" 1>&2
ret=1
ok=0
fi
@@ -270,6 +275,7 @@ check_counters()
check_dscp()
{
local what=$1
+ local pmtud="$2"
local ok=1
local counter
@@ -282,37 +288,39 @@ check_dscp()
local pc4z=${counter%*bytes*}
local pc4z=${pc4z#*packets}
+ local failmsg="FAIL: pmtu $pmtu: $what counters do not match, expected"
+
case "$what" in
"dscp_none")
if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_fwd")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_ingress")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_egress")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
*)
- echo "FAIL: Unknown DSCP check" 1>&2
+ echo "$failmsg: Unknown DSCP check" 1>&2
ret=1
ok=0
esac
@@ -324,9 +332,9 @@ check_dscp()
check_transfer()
{
- in=$1
- out=$2
- what=$3
+ local in=$1
+ local out=$2
+ local what=$3
if ! cmp "$in" "$out" > /dev/null 2>&1; then
echo "FAIL: file mismatch for $what" 1>&2
@@ -347,25 +355,42 @@ test_tcp_forwarding_ip()
{
local nsa=$1
local nsb=$2
- local dstip=$3
- local dstport=$4
+ local pmtu=$3
+ local proto=$4
+ local dstip=$5
+ local dstport=$6
local lret=0
+ local socatc
+ local socatl
+ local infile="$nsin"
+
+ if [ $pmtu -eq 0 ]; then
+ infile="$nsin_small"
+ fi
- timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" &
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -${proto} \
+ TCP"${proto}"-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
lpid=$!
busywait 1000 listener_ready
- timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out"
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -${proto} \
+ TCP"${proto}":"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
+ socatc=$?
wait $lpid
+ socatl=$?
- if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
+ if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then
+ rc=1
+ fi
+
+ if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then
lret=1
ret=1
fi
- if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
+ if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then
lret=1
ret=1
fi
@@ -375,14 +400,22 @@ test_tcp_forwarding_ip()
test_tcp_forwarding()
{
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ local pmtu="$3"
+ local proto="$4"
+ local dstip="$5"
+ local dstport="$6"
+
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
return $?
}
test_tcp_forwarding_set_dscp()
{
- check_dscp "dscp_none"
+ local pmtu="$3"
+ local proto="$4"
+ local dstip="$5"
+ local dstport="$6"
ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
@@ -393,8 +426,8 @@ table netdev dscpmangle {
}
EOF
if [ $? -eq 0 ]; then
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_ingress"
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
+ check_dscp "dscp_ingress" "$pmtu"
ip netns exec "$nsr1" nft delete table netdev dscpmangle
else
@@ -410,10 +443,10 @@ table netdev dscpmangle {
}
EOF
if [ $? -eq 0 ]; then
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_egress"
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
+ check_dscp "dscp_egress" "$pmtu"
- ip netns exec "$nsr1" nft flush table netdev dscpmangle
+ ip netns exec "$nsr1" nft delete table netdev dscpmangle
else
echo "SKIP: Could not load netdev:egress for veth1"
fi
@@ -421,48 +454,53 @@ fi
# partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
# counters should have seen packets (before and after ft offload kicks in).
ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_fwd"
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
+ check_dscp "dscp_fwd" "$pmtu"
}
test_tcp_forwarding_nat()
{
+ local nsa="$1"
+ local nsb="$2"
+ local pmtu="$3"
+ local what="$4"
local lret
- local pmtu
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- lret=$?
+ [ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"
- pmtu=$3
- what=$4
+ test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.0.2.99 12345
+ lret=$?
if [ "$lret" -eq 0 ] ; then
if [ "$pmtu" -eq 1 ] ;then
- check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
+ check_counters "flow offload for ns1/ns2 with masquerade $what"
else
echo "PASS: flow offload for ns1/ns2 with masquerade $what"
fi
- test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 4 10.6.6.6 1666
lret=$?
if [ "$pmtu" -eq 1 ] ;then
- check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
+ check_counters "flow offload for ns1/ns2 with dnat $what"
elif [ "$lret" -eq 0 ] ; then
echo "PASS: flow offload for ns1/ns2 with dnat $what"
fi
+ else
+ echo "FAIL: flow offload for ns1/ns2 with dnat $what"
fi
return $lret
}
-make_file "$nsin"
+make_file "$nsin" "$filesize"
+make_file "$nsin_small" "$filesize_small"
# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
# Due to MTU mismatch in both directions, all packets (except small packets like pure
# acks) have to be handled by normal forwarding path. Therefore, packet counters
# are not checked.
-if test_tcp_forwarding "$ns1" "$ns2"; then
+if test_tcp_forwarding "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
echo "PASS: flow offloaded for ns1/ns2"
else
echo "FAIL: flow offload for ns1/ns2:" 1>&2
@@ -470,6 +508,14 @@ else
ret=1
fi
+if test_tcp_forwarding "$ns1" "$ns2" 0 6 "[dead:2::99]" 12345; then
+ echo "PASS: IPv6 flow offloaded for ns1/ns2"
+else
+ echo "FAIL: IPv6 flow offload for ns1/ns2:" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
@@ -494,8 +540,9 @@ table ip nat {
}
EOF
-if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
- echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
+check_dscp "dscp_none" "0"
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
+ echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2
exit 0
fi
@@ -518,12 +565,87 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
# For earlier tests (large mtus), packets cannot be handled via flowtable
# (except pure acks and other small packets).
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
+ip netns exec "$ns2" nft reset counters table inet filter >/dev/null
+
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
+ echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2
+ exit 0
+fi
+
+ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
ip netns exec "$nsr1" nft list ruleset
fi
+# IPIP tunnel test:
+# Add IPIP tunnel interfaces and check flowtable acceleration.
+test_ipip() {
+if ! ip -net "$nsr1" link add name tun0 type ipip \
+ local 192.168.10.1 remote 192.168.10.2 >/dev/null;then
+ echo "SKIP: could not add ipip tunnel"
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
+ return
+fi
+ip -net "$nsr1" link set tun0 up
+ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
+ip -net "$nsr2" link set tun0 up
+ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+
+ip -net "$nsr1" route change default via 192.168.100.2
+ip -net "$nsr2" route change default via 192.168.100.1
+ip -net "$ns2" route add default via 10.0.2.1
+
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
+ip netns exec "$nsr1" nft -a insert rule inet filter forward \
+ 'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
+ echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+# Create vlan tagged devices for IPIP traffic.
+ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
+ip -net "$nsr1" link set veth1.10 up
+ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
+ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
+ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
+ip -net "$nsr1" link set tun1 up
+ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
+ip -net "$nsr1" route change default via 192.168.200.2
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
+
+ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
+ip -net "$nsr2" link set veth0.10 up
+ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
+ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
+ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
+ip -net "$nsr2" link set tun1 up
+ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
+ip -net "$nsr2" route change default via 192.168.200.1
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
+ echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+# Restore the previous configuration
+ip -net "$nsr1" route change default via 192.168.10.2
+ip -net "$nsr2" route change default via 192.168.10.1
+ip -net "$ns2" route del default via 10.0.2.1
+}
+
# Another test:
# Add bridge interface br0 to Router1, with NAT enabled.
test_bridge() {
@@ -609,6 +731,8 @@ ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
ip -net "$nsr1" link set up dev veth0
}
+test_ipip
+
test_bridge
KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
@@ -649,7 +773,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" route add default via dead:2::1
-if test_tcp_forwarding "$ns1" "$ns2"; then
+if test_tcp_forwarding "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
check_counters "ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
@@ -657,6 +781,14 @@ else
ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
fi
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+ check_counters "IPv6 ipsec tunnel mode for ns1/ns2"
+else
+ echo "FAIL: IPv6 ipsec tunnel mode for ns1/ns2"
+ ip netns exec "$nsr1" nft list ruleset 1>&2
+ ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
+fi
+
if [ "$1" = "" ]; then
low=1280
mtu=$((65536 - low))
@@ -664,8 +796,16 @@ if [ "$1" = "" ]; then
l=$(((RANDOM%mtu) + low))
r=$(((RANDOM%mtu) + low))
- echo "re-run with random mtus: -o $o -l $l -r $r"
- $0 -o "$o" -l "$l" -r "$r"
+ MINSIZE=$((2 * 1000 * 1000))
+ MAXSIZE=$((64 * 1000 * 1000))
+
+ filesize=$(((RANDOM * RANDOM) % MAXSIZE))
+ if [ "$filesize" -lt "$MINSIZE" ]; then
+ filesize=$((filesize+MINSIZE))
+ fi
+
+ echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize"
+ $0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1
fi
exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
new file mode 100755
index 000000000000..c0fffaa6dbd9
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
@@ -0,0 +1,157 @@
+#!/bin/bash -e
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+# Torture nftables' netdevice notifier callbacks and related code by frequent
+# renaming of interfaces which netdev-family chains and flowtables hook into.
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+checktool "iperf3 --version" "run test without iperf3 tool"
+
+read kernel_tainted < /proc/sys/kernel/tainted
+
+# how many seconds to torture the kernel?
+# default to 80% of max run time but don't exceed 48s
+TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10))
+[[ $TEST_RUNTIME -gt 48 ]] && TEST_RUNTIME=48
+
+trap "cleanup_all_ns" EXIT
+
+setup_ns nsc nsr nss
+
+ip -net $nsc link add cr0 type veth peer name rc0 netns $nsr
+ip -net $nsc addr add 10.0.0.1/24 dev cr0
+ip -net $nsc link set cr0 up
+ip -net $nsc route add default via 10.0.0.2
+
+ip -net $nss link add sr0 type veth peer name rs0 netns $nsr
+ip -net $nss addr add 10.1.0.1/24 dev sr0
+ip -net $nss link set sr0 up
+ip -net $nss route add default via 10.1.0.2
+
+ip -net $nsr addr add 10.0.0.2/24 dev rc0
+ip -net $nsr link set rc0 up
+ip -net $nsr addr add 10.1.0.2/24 dev rs0
+ip -net $nsr link set rs0 up
+ip netns exec $nsr sysctl -q net.ipv4.ip_forward=1
+ip netns exec $nsr sysctl -q net.ipv4.conf.all.forwarding=1
+
+{
+ echo "table netdev t {"
+ for ((i = 0; i < 10; i++)); do
+ cat <<-EOF
+ chain chain_rc$i {
+ type filter hook ingress device rc$i priority 0
+ counter
+ }
+ chain chain_rs$i {
+ type filter hook ingress device rs$i priority 0
+ counter
+ }
+ EOF
+ done
+ echo "}"
+ echo "table ip t {"
+ for ((i = 0; i < 10; i++)); do
+ cat <<-EOF
+ flowtable ft_${i} {
+ hook ingress priority 0
+ devices = { rc$i, rs$i }
+ }
+ EOF
+ done
+ echo "chain c {"
+ echo "type filter hook forward priority 0"
+ for ((i = 0; i < 10; i++)); do
+ echo -n "iifname rc$i oifname rs$i "
+ echo "ip protocol tcp counter flow add @ft_${i}"
+ done
+ echo "counter"
+ echo "}"
+ echo "}"
+} | ip netns exec $nsr nft -f - || {
+ echo "SKIP: Could not load nft ruleset"
+ exit $ksft_skip
+}
+
+for ((o=0, n=1; ; o=n, n++, n %= 10)); do
+ ip -net $nsr link set rc$o name rc$n
+ ip -net $nsr link set rs$o name rs$n
+done &
+rename_loop_pid=$!
+
+while true; do ip netns exec $nsr nft list ruleset >/dev/null 2>&1; done &
+nft_list_pid=$!
+
+ip netns exec $nsr nft monitor >/dev/null &
+nft_monitor_pid=$!
+
+ip netns exec $nss iperf3 --server --daemon -1
+summary_expr='s,^\[SUM\] .* \([0-9\.]\+\) Kbits/sec .* receiver,\1,p'
+rate=$(ip netns exec $nsc iperf3 \
+ --format k -c 10.1.0.1 --time $TEST_RUNTIME \
+ --length 56 --parallel 10 -i 0 | sed -n "$summary_expr")
+
+kill $nft_list_pid
+kill $nft_monitor_pid
+kill $rename_loop_pid
+wait
+
+wildcard_prep() {
+ ip netns exec $nsr nft -f - <<EOF
+table ip t {
+ flowtable ft_wild {
+ hook ingress priority 0
+ devices = { wild* }
+ }
+}
+EOF
+}
+
+if ! wildcard_prep; then
+ echo "SKIP wildcard tests: not supported by host's nft?"
+else
+ for ((i = 0; i < 100; i++)); do
+ ip -net $nsr link add wild$i type dummy &
+ done
+ wait
+ for ((i = 80; i < 100; i++)); do
+ ip -net $nsr link del wild$i &
+ done
+ for ((i = 0; i < 80; i++)); do
+ ip -net $nsr link del wild$i &
+ done
+ wait
+ for ((i = 0; i < 100; i += 10)); do
+ (
+ for ((j = 0; j < 10; j++)); do
+ ip -net $nsr link add wild$((i + j)) type dummy
+ done
+ for ((j = 0; j < 10; j++)); do
+ ip -net $nsr link del wild$((i + j))
+ done
+ ) &
+ done
+ wait
+fi
+
+
+[[ $kernel_tainted -eq 0 && $(</proc/sys/kernel/tainted) -ne 0 ]] && {
+ echo "FAIL: Kernel is tainted!"
+ exit $ksft_fail
+}
+
+[[ $rate -gt 0 ]] || {
+ echo "FAIL: Zero throughput in iperf3"
+ exit $ksft_fail
+}
+
+[[ -f /sys/kernel/debug/kmemleak && \
+ -n $(</sys/kernel/debug/kmemleak) ]] && {
+ echo "FAIL: non-empty kmemleak report"
+ exit $ksft_fail
+}
+
+exit $ksft_pass
diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh
index 9e39de26455f..b3ec2d0a3f56 100755
--- a/tools/testing/selftests/net/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat.sh
@@ -569,7 +569,7 @@ test_redirect6()
ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
- echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6"
+ echo "ERROR: cannot ping $ns1 from $ns2 via ipv6"
lret=1
fi
@@ -859,13 +859,31 @@ EOF
# from router:service bypass connection tracking.
test_port_shadow_notrack "$family"
- # test nat based mitigation: fowarded packets coming from service port
+ # test nat based mitigation: forwarded packets coming from service port
# are masqueraded with random highport.
test_port_shadow_pat "$family"
ip netns exec "$ns0" nft delete table $family nat
}
+file_cmp()
+{
+ local infile="$1"
+ local outfile="$2"
+
+ if ! cmp "$infile" "$outfile";then
+ echo -n "Infile "
+ ls -l "$infile"
+ echo -n "Outfile "
+ ls -l "$outfile"
+ echo "ERROR: in and output file mismatch when checking $msg" 1>&1
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
test_stateless_nat_ip()
{
local lret=0
@@ -966,11 +984,7 @@ EOF
wait
- if ! cmp "$INFILE" "$OUTFILE";then
- ls -l "$INFILE" "$OUTFILE"
- echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
- lret=1
- fi
+ file_cmp "$INFILE" "$OUTFILE" "udp with stateless nat" || lret=1
:> "$OUTFILE"
@@ -991,6 +1005,62 @@ EOF
return $lret
}
+test_dnat_clash()
+{
+ local lret=0
+
+ if ! socat -h > /dev/null 2>&1;then
+ echo "SKIP: Could not run dnat clash test without socat tool"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return $ksft_skip
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+flush ruleset
+table ip dnat-test {
+ chain prerouting {
+ type nat hook prerouting priority dstnat; policy accept;
+ ip daddr 10.0.2.1 udp dport 1234 counter dnat to 10.0.1.1:1234
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add dnat rules"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return $ksft_skip
+ fi
+
+ local udpdaddr="10.0.2.1"
+ for i in 1 2;do
+ echo "PING $udpdaddr" > "$INFILE"
+ echo "PONG 10.0.1.1 step $i" | ip netns exec "$ns0" timeout 3 socat STDIO UDP4-LISTEN:1234,bind=10.0.1.1 > "$OUTFILE" 2>/dev/null &
+ local lpid=$!
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1234 "-u"
+
+ result=$(ip netns exec "$ns1" timeout 3 socat STDIO UDP4-SENDTO:"$udpdaddr:1234,sourceport=4321" < "$INFILE")
+ udpdaddr="10.0.1.1"
+
+ if [ "$result" != "PONG 10.0.1.1 step $i" ] ; then
+ echo "ERROR: failed to test udp $ns1 to $ns2 with dnat rule step $i, result: \"$result\"" 1>&2
+ lret=1
+ ret=1
+ fi
+
+ wait
+
+ file_cmp "$INFILE" "$OUTFILE" "udp dnat step $i" || lret=1
+
+ :> "$OUTFILE"
+ done
+
+ test $lret -eq 0 && echo "PASS: IP dnat clash $ns1:$ns2"
+
+ ip netns exec "$ns0" nft flush ruleset
+
+ return $lret
+}
+
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in "$ns0" "$ns1" "$ns2" ;do
ip netns exec "$i" nft -f /dev/stdin <<EOF
@@ -1147,6 +1217,7 @@ $test_inet_nat && test_redirect6 inet
test_port_shadowing
test_stateless_nat_ip
+test_dnat_clash
if [ $ret -ne 0 ];then
echo -n "FAIL: "
diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
index 3b81d88bdde3..9f200f80253a 100755
--- a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
@@ -88,7 +88,6 @@ for i in $(seq 1 "$maxclients");do
echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
echo netns exec "$gw" ip link set "veth$i" up
echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2
- echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0
# clients have same IP addresses.
echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
@@ -178,7 +177,6 @@ fi
ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
# useful for debugging: allows to use 'ping' from clients to gateway.
ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index c61d23a8c88d..6136ceec45e0 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -8,7 +8,9 @@
source lib.sh
ret=0
-timeout=2
+timeout=5
+
+SCTP_TEST_TIMEOUT=60
cleanup()
{
@@ -25,10 +27,13 @@ cleanup()
}
checktool "nft --version" "test without nft tool"
+checktool "socat -h" "run test without socat"
+
+modprobe -q sctp
trap cleanup EXIT
-setup_ns ns1 ns2 nsrouter
+setup_ns ns1 ns2 ns3 nsrouter
TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp)
@@ -36,13 +41,16 @@ TMPFILE2=$(mktemp)
TMPFILE3=$(mktemp)
TMPINPUT=$(mktemp)
-dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+COUNT=200
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=$((COUNT/8))
+dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT"
if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
echo "SKIP: No virtual ethernet pair device support in kernel"
exit $ksft_skip
fi
ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3"
ip -net "$nsrouter" link set veth0 up
ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
@@ -52,8 +60,13 @@ ip -net "$nsrouter" link set veth1 up
ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+ip -net "$nsrouter" link set veth2 up
+ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2
+ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad
+
ip -net "$ns1" link set eth0 up
ip -net "$ns2" link set eth0 up
+ip -net "$ns3" link set eth0 up
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
@@ -65,6 +78,11 @@ ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" route add default via dead:2::1
+ip -net "$ns3" addr add 10.0.3.99/24 dev eth0
+ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad
+ip -net "$ns3" route add default via 10.0.3.1
+ip -net "$ns3" route add default via dead:3::1
+
load_ruleset() {
local name=$1
local prio=$2
@@ -250,45 +268,52 @@ listener_ready()
test_tcp_forward()
{
- ip netns exec "$nsrouter" ./nf_queue -q 2 -t "$timeout" &
+ ip netns exec "$nsrouter" ./nf_queue -q 2 &
local nfqpid=$!
timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
local rpid=$!
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2
+
+ local tthen=$(date +%s)
ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
- wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+ wait_and_check_retval "$rpid" "tcp and nfqueue in forward chain" "$tthen"
+ kill "$nfqpid"
}
test_tcp_localhost()
{
- dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
local rpid=$!
- ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+ ip netns exec "$nsrouter" ./nf_queue -q 3 &
local nfqpid=$!
+ local tthen=$(date +%s)
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
- wait "$rpid" && echo "PASS: tcp via loopback"
- wait 2>/dev/null
+ wait_and_check_retval "$rpid" "tcp via loopback" "$tthen"
+ kill "$nfqpid"
}
test_tcp_localhost_connectclose()
{
- ip netns exec "$nsrouter" ./connect_close -p 23456 -t "$timeout" &
- ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+ ip netns exec "$nsrouter" ./nf_queue -q 3 &
+ local nfqpid=$!
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
+ timeout 10 ip netns exec "$nsrouter" ./connect_close -p 23456 -t 3
+
+ kill "$nfqpid"
wait && echo "PASS: tcp via loopback with connect/close"
- wait 2>/dev/null
}
test_tcp_localhost_requeue()
@@ -353,7 +378,7 @@ table inet filter {
}
}
EOF
- ip netns exec "$ns1" ./nf_queue -q 1 -t "$timeout" &
+ ip netns exec "$ns1" ./nf_queue -q 1 &
local nfqpid=$!
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1
@@ -363,6 +388,7 @@ EOF
for n in output post; do
for d in tvrf eth0; do
if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then
+ kill "$nfqpid"
echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
ip netns exec "$ns1" nft list ruleset
ret=1
@@ -371,8 +397,192 @@ EOF
done
done
- wait "$nfqpid" && echo "PASS: icmp+nfqueue via vrf"
- wait 2>/dev/null
+ kill "$nfqpid"
+ echo "PASS: icmp+nfqueue via vrf"
+}
+
+sctp_listener_ready()
+{
+ ss -S -N "$1" -ln -o "sport = :12345" | grep -q 12345
+}
+
+check_output_files()
+{
+ local f1="$1"
+ local f2="$2"
+ local err="$3"
+
+ if ! cmp "$f1" "$f2" ; then
+ echo "FAIL: $err: input and output file differ" 1>&2
+ echo -n " Input file" 1>&2
+ ls -l "$f1" 1>&2
+ echo -n "Output file" 1>&2
+ ls -l "$f2" 1>&2
+ ret=1
+ fi
+}
+
+wait_and_check_retval()
+{
+ local rpid="$1"
+ local msg="$2"
+ local tthen="$3"
+ local tnow=$(date +%s)
+
+ if wait "$rpid";then
+ echo -n "PASS: "
+ else
+ echo -n "FAIL: "
+ ret=1
+ fi
+
+ printf "%s (duration: %ds)\n" "$msg" $((tnow-tthen))
+}
+
+test_sctp_forward()
+{
+ ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet sctpq {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ sctp dport 12345 queue num 10
+ }
+}
+EOF
+ timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+ local rpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
+
+ ip netns exec "$nsrouter" ./nf_queue -q 10 -G &
+ local nfqpid=$!
+ local tthen=$(date +%s)
+
+ ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
+
+ if ! ip netns exec "$nsrouter" nft delete table inet sctpq; then
+ echo "FAIL: Could not delete sctpq table"
+ exit 1
+ fi
+
+ wait_and_check_retval "$rpid" "sctp and nfqueue in forward chain" "$tthen"
+ kill "$nfqpid"
+
+ check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward"
+}
+
+test_sctp_output()
+{
+ ip netns exec "$ns1" nft -f /dev/stdin <<EOF
+table inet sctpq {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ sctp dport 12345 queue num 11
+ }
+}
+EOF
+ # reduce test file size, software segmentation causes sk wmem increase.
+ dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT"
+
+ timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+ local rpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
+
+ ip netns exec "$ns1" ./nf_queue -q 11 &
+ local nfqpid=$!
+ local tthen=$(date +%s)
+
+ ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
+
+ if ! ip netns exec "$ns1" nft delete table inet sctpq; then
+ echo "FAIL: Could not delete sctpq table"
+ exit 1
+ fi
+
+ # must wait before checking completeness of output file.
+ wait_and_check_retval "$rpid" "sctp and nfqueue in output chain with GSO" "$tthen"
+ kill "$nfqpid"
+
+ check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output"
+}
+
+udp_listener_ready()
+{
+ ss -S -N "$1" -uln -o "sport = :12345" | grep -q 12345
+}
+
+output_files_written()
+{
+ test -s "$1" && test -s "$2"
+}
+
+test_udp_ct_race()
+{
+ ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet udpq {
+ chain prerouting {
+ type nat hook prerouting priority dstnat - 5; policy accept;
+ ip daddr 10.6.6.6 udp dport 12345 counter dnat to numgen inc mod 2 map { 0 : 10.0.2.99, 1 : 10.0.3.99 }
+ }
+ chain postrouting {
+ type filter hook postrouting priority srcnat - 5; policy accept;
+ udp dport 12345 counter queue num 12
+ }
+}
+EOF
+ :> "$TMPFILE1"
+ :> "$TMPFILE2"
+
+ timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12345,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc &
+ local rpid1=$!
+
+ timeout 10 ip netns exec "$ns3" socat UDP-LISTEN:12345,fork,pf=ipv4 OPEN:"$TMPFILE2",trunc &
+ local rpid2=$!
+
+ ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 &
+ local nfqpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2"
+ busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3"
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12
+
+ # Send two packets, one should end up in ns1, other in ns2.
+ # This is because nfqueue will delay packet for long enough so that
+ # second packet will not find existing conntrack entry.
+ echo "Packet 1" | ip netns exec "$ns1" socat -u STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221
+ echo "Packet 2" | ip netns exec "$ns1" socat -u STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221
+
+ busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2"
+
+ kill "$nfqpid"
+
+ if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^1"'; then
+ echo "FAIL: Expected One udp conntrack entry"
+ ip netns exec "$nsrouter" conntrack -L -p udp --dport 12345
+ ret=1
+ fi
+
+ if ! ip netns exec "$nsrouter" nft delete table inet udpq; then
+ echo "FAIL: Could not delete udpq table"
+ ret=1
+ return
+ fi
+
+ NUMLINES1=$(wc -l < "$TMPFILE1")
+ NUMLINES2=$(wc -l < "$TMPFILE2")
+
+ if [ "$NUMLINES1" -ne 1 ] || [ "$NUMLINES2" -ne 1 ]; then
+ ret=1
+ echo "FAIL: uneven udp packet distribution: $NUMLINES1 $NUMLINES2"
+ echo -n "$TMPFILE1: ";cat "$TMPFILE1"
+ echo -n "$TMPFILE2: ";cat "$TMPFILE2"
+ return
+ fi
+
+ echo "PASS: both udp receivers got one packet each"
}
test_queue_removal()
@@ -388,7 +598,7 @@ table ip filter {
}
}
EOF
- ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" &
+ ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 &
local nfqpid=$!
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0
@@ -407,6 +617,7 @@ EOF
echo "PASS: queue program exiting while packets queued"
else
echo "TAINT: queue program exiting while packets queued"
+ dmesg
ret=1
fi
}
@@ -414,6 +625,7 @@ EOF
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null
load_ruleset "filter" 0
@@ -443,11 +655,17 @@ test_queue 10
# same. We queue to a second program as well.
load_ruleset "filter2" 20
test_queue 20
+ip netns exec "$ns1" nft flush ruleset
test_tcp_forward
test_tcp_localhost
test_tcp_localhost_connectclose
test_tcp_localhost_requeue
+test_sctp_forward
+test_sctp_output
+test_udp_ct_race
+
+# should be last, adds vrf device in ns1 and changes routes
test_icmp_vrf
test_queue_removal
diff --git a/tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh b/tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh
new file mode 100755
index 000000000000..e208fb03eeb7
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh
@@ -0,0 +1,358 @@
+#!/bin/bash
+#
+# This tests tproxy on the following scenario:
+#
+# +------------+
+# +-------+ | nsrouter | +-------+
+# |ns1 |.99 .1| |.1 .99| ns2|
+# | eth0|---------------|veth0 veth1|------------------|eth0 |
+# | | 10.0.1.0/24 | | 10.0.2.0/24 | |
+# +-------+ dead:1::/64 | veth2 | dead:2::/64 +-------+
+# +------------+
+# |.1
+# |
+# |
+# | +-------+
+# | .99| ns3|
+# +------------------------|eth0 |
+# 10.0.3.0/24 | |
+# dead:3::/64 +-------+
+#
+# The tproxy implementation acts as an echo server so the client
+# must receive the same message it sent if it has been proxied.
+# If is not proxied the servers return PONG_NS# with the number
+# of the namespace the server is running.
+#
+# shellcheck disable=SC2162,SC2317
+
+source lib.sh
+ret=0
+timeout=5
+
+cleanup()
+{
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+ ip netns pids "$ns3" | xargs kill 2>/dev/null
+ ip netns pids "$nsrouter" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+}
+
+checktool "nft --version" "test without nft tool"
+checktool "socat -h" "run test without socat"
+
+trap cleanup EXIT
+setup_ns ns1 ns2 ns3 nsrouter
+
+if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3"
+
+ip -net "$nsrouter" link set veth0 up
+ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsrouter" link set veth1 up
+ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+
+ip -net "$nsrouter" link set veth2 up
+ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2
+ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad
+
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+ip -net "$ns3" link set eth0 up
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" route add default via dead:1::1
+
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+ip -net "$ns3" addr add 10.0.3.99/24 dev eth0
+ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad
+ip -net "$ns3" route add default via 10.0.3.1
+ip -net "$ns3" route add default via dead:3::1
+
+ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null
+
+test_ping() {
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+ return 1
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then
+ return 2
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.3.99 > /dev/null; then
+ return 1
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:3::99 > /dev/null; then
+ return 2
+ fi
+
+ return 0
+}
+
+test_ping_router() {
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then
+ return 3
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then
+ return 4
+ fi
+
+ return 0
+}
+
+
+listener_ready()
+{
+ local ns="$1"
+ local port="$2"
+ local proto="$3"
+ ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port"
+}
+
+test_tproxy()
+{
+ local traffic_origin="$1"
+ local ip_proto="$2"
+ local expect_ns1_ns2="$3"
+ local expect_ns1_ns3="$4"
+ local expect_nsrouter_ns2="$5"
+ local expect_nsrouter_ns3="$6"
+
+ # derived variables
+ local testname="test_${ip_proto}_tcp_${traffic_origin}"
+ local socat_ipproto
+ local ns1_ip
+ local ns2_ip
+ local ns3_ip
+ local ns2_target
+ local ns3_target
+ local nftables_subject
+ local ip_command
+
+ # socat 1.8.0 has a bug that requires to specify the IP family to bind (fixed in 1.8.0.1)
+ case $ip_proto in
+ "ip")
+ socat_ipproto="-4"
+ ns1_ip=10.0.1.99
+ ns2_ip=10.0.2.99
+ ns3_ip=10.0.3.99
+ ns2_target="tcp:$ns2_ip:8080"
+ ns3_target="tcp:$ns3_ip:8080"
+ nftables_subject="ip daddr $ns2_ip tcp dport 8080"
+ ip_command="ip"
+ ;;
+ "ip6")
+ socat_ipproto="-6"
+ ns1_ip=dead:1::99
+ ns2_ip=dead:2::99
+ ns3_ip=dead:3::99
+ ns2_target="tcp:[$ns2_ip]:8080"
+ ns3_target="tcp:[$ns3_ip]:8080"
+ nftables_subject="ip6 daddr $ns2_ip tcp dport 8080"
+ ip_command="ip -6"
+ ;;
+ *)
+ echo "FAIL: unsupported protocol"
+ exit 255
+ ;;
+ esac
+
+ case $traffic_origin in
+ # to capture the local originated traffic we need to mark the outgoing
+ # traffic so the policy based routing rule redirects it and can be processed
+ # in the prerouting chain.
+ "local")
+ nftables_rules="
+flush ruleset
+table inet filter {
+ chain divert {
+ type filter hook prerouting priority 0; policy accept;
+ $nftables_subject tproxy $ip_proto to :12345 meta mark set 1 accept
+ }
+ chain output {
+ type route hook output priority 0; policy accept;
+ $nftables_subject meta mark set 1 accept
+ }
+}"
+ ;;
+ "forward")
+ nftables_rules="
+flush ruleset
+table inet filter {
+ chain divert {
+ type filter hook prerouting priority 0; policy accept;
+ $nftables_subject tproxy $ip_proto to :12345 meta mark set 1 accept
+ }
+}"
+ ;;
+ *)
+ echo "FAIL: unsupported parameter for traffic origin"
+ exit 255
+ ;;
+ esac
+
+ # shellcheck disable=SC2046 # Intended splitting of ip_command
+ ip netns exec "$nsrouter" $ip_command rule add fwmark 1 table 100
+ ip netns exec "$nsrouter" $ip_command route add local "${ns2_ip}" dev lo table 100
+ echo "$nftables_rules" | ip netns exec "$nsrouter" nft -f /dev/stdin
+
+ timeout "$timeout" ip netns exec "$nsrouter" socat "$socat_ipproto" tcp-listen:12345,fork,ip-transparent SYSTEM:"cat" 2>/dev/null &
+ local tproxy_pid=$!
+
+ timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" tcp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null &
+ local server2_pid=$!
+
+ timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" tcp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null &
+ local server3_pid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-t"
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" 8080 "-t"
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns3" 8080 "-t"
+
+ local result
+ # request from ns1 to ns2 (forwarded traffic)
+ result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO "$ns2_target")
+ if [ "$result" == "$expect_ns1_ns2" ] ;then
+ echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2"
+ else
+ echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2, not \"${expect_ns1_ns2}\" as intended"
+ ret=1
+ fi
+
+ # request from ns1 to ns3(forwarded traffic)
+ result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO "$ns3_target")
+ if [ "$result" = "$expect_ns1_ns3" ] ;then
+ echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3"
+ else
+ echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3, not \"$expect_ns1_ns3\" as intended"
+ ret=1
+ fi
+
+ # request from nsrouter to ns2 (localy originated traffic)
+ result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO "$ns2_target")
+ if [ "$result" == "$expect_nsrouter_ns2" ] ;then
+ echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2"
+ else
+ echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2, not \"$expect_nsrouter_ns2\" as intended"
+ ret=1
+ fi
+
+ # request from nsrouter to ns3 (localy originated traffic)
+ result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO "$ns3_target")
+ if [ "$result" = "$expect_nsrouter_ns3" ] ;then
+ echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3"
+ else
+ echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3, not \"$expect_nsrouter_ns3\" as intended"
+ ret=1
+ fi
+
+ # cleanup
+ kill "$tproxy_pid" "$server2_pid" "$server3_pid" 2>/dev/null
+ # shellcheck disable=SC2046 # Intended splitting of ip_command
+ ip netns exec "$nsrouter" $ip_command rule del fwmark 1 table 100
+ ip netns exec "$nsrouter" $ip_command route flush table 100
+}
+
+
+test_ipv4_tcp_forward()
+{
+ local traffic_origin="forward"
+ local ip_proto="ip"
+ local expect_ns1_ns2="I_M_PROXIED"
+ local expect_ns1_ns3="PONG_NS3"
+ local expect_nsrouter_ns2="PONG_NS2"
+ local expect_nsrouter_ns3="PONG_NS3"
+
+ test_tproxy "$traffic_origin" \
+ "$ip_proto" \
+ "$expect_ns1_ns2" \
+ "$expect_ns1_ns3" \
+ "$expect_nsrouter_ns2" \
+ "$expect_nsrouter_ns3"
+}
+
+test_ipv4_tcp_local()
+{
+ local traffic_origin="local"
+ local ip_proto="ip"
+ local expect_ns1_ns2="I_M_PROXIED"
+ local expect_ns1_ns3="PONG_NS3"
+ local expect_nsrouter_ns2="I_M_PROXIED"
+ local expect_nsrouter_ns3="PONG_NS3"
+
+ test_tproxy "$traffic_origin" \
+ "$ip_proto" \
+ "$expect_ns1_ns2" \
+ "$expect_ns1_ns3" \
+ "$expect_nsrouter_ns2" \
+ "$expect_nsrouter_ns3"
+}
+
+test_ipv6_tcp_forward()
+{
+ local traffic_origin="forward"
+ local ip_proto="ip6"
+ local expect_ns1_ns2="I_M_PROXIED"
+ local expect_ns1_ns3="PONG_NS3"
+ local expect_nsrouter_ns2="PONG_NS2"
+ local expect_nsrouter_ns3="PONG_NS3"
+
+ test_tproxy "$traffic_origin" \
+ "$ip_proto" \
+ "$expect_ns1_ns2" \
+ "$expect_ns1_ns3" \
+ "$expect_nsrouter_ns2" \
+ "$expect_nsrouter_ns3"
+}
+
+test_ipv6_tcp_local()
+{
+ local traffic_origin="local"
+ local ip_proto="ip6"
+ local expect_ns1_ns2="I_M_PROXIED"
+ local expect_ns1_ns3="PONG_NS3"
+ local expect_nsrouter_ns2="I_M_PROXIED"
+ local expect_nsrouter_ns3="PONG_NS3"
+
+ test_tproxy "$traffic_origin" \
+ "$ip_proto" \
+ "$expect_ns1_ns2" \
+ "$expect_ns1_ns3" \
+ "$expect_nsrouter_ns2" \
+ "$expect_nsrouter_ns3"
+}
+
+if test_ping; then
+ # queue bypass works (rules were skipped, no listener)
+ echo "PASS: ${ns1} can reach ${ns2}"
+else
+ echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
+ exit $ret
+fi
+
+test_ipv4_tcp_forward
+test_ipv4_tcp_local
+test_ipv6_tcp_forward
+test_ipv6_tcp_local
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh
new file mode 100755
index 000000000000..d16de13fe5a7
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh
@@ -0,0 +1,262 @@
+#!/bin/bash
+#
+# This tests tproxy on the following scenario:
+#
+# +------------+
+# +-------+ | nsrouter | +-------+
+# |ns1 |.99 .1| |.1 .99| ns2|
+# | eth0|---------------|veth0 veth1|------------------|eth0 |
+# | | 10.0.1.0/24 | | 10.0.2.0/24 | |
+# +-------+ dead:1::/64 | veth2 | dead:2::/64 +-------+
+# +------------+
+# |.1
+# |
+# |
+# | +-------+
+# | .99| ns3|
+# +------------------------|eth0 |
+# 10.0.3.0/24 | |
+# dead:3::/64 +-------+
+#
+# The tproxy implementation acts as an echo server so the client
+# must receive the same message it sent if it has been proxied.
+# If is not proxied the servers return PONG_NS# with the number
+# of the namespace the server is running.
+# shellcheck disable=SC2162,SC2317
+
+source lib.sh
+ret=0
+# UDP is slow
+timeout=15
+
+cleanup()
+{
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+ ip netns pids "$ns3" | xargs kill 2>/dev/null
+ ip netns pids "$nsrouter" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+}
+
+checktool "nft --version" "test without nft tool"
+checktool "socat -h" "run test without socat"
+
+trap cleanup EXIT
+setup_ns ns1 ns2 ns3 nsrouter
+
+if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3"
+
+ip -net "$nsrouter" link set veth0 up
+ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsrouter" link set veth1 up
+ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+
+ip -net "$nsrouter" link set veth2 up
+ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2
+ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad
+
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+ip -net "$ns3" link set eth0 up
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" route add default via dead:1::1
+
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+ip -net "$ns3" addr add 10.0.3.99/24 dev eth0
+ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad
+ip -net "$ns3" route add default via 10.0.3.1
+ip -net "$ns3" route add default via dead:3::1
+
+ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null
+
+test_ping() {
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+ return 1
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then
+ return 2
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.3.99 > /dev/null; then
+ return 1
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:3::99 > /dev/null; then
+ return 2
+ fi
+
+ return 0
+}
+
+test_ping_router() {
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then
+ return 3
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then
+ return 4
+ fi
+
+ return 0
+}
+
+
+listener_ready()
+{
+ local ns="$1"
+ local port="$2"
+ local proto="$3"
+ ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port"
+}
+
+test_tproxy_udp_forward()
+{
+ local ip_proto="$1"
+
+ local expect_ns1_ns2="I_M_PROXIED"
+ local expect_ns1_ns3="PONG_NS3"
+ local expect_nsrouter_ns2="PONG_NS2"
+ local expect_nsrouter_ns3="PONG_NS3"
+
+ # derived variables
+ local testname="test_${ip_proto}_udp_forward"
+ local socat_ipproto
+ local ns1_ip
+ local ns2_ip
+ local ns3_ip
+ local ns1_ip_port
+ local ns2_ip_port
+ local ns3_ip_port
+ local ip_command
+
+ # socat 1.8.0 has a bug that requires to specify the IP family to bind (fixed in 1.8.0.1)
+ case $ip_proto in
+ "ip")
+ socat_ipproto="-4"
+ ns1_ip=10.0.1.99
+ ns2_ip=10.0.2.99
+ ns3_ip=10.0.3.99
+ ns1_ip_port="$ns1_ip:18888"
+ ns2_ip_port="$ns2_ip:8080"
+ ns3_ip_port="$ns3_ip:8080"
+ ip_command="ip"
+ ;;
+ "ip6")
+ socat_ipproto="-6"
+ ns1_ip=dead:1::99
+ ns2_ip=dead:2::99
+ ns3_ip=dead:3::99
+ ns1_ip_port="[$ns1_ip]:18888"
+ ns2_ip_port="[$ns2_ip]:8080"
+ ns3_ip_port="[$ns3_ip]:8080"
+ ip_command="ip -6"
+ ;;
+ *)
+ echo "FAIL: unsupported protocol"
+ exit 255
+ ;;
+ esac
+
+ # shellcheck disable=SC2046 # Intended splitting of ip_command
+ ip netns exec "$nsrouter" $ip_command rule add fwmark 1 table 100
+ ip netns exec "$nsrouter" $ip_command route add local "$ns2_ip" dev lo table 100
+ ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain divert {
+ type filter hook prerouting priority 0; policy accept;
+ $ip_proto daddr $ns2_ip udp dport 8080 tproxy $ip_proto to :12345 meta mark set 1 accept
+ }
+}
+EOF
+
+ timeout "$timeout" ip netns exec "$nsrouter" socat -u "$socat_ipproto" udp-listen:12345,fork,ip-transparent,reuseport udp:"$ns1_ip_port",ip-transparent,reuseport,bind="$ns2_ip_port" 2>/dev/null &
+ local tproxy_pid=$!
+
+ timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null &
+ local server2_pid=$!
+
+ timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null &
+ local server3_pid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-u"
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" 8080 "-u"
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns3" 8080 "-u"
+
+ local result
+ # request from ns1 to ns2 (forwarded traffic)
+ result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",sourceport=18888)
+ if [ "$result" == "$expect_ns1_ns2" ] ;then
+ echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2"
+ else
+ echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2, not \"${expect_ns1_ns2}\" as intended"
+ ret=1
+ fi
+
+ # request from ns1 to ns3 (forwarded traffic)
+ result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port")
+ if [ "$result" = "$expect_ns1_ns3" ] ;then
+ echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3"
+ else
+ echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3, not \"$expect_ns1_ns3\" as intended"
+ ret=1
+ fi
+
+ # request from nsrouter to ns2 (localy originated traffic)
+ result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port")
+ if [ "$result" == "$expect_nsrouter_ns2" ] ;then
+ echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2"
+ else
+ echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2, not \"$expect_nsrouter_ns2\" as intended"
+ ret=1
+ fi
+
+ # request from nsrouter to ns3 (localy originated traffic)
+ result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port")
+ if [ "$result" = "$expect_nsrouter_ns3" ] ;then
+ echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3"
+ else
+ echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3, not \"$expect_nsrouter_ns3\" as intended"
+ ret=1
+ fi
+
+ # cleanup
+ kill "$tproxy_pid" "$server2_pid" "$server3_pid" 2>/dev/null
+ # shellcheck disable=SC2046 # Intended splitting of ip_command
+ ip netns exec "$nsrouter" $ip_command rule del fwmark 1 table 100
+ ip netns exec "$nsrouter" $ip_command route flush table 100
+}
+
+
+if test_ping; then
+ # queue bypass works (rules were skipped, no listener)
+ echo "PASS: ${ns1} can reach ${ns2}"
+else
+ echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
+ exit $ret
+fi
+
+test_tproxy_udp_forward "ip"
+test_tproxy_udp_forward "ip6"
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh
index 4485fd7675ed..24ad41d526d9 100755
--- a/tools/testing/selftests/net/netfilter/rpath.sh
+++ b/tools/testing/selftests/net/netfilter/rpath.sh
@@ -1,8 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# return code to signal skipped test
-ksft_skip=4
+source lib.sh
# search for legacy iptables (it uses the xtables extensions
if iptables-legacy --version >/dev/null 2>&1; then
@@ -32,17 +31,10 @@ if [ -z "$iptables$ip6tables$nft" ]; then
exit $ksft_skip
fi
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-trap "ip netns del $ns1; ip netns del $ns2" EXIT
-
-# create two netns, disable rp_filter in ns2 and
-# keep IPv6 address when moving into VRF
-ip netns add "$ns1"
-ip netns add "$ns2"
-ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0
+trap cleanup_all_ns EXIT
+
+# create two netns, keep IPv6 address when moving into VRF
+setup_ns ns1 ns2
ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1
# a standard connection between the netns, should not trigger rp filter
@@ -61,9 +53,20 @@ ip -net "$ns2" a a 192.168.42.1/24 dev d0
ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad
ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
+# avoid neighbor lookups and enable martian IPv6 pings
+ns2_hwaddr=$(ip -net "$ns2" link show dev v0 | \
+ sed -n 's, *link/ether \([^ ]*\) .*,\1,p')
+ns1_hwaddr=$(ip -net "$ns1" link show dev v0 | \
+ sed -n 's, *link/ether \([^ ]*\) .*,\1,p')
+ip -net "$ns1" neigh add fec0:42::1 lladdr "$ns2_hwaddr" nud permanent dev v0
+ip -net "$ns1" neigh add fec0:23::1 lladdr "$ns2_hwaddr" nud permanent dev v0
+ip -net "$ns2" neigh add fec0:42::2 lladdr "$ns1_hwaddr" nud permanent dev d0
+ip -net "$ns2" neigh add fec0:23::2 lladdr "$ns1_hwaddr" nud permanent dev v0
+
# firewall matches to test
[ -n "$iptables" ] && {
common='-t raw -A PREROUTING -s 192.168.0.0/16'
+ common+=' -p icmp --icmp-type echo-request'
if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then
echo "Cannot add rpfilter rule"
exit $ksft_skip
@@ -72,6 +75,7 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
}
[ -n "$ip6tables" ] && {
common='-t raw -A PREROUTING -s fec0::/16'
+ common+=' -p icmpv6 --icmpv6-type echo-request'
if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then
echo "Cannot add rpfilter rule"
exit $ksft_skip
@@ -82,8 +86,10 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
table inet t {
chain c {
type filter hook prerouting priority raw;
- ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter
- ip6 saddr fec0::/16 fib saddr . iif oif exists counter
+ ip saddr 192.168.0.0/16 icmp type echo-request \
+ fib saddr . iif oif exists counter
+ ip6 saddr fec0::/16 icmpv6 type echo-request \
+ fib saddr . iif oif exists counter
}
}
EOF
diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c
index 21bb1cfd8a85..b282d1785c9b 100644
--- a/tools/testing/selftests/net/netfilter/sctp_collision.c
+++ b/tools/testing/selftests/net/netfilter/sctp_collision.c
@@ -9,9 +9,10 @@
int main(int argc, char *argv[])
{
struct sockaddr_in saddr = {}, daddr = {};
- int sd, ret, len = sizeof(daddr);
+ socklen_t len = sizeof(daddr);
struct timeval tv = {25, 0};
char buf[] = "hello";
+ int sd, ret;
if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n",
diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c
new file mode 100644
index 000000000000..79de163d61ab
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/udpclash.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Usage: ./udpclash <IP> <PORT>
+ *
+ * Emit THREAD_COUNT UDP packets sharing the same saddr:daddr pair.
+ *
+ * This mimics DNS resolver libraries that emit A and AAAA requests
+ * in parallel.
+ *
+ * This exercises conntrack clash resolution logic added and later
+ * refined in
+ *
+ * 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race")
+ * ed07d9a021df ("netfilter: nf_conntrack: resolve clash for matching conntracks")
+ * 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries")
+ */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <pthread.h>
+
+#define THREAD_COUNT 128
+
+struct thread_args {
+ const struct sockaddr_in *si_remote;
+ int sockfd;
+};
+
+static volatile int wait = 1;
+
+static void *thread_main(void *varg)
+{
+ const struct sockaddr_in *si_remote;
+ const struct thread_args *args = varg;
+ static const char msg[] = "foo";
+
+ si_remote = args->si_remote;
+
+ while (wait == 1)
+ ;
+
+ if (sendto(args->sockfd, msg, strlen(msg), MSG_NOSIGNAL,
+ (struct sockaddr *)si_remote, sizeof(*si_remote)) < 0)
+ exit(111);
+
+ return varg;
+}
+
+static int run_test(int fd, const struct sockaddr_in *si_remote)
+{
+ struct thread_args thread_args = {
+ .si_remote = si_remote,
+ .sockfd = fd,
+ };
+ pthread_t *tid = calloc(THREAD_COUNT, sizeof(pthread_t));
+ unsigned int repl_count = 0, timeout = 0;
+ int i;
+
+ if (!tid) {
+ perror("calloc");
+ return 1;
+ }
+
+ for (i = 0; i < THREAD_COUNT; i++) {
+ int err = pthread_create(&tid[i], NULL, &thread_main, &thread_args);
+
+ if (err != 0) {
+ perror("pthread_create");
+ exit(1);
+ }
+ }
+
+ wait = 0;
+
+ for (i = 0; i < THREAD_COUNT; i++)
+ pthread_join(tid[i], NULL);
+
+ while (repl_count < THREAD_COUNT) {
+ struct sockaddr_in si_repl;
+ socklen_t si_repl_len = sizeof(si_repl);
+ char repl[512];
+ ssize_t ret;
+
+ ret = recvfrom(fd, repl, sizeof(repl), MSG_NOSIGNAL,
+ (struct sockaddr *) &si_repl, &si_repl_len);
+ if (ret < 0) {
+ if (timeout++ > 5000) {
+ fputs("timed out while waiting for reply from thread\n", stderr);
+ break;
+ }
+
+ /* give reply time to pass though the stack */
+ usleep(1000);
+ continue;
+ }
+
+ if (si_repl_len != sizeof(*si_remote)) {
+ fprintf(stderr, "warning: reply has unexpected repl_len %d vs %d\n",
+ (int)si_repl_len, (int)sizeof(si_repl));
+ } else if (si_remote->sin_addr.s_addr != si_repl.sin_addr.s_addr ||
+ si_remote->sin_port != si_repl.sin_port) {
+ char a[64], b[64];
+
+ inet_ntop(AF_INET, &si_remote->sin_addr, a, sizeof(a));
+ inet_ntop(AF_INET, &si_repl.sin_addr, b, sizeof(b));
+
+ fprintf(stderr, "reply from wrong source: want %s:%d got %s:%d\n",
+ a, ntohs(si_remote->sin_port), b, ntohs(si_repl.sin_port));
+ }
+
+ repl_count++;
+ }
+
+ printf("got %d of %d replies\n", repl_count, THREAD_COUNT);
+
+ free(tid);
+
+ return repl_count == THREAD_COUNT ? 0 : 1;
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_in si_local = {
+ .sin_family = AF_INET,
+ };
+ struct sockaddr_in si_remote = {
+ .sin_family = AF_INET,
+ };
+ int fd, ret;
+
+ if (argc < 3) {
+ fputs("Usage: send_udp <daddr> <dport>\n", stderr);
+ return 1;
+ }
+
+ si_remote.sin_port = htons(atoi(argv[2]));
+ si_remote.sin_addr.s_addr = inet_addr(argv[1]);
+
+ fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, IPPROTO_UDP);
+ if (fd < 0) {
+ perror("socket");
+ return 1;
+ }
+
+ if (bind(fd, (struct sockaddr *)&si_local, sizeof(si_local)) < 0) {
+ perror("bind");
+ return 1;
+ }
+
+ ret = run_test(fd, &si_remote);
+
+ close(fd);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh b/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh
new file mode 100755
index 000000000000..912cb9583af1
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+if ! modprobe -q -n br_netfilter 2>&1; then
+ echo "SKIP: Test needs br_netfilter kernel module"
+ exit $ksft_skip
+fi
+
+cleanup()
+{
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns host vtep router
+
+create_topology()
+{
+ ip link add host-eth0 netns "$host" type veth peer name vtep-host netns "$vtep"
+ ip link add vtep-router netns "$vtep" type veth peer name router-vtep netns "$router"
+}
+
+setup_host()
+{
+ # bring ports up
+ ip -n "$host" addr add 10.0.0.1/24 dev host-eth0
+ ip -n "$host" link set host-eth0 up
+
+ # Add VLAN 10,20
+ for vid in 10 20; do
+ ip -n "$host" link add link host-eth0 name host-eth0.$vid type vlan id $vid
+ ip -n "$host" addr add 10.0.$vid.1/24 dev host-eth0.$vid
+ ip -n "$host" link set host-eth0.$vid up
+ done
+}
+
+setup_vtep()
+{
+ # create bridge on vtep
+ ip -n "$vtep" link add name br0 type bridge
+ ip -n "$vtep" link set br0 type bridge vlan_filtering 1
+
+ # VLAN 10 is untagged PVID
+ ip -n "$vtep" link set dev vtep-host master br0
+ bridge -n "$vtep" vlan add dev vtep-host vid 10 pvid untagged
+
+ # VLAN 20 as other VID
+ ip -n "$vtep" link set dev vtep-host master br0
+ bridge -n "$vtep" vlan add dev vtep-host vid 20
+
+ # single-vxlan device on vtep
+ ip -n "$vtep" address add dev vtep-router 60.0.0.1/24
+ ip -n "$vtep" link add dev vxd type vxlan external \
+ vnifilter local 60.0.0.1 remote 60.0.0.2 dstport 4789 ttl 64
+ ip -n "$vtep" link set vxd master br0
+
+ # Add VLAN-VNI 1-1 mappings
+ bridge -n "$vtep" link set dev vxd vlan_tunnel on
+ for vid in 10 20; do
+ bridge -n "$vtep" vlan add dev vxd vid $vid
+ bridge -n "$vtep" vlan add dev vxd vid $vid tunnel_info id $vid
+ bridge -n "$vtep" vni add dev vxd vni $vid
+ done
+
+ # bring ports up
+ ip -n "$vtep" link set vxd up
+ ip -n "$vtep" link set vtep-router up
+ ip -n "$vtep" link set vtep-host up
+ ip -n "$vtep" link set dev br0 up
+}
+
+setup_router()
+{
+ # bring ports up
+ ip -n "$router" link set router-vtep up
+}
+
+setup()
+{
+ modprobe -q br_netfilter
+ create_topology
+ setup_host
+ setup_vtep
+ setup_router
+}
+
+test_large_mtu_untagged_traffic()
+{
+ ip -n "$vtep" link set vxd mtu 1000
+ ip -n "$host" neigh add 10.0.0.2 lladdr ca:fe:ba:be:00:01 dev host-eth0
+ ip netns exec "$host" \
+ ping -q 10.0.0.2 -I host-eth0 -c 1 -W 0.5 -s2000 > /dev/null 2>&1
+ return 0
+}
+
+test_large_mtu_tagged_traffic()
+{
+ for vid in 10 20; do
+ ip -n "$vtep" link set vxd mtu 1000
+ ip -n "$host" neigh add 10.0.$vid.2 lladdr ca:fe:ba:be:00:01 dev host-eth0.$vid
+ ip netns exec "$host" \
+ ping -q 10.0.$vid.2 -I host-eth0.$vid -c 1 -W 0.5 -s2000 > /dev/null 2>&1
+ done
+ return 0
+}
+
+do_test()
+{
+ # Frames will be dropped so ping will not succeed
+ # If it doesn't panic, it passes
+ test_large_mtu_tagged_traffic
+ test_large_mtu_untagged_traffic
+}
+
+setup && \
+echo "Test for VxLAN fragmentation with large MTU in br_netfilter:" && \
+do_test && echo "PASS!"
+exit $?
diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c
new file mode 100644
index 000000000000..51129c564d0a
--- /dev/null
+++ b/tools/testing/selftests/net/netlink-dumps.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <linux/genetlink.h>
+#include <linux/neighbour.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/mqueue.h>
+#include <linux/rtnetlink.h>
+
+#include "kselftest_harness.h"
+
+#include <ynl.h>
+
+struct ext_ack {
+ int err;
+
+ __u32 attr_offs;
+ __u32 miss_type;
+ __u32 miss_nest;
+ const char *str;
+};
+
+enum get_ea_ret {
+ ERROR = -1,
+ NO_CTRL = 0,
+ FOUND_DONE,
+ FOUND_ERR,
+ FOUND_EXTACK,
+};
+
+static enum get_ea_ret
+nl_get_extack(char *buf, size_t n, struct ext_ack *ea)
+{
+ enum get_ea_ret ret = NO_CTRL;
+ const struct nlmsghdr *nlh;
+ const struct nlattr *attr;
+ ssize_t rem;
+
+ for (rem = n; rem > 0; NLMSG_NEXT(nlh, rem)) {
+ nlh = (struct nlmsghdr *)&buf[n - rem];
+ if (!NLMSG_OK(nlh, rem))
+ return ERROR;
+
+ if (nlh->nlmsg_type == NLMSG_ERROR)
+ ret = FOUND_ERR;
+ else if (nlh->nlmsg_type == NLMSG_DONE)
+ ret = FOUND_DONE;
+ else
+ continue;
+
+ ea->err = -*(int *)NLMSG_DATA(nlh);
+
+ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return ret;
+
+ ynl_attr_for_each(attr, nlh, sizeof(int)) {
+ switch (ynl_attr_type(attr)) {
+ case NLMSGERR_ATTR_OFFS:
+ ea->attr_offs = ynl_attr_get_u32(attr);
+ break;
+ case NLMSGERR_ATTR_MISS_TYPE:
+ ea->miss_type = ynl_attr_get_u32(attr);
+ break;
+ case NLMSGERR_ATTR_MISS_NEST:
+ ea->miss_nest = ynl_attr_get_u32(attr);
+ break;
+ case NLMSGERR_ATTR_MSG:
+ ea->str = ynl_attr_get_str(attr);
+ break;
+ }
+ }
+
+ return FOUND_EXTACK;
+ }
+
+ return ret;
+}
+
+static const struct {
+ struct nlmsghdr nlhdr;
+ struct ndmsg ndm;
+ struct nlattr ahdr;
+ __u32 val;
+} dump_neigh_bad = {
+ .nlhdr = {
+ .nlmsg_len = sizeof(dump_neigh_bad),
+ .nlmsg_type = RTM_GETNEIGH,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
+ .nlmsg_seq = 1,
+ },
+ .ndm = {
+ .ndm_family = 123,
+ },
+ .ahdr = {
+ .nla_len = 4 + 4,
+ .nla_type = NDA_FLAGS_EXT,
+ },
+ .val = -1, // should fail MASK validation
+};
+
+TEST(dump_extack)
+{
+ int netlink_sock;
+ int i, cnt, ret;
+ char buf[8192];
+ int one = 1;
+ ssize_t n;
+
+ netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ ASSERT_GE(netlink_sock, 0);
+
+ n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_CAP_ACK,
+ &one, sizeof(one));
+ ASSERT_EQ(n, 0);
+ n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one));
+ ASSERT_EQ(n, 0);
+ n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_GET_STRICT_CHK,
+ &one, sizeof(one));
+ ASSERT_EQ(n, 0);
+
+ /* Dump so many times we fill up the buffer */
+ cnt = 80;
+ for (i = 0; i < cnt; i++) {
+ n = send(netlink_sock, &dump_neigh_bad,
+ sizeof(dump_neigh_bad), 0);
+ ASSERT_EQ(n, sizeof(dump_neigh_bad));
+ }
+
+ /* Read out the ENOBUFS */
+ n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT);
+ EXPECT_EQ(n, -1);
+ EXPECT_EQ(errno, ENOBUFS);
+
+ ret = NO_CTRL;
+ for (i = 0; i < cnt; i++) {
+ struct ext_ack ea = {};
+
+ n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT);
+ if (n < 0) {
+ ASSERT_GE(i, 10);
+ break;
+ }
+ ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr));
+
+ ret = nl_get_extack(buf, n, &ea);
+ /* Once we fill the buffer we'll see one ENOBUFS followed
+ * by a number of EBUSYs. Then the last recv() will finally
+ * trigger and complete the dump.
+ */
+ if (ret == FOUND_ERR && (ea.err == ENOBUFS || ea.err == EBUSY))
+ continue;
+ EXPECT_EQ(ret, FOUND_EXTACK);
+ EXPECT_EQ(ea.err, EINVAL);
+ EXPECT_EQ(ea.attr_offs,
+ sizeof(struct nlmsghdr) + sizeof(struct ndmsg));
+ }
+ /* Make sure last message was a full DONE+extack */
+ EXPECT_EQ(ret, FOUND_EXTACK);
+}
+
+static const struct {
+ struct nlmsghdr nlhdr;
+ struct genlmsghdr genlhdr;
+ struct nlattr ahdr;
+ __u16 val;
+ __u16 pad;
+} dump_policies = {
+ .nlhdr = {
+ .nlmsg_len = sizeof(dump_policies),
+ .nlmsg_type = GENL_ID_CTRL,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
+ .nlmsg_seq = 1,
+ },
+ .genlhdr = {
+ .cmd = CTRL_CMD_GETPOLICY,
+ .version = 2,
+ },
+ .ahdr = {
+ .nla_len = 6,
+ .nla_type = CTRL_ATTR_FAMILY_ID,
+ },
+ .val = GENL_ID_CTRL,
+ .pad = 0,
+};
+
+// Sanity check for the test itself, make sure the dump doesn't fit in one msg
+TEST(test_sanity)
+{
+ int netlink_sock;
+ char buf[8192];
+ ssize_t n;
+
+ netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ ASSERT_GE(netlink_sock, 0);
+
+ n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0);
+ ASSERT_EQ(n, sizeof(dump_policies));
+
+ n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT);
+ ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr));
+
+ n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT);
+ ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr));
+
+ close(netlink_sock);
+}
+
+TEST(close_in_progress)
+{
+ int netlink_sock;
+ ssize_t n;
+
+ netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ ASSERT_GE(netlink_sock, 0);
+
+ n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0);
+ ASSERT_EQ(n, sizeof(dump_policies));
+
+ close(netlink_sock);
+}
+
+TEST(close_with_ref)
+{
+ char cookie[NOTIFY_COOKIE_LEN] = {};
+ int netlink_sock, mq_fd;
+ struct sigevent sigev;
+ ssize_t n;
+
+ netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ ASSERT_GE(netlink_sock, 0);
+
+ n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0);
+ ASSERT_EQ(n, sizeof(dump_policies));
+
+ mq_fd = syscall(__NR_mq_open, "sed", O_CREAT | O_WRONLY, 0600, 0);
+ ASSERT_GE(mq_fd, 0);
+
+ memset(&sigev, 0, sizeof(sigev));
+ sigev.sigev_notify = SIGEV_THREAD;
+ sigev.sigev_value.sival_ptr = cookie;
+ sigev.sigev_signo = netlink_sock;
+
+ syscall(__NR_mq_notify, mq_fd, &sigev);
+
+ close(netlink_sock);
+
+ // give mqueue time to fire
+ usleep(100 * 1000);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh
index 6974474c26f3..38871bdef67f 100755
--- a/tools/testing/selftests/net/netns-name.sh
+++ b/tools/testing/selftests/net/netns-name.sh
@@ -7,10 +7,12 @@ set -o pipefail
DEV=dummy-dev0
DEV2=dummy-dev1
ALT_NAME=some-alt-name
+NSIM_ADDR=2025
RET_CODE=0
cleanup() {
+ cleanup_netdevsim $NSIM_ADDR
cleanup_ns $NS $test_ns
}
@@ -25,12 +27,15 @@ setup_ns NS test_ns
#
# Test basic move without a rename
+# Use netdevsim because it has extra asserts for notifiers.
#
-ip -netns $NS link add name $DEV type dummy || fail
-ip -netns $NS link set dev $DEV netns $test_ns ||
+
+nsim=$(create_netdevsim $NSIM_ADDR $NS)
+ip -netns $NS link set dev $nsim netns $test_ns ||
fail "Can't perform a netns move"
-ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move"
-ip -netns $test_ns link del $DEV || fail
+ip -netns $test_ns link show dev $nsim >> /dev/null ||
+ fail "Device not found after move"
+cleanup_netdevsim $NSIM_ADDR
#
# Test move with a conflict
@@ -78,6 +83,16 @@ ip -netns $NS link show dev $ALT_NAME 2> /dev/null &&
fail "Can still find alt-name after move"
ip -netns $test_ns link del $DEV || fail
+#
+# Test no conflict of the same name/ifindex in different netns
+#
+ip -netns $NS link add name $DEV index 100 type dummy || fail
+ip -netns $NS link add netns $test_ns name $DEV index 100 type dummy ||
+ fail "Can create in netns without moving"
+ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found"
+ip -netns $NS link del $DEV || fail
+ip -netns $test_ns link del $DEV || fail
+
echo -ne "$(basename $0) \t\t\t\t"
if [ $RET_CODE -eq 0 ]; then
echo "[ OK ]"
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index cd8a58097448..1f5227f3d64d 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len)
name[0] = '\0';
rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen);
if (rc < 0)
- log_err_errno("setsockopt(SO_BINDTODEVICE)");
+ log_err_errno("getsockopt(SO_BINDTODEVICE)");
return rc;
}
@@ -535,7 +535,7 @@ static int set_freebind(int sd, int version)
break;
case AF_INET6:
if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) {
- log_err_errno("setsockopt(IPV6_FREEBIND");
+ log_err_errno("setsockopt(IPV6_FREEBIND)");
rc = -1;
}
break;
@@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, const char *_str,
sep++;
if (str_to_uint(sep, 1, pfx_len_max,
&args->prefix_len) != 0) {
- fprintf(stderr, "Invalid port\n");
+ fprintf(stderr, "Invalid prefix length\n");
return 1;
}
} else {
@@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen,
}
}
- nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
+ nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
while (1) {
FD_ZERO(&rfds);
FD_SET(sd, &rfds);
@@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args)
sd = socket(args->version, args->type, args->protocol);
if (sd < 0) {
log_err_errno("Error opening socket");
- return -1;
+ return -1;
}
if (set_reuseaddr(sd) != 0)
@@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
* waiting to be told when to continue
*/
if (read(fd, &buf, sizeof(buf)) <= 0) {
- log_err_errno("Failed to read IPC status from status");
+ log_err_errno("Failed to read IPC status from pipe");
return 1;
}
if (!buf) {
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index 93d9d914529b..5c66421ab8aa 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -2,8 +2,9 @@
# SPDX-License-Identifier: GPL-2.0
import time
+from os import system
from lib.py import ksft_run, ksft_exit, ksft_pr
-from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
+from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait
from lib.py import NetdevFamily, NetdevSimDev, ip
@@ -18,6 +19,160 @@ def lo_check(nf) -> None:
ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0)
+def napi_list_check(nf) -> None:
+ with NetdevSimDev(queue_count=100) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 100)
+
+ for q in [50, 0, 99]:
+ for i in range(4):
+ nsim.dfs_write("queue_reset", f"{q} {i}")
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 100,
+ comment=f"queue count after reset queue {q} mode {i}")
+
+def napi_set_threaded(nf) -> None:
+ """
+ Test that verifies various cases of napi threaded
+ set and unset at napi and device level.
+ """
+ with NetdevSimDev(queue_count=2) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ # set napi threaded and verify
+ nf.napi_set({'id': napi0_id, 'threaded': "enabled"})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # check it is not set for napi1
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ ip(f"link set dev {nsim.ifname} down")
+ ip(f"link set dev {nsim.ifname} up")
+
+ # verify if napi threaded is still set
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # check it is still not set for napi1
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ # unset napi threaded and verify
+ nf.napi_set({'id': napi0_id, 'threaded': "disabled"})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+
+ # set threaded at device level
+ system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is set for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "enabled")
+ ksft_ne(napi1.get('pid'), None)
+
+ # unset threaded at device level
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ # set napi threaded for napi0
+ nf.napi_set({'id': napi0_id, 'threaded': 1})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # unset threaded at device level
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+def dev_set_threaded(nf) -> None:
+ """
+ Test that verifies various cases of napi threaded
+ set and unset at device level using sysfs.
+ """
+ with NetdevSimDev(queue_count=2) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ # set threaded
+ system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is set for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "enabled")
+ ksft_ne(napi1.get('pid'), None)
+
+ # unset threaded
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+def nsim_rxq_reset_down(nf) -> None:
+ """
+ Test that the queue API supports resetting a queue
+ while the interface is down. We should convert this
+ test to testing real HW once more devices support
+ queue API.
+ """
+ with NetdevSimDev(queue_count=4) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} down")
+ for i in [0, 2, 3]:
+ nsim.dfs_write("queue_reset", f"1 {i}")
+
+
def page_pool_check(nf) -> None:
with NetdevSimDev() as nsimdev:
nsim = nsimdev.nsims[0]
@@ -89,7 +244,8 @@ def page_pool_check(nf) -> None:
def main() -> None:
nf = NetdevFamily()
- ksft_run([empty_check, lo_check, page_pool_check],
+ ksft_run([empty_check, lo_check, page_pool_check, napi_list_check,
+ dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down],
args=(nf, ))
ksft_exit()
diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile
index 2f1508abc826..3fd1da2ec07d 100644
--- a/tools/testing/selftests/net/openvswitch/Makefile
+++ b/tools/testing/selftests/net/openvswitch/Makefile
@@ -2,7 +2,7 @@
top_srcdir = ../../../../..
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
TEST_PROGS := openvswitch.sh
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index cc0bfae2bafa..b327d3061ed5 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -25,6 +25,7 @@ tests="
nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT
netlink_checks ovsnl: validate netlink attrs and settings
upcall_interfaces ovs: test the upcall interfaces
+ tunnel_metadata ovs: test extraction of tunnel metadata
drop_reason drop: test drop reasons are emitted
psample psample: Sampling packets with psample"
@@ -113,13 +114,13 @@ ovs_add_dp () {
}
ovs_add_if () {
- info "Adding IF to DP: br:$2 if:$3"
- if [ "$4" != "-u" ]; then
- ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if "$2" "$3" \
- || return 1
+ info "Adding IF to DP: br:$3 if:$4 ($2)"
+ if [ "$5" != "-u" ]; then
+ ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if \
+ -t "$2" "$3" "$4" || return 1
else
python3 $ovs_base/ovs-dpctl.py add-if \
- -u "$2" "$3" >$ovs_dir/$3.out 2>$ovs_dir/$3.err &
+ -u -t "$2" "$3" "$4" >$ovs_dir/$4.out 2>$ovs_dir/$4.err &
pid=$!
on_exit "ovs_sbx $1 kill -TERM $pid 2>/dev/null"
fi
@@ -166,13 +167,15 @@ ovs_add_netns_and_veths () {
fi
if [ "$7" != "-u" ]; then
- ovs_add_if "$1" "$2" "$4" || return 1
+ ovs_add_if "$1" "netdev" "$2" "$4" || return 1
else
- ovs_add_if "$1" "$2" "$4" -u || return 1
+ ovs_add_if "$1" "netdev" "$2" "$4" -u || return 1
fi
- [ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \
- tcpdump -i any -s 65535
+ if [ $TRACING -eq 1 ]; then
+ ovs_netns_spawn_daemon "$1" "$3" tcpdump -l -i any -s 6553
+ ovs_wait grep -q "listening on any" ${ovs_dir}/stderr
+ fi
return 0
}
@@ -328,6 +331,11 @@ test_psample() {
# - drop packets and verify the right drop reason is reported
test_drop_reason() {
which perf >/dev/null 2>&1 || return $ksft_skip
+ which pahole >/dev/null 2>&1 || return $ksft_skip
+
+ ovs_drop_subsys=$(pahole -C skb_drop_reason_subsys |
+ awk '/OPENVSWITCH/ { print $3; }' |
+ tr -d ,)
sbx_add "test_drop_reason" || return $?
@@ -371,7 +379,7 @@ test_drop_reason() {
"in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop'
ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20
- ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION
+ ovs_drop_reason_count 0x${ovs_drop_subsys}0001 # OVS_DROP_FLOW_ACTION
if [[ "$?" -ne "2" ]]; then
info "Did not detect expected drops: $?"
return 1
@@ -388,7 +396,7 @@ test_drop_reason() {
ovs_drop_record_and_run \
"test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000
- ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR
+ ovs_drop_reason_count 0x${ovs_drop_subsys}0004 # OVS_DROP_EXPLICIT_ACTION_ERROR
if [[ "$?" -ne "1" ]]; then
info "Did not detect expected explicit error drops: $?"
return 1
@@ -396,7 +404,7 @@ test_drop_reason() {
ovs_drop_record_and_run \
"test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000
- ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION
+ ovs_drop_reason_count 0x${ovs_drop_subsys}0003 # OVS_DROP_EXPLICIT_ACTION
if [[ "$?" -ne "1" ]]; then
info "Did not detect expected explicit drops: $?"
return 1
@@ -749,6 +757,79 @@ test_upcall_interfaces() {
return 0
}
+ovs_add_kernel_tunnel() {
+ local sbxname=$1; shift
+ local ns=$1; shift
+ local tnl_type=$1; shift
+ local name=$1; shift
+ local addr=$1; shift
+
+ info "setting up kernel ${tnl_type} tunnel ${name}"
+ ovs_sbx "${sbxname}" ip -netns ${ns} link add dev ${name} type ${tnl_type} $* || return 1
+ on_exit "ovs_sbx ${sbxname} ip -netns ${ns} link del ${name} >/dev/null 2>&1"
+ ovs_sbx "${sbxname}" ip -netns ${ns} addr add dev ${name} ${addr} || return 1
+ ovs_sbx "${sbxname}" ip -netns ${ns} link set dev ${name} mtu 1450 up || return 1
+}
+
+test_tunnel_metadata() {
+ which arping >/dev/null 2>&1 || return $ksft_skip
+
+ sbxname="test_tunnel_metadata"
+ sbx_add "${sbxname}" || return 1
+
+ info "setting up new DP"
+ ovs_add_dp "${sbxname}" tdp0 -V 2:1 || return 1
+
+ ovs_add_netns_and_veths "${sbxname}" tdp0 tns left0 l0 \
+ 172.31.110.1/24 || return 1
+
+ info "removing veth interface from openvswitch and setting IP"
+ ovs_del_if "${sbxname}" tdp0 left0 || return 1
+ ovs_sbx "${sbxname}" ip addr add 172.31.110.2/24 dev left0 || return 1
+ ovs_sbx "${sbxname}" ip link set left0 up || return 1
+
+ info "setting up tunnel port in openvswitch"
+ ovs_add_if "${sbxname}" "vxlan" tdp0 ovs-vxlan0 -u || return 1
+ on_exit "ovs_sbx ${sbxname} ip link del ovs-vxlan0"
+ ovs_wait ip link show ovs-vxlan0 &>/dev/null || return 1
+ ovs_sbx "${sbxname}" ip link set ovs-vxlan0 up || return 1
+
+ configs=$(echo '
+ 1 172.31.221.1/24 1155332 32 set udpcsum flags\(df\|csum\)
+ 2 172.31.222.1/24 1234567 45 set noudpcsum flags\(df\)
+ 3 172.31.223.1/24 1020304 23 unset udpcsum flags\(csum\)
+ 4 172.31.224.1/24 1357986 15 unset noudpcsum' | sed '/^$/d')
+
+ while read -r i addr id ttl df csum flags; do
+ ovs_add_kernel_tunnel "${sbxname}" tns vxlan vxlan${i} ${addr} \
+ remote 172.31.110.2 id ${id} dstport 4789 \
+ ttl ${ttl} df ${df} ${csum} || return 1
+ done <<< "${configs}"
+
+ ovs_wait grep -q 'listening on upcall packet handler' \
+ ${ovs_dir}/ovs-vxlan0.out || return 1
+
+ info "sending arping"
+ for i in 1 2 3 4; do
+ ovs_sbx "${sbxname}" ip netns exec tns \
+ arping -I vxlan${i} 172.31.22${i}.2 -c 1 \
+ >${ovs_dir}/arping.stdout 2>${ovs_dir}/arping.stderr
+ done
+
+ info "checking that received decapsulated packets carry correct metadata"
+ while read -r i addr id ttl df csum flags; do
+ arp_hdr="arp\\(sip=172.31.22${i}.1,tip=172.31.22${i}.2,op=1,sha="
+ addrs="src=172.31.110.1,dst=172.31.110.2"
+ ports="tp_src=[0-9]*,tp_dst=4789"
+ tnl_md="tunnel\\(tun_id=${id},${addrs},ttl=${ttl},${ports},${flags}\\)"
+
+ ovs_sbx "${sbxname}" grep -qE "MISS upcall.*${tnl_md}.*${arp_hdr}" \
+ ${ovs_dir}/ovs-vxlan0.out || return 1
+ done <<< "${configs}"
+
+ return 0
+}
+
run_test() {
(
tname="$1"
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 8a0396bfaf99..b521e0dea506 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -1877,7 +1877,7 @@ class OvsPacket(GenericNetlinkSocket):
elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE:
up.execute(msg)
else:
- print("Unkonwn cmd: %d" % msg["cmd"])
+ print("Unknown cmd: %d" % msg["cmd"])
except NetlinkError as ne:
raise ne
diff --git a/tools/testing/selftests/net/ovpn/.gitignore b/tools/testing/selftests/net/ovpn/.gitignore
new file mode 100644
index 000000000000..ee44c081ca7c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+ovpn-cli
diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile
new file mode 100644
index 000000000000..dbe0388c8512
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/Makefile
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES)
+VAR_CFLAGS = $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_CFLAGS),)
+VAR_CFLAGS = -I/usr/include/libnl3
+endif
+CFLAGS += $(VAR_CFLAGS)
+
+
+LDLIBS = -lmbedtls -lmbedcrypto
+VAR_LDLIBS = $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_LDLIBS),)
+VAR_LDLIBS = -lnl-genl-3 -lnl-3
+endif
+LDLIBS += $(VAR_LDLIBS)
+
+
+TEST_FILES = common.sh
+
+TEST_PROGS := \
+ test-chachapoly.sh \
+ test-close-socket-tcp.sh \
+ test-close-socket.sh \
+ test-float.sh \
+ test-large-mtu.sh \
+ test-tcp.sh \
+ test.sh \
+# end of TEST_PROGS
+
+TEST_GEN_FILES := ovpn-cli
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh
new file mode 100644
index 000000000000..88869c675d03
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/common.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt}
+TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt}
+OVPN_CLI=${OVPN_CLI:-./ovpn-cli}
+ALG=${ALG:-aes}
+PROTO=${PROTO:-UDP}
+FLOAT=${FLOAT:-0}
+
+LAN_IP="11.11.11.11"
+
+create_ns() {
+ ip netns add peer${1}
+}
+
+setup_ns() {
+ MODE="P2P"
+
+ if [ ${1} -eq 0 ]; then
+ MODE="MP"
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p}
+
+ ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p}
+ ip -n peer0 addr add fd00:0:0:${p}::1/64 dev veth${p}
+ ip -n peer0 link set veth${p} up
+
+ ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p}
+ ip -n peer${p} addr add fd00:0:0:${p}::2/64 dev veth${p}
+ ip -n peer${p} link set veth${p} up
+ done
+ fi
+
+ ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE
+ ip -n peer${1} addr add ${2} dev tun${1}
+ # add a secondary IP to peer 1, to test a LAN behind a client
+ if [ ${1} -eq 1 -a -n "${LAN_IP}" ]; then
+ ip -n peer${1} addr add ${LAN_IP} dev tun${1}
+ ip -n peer0 route add ${LAN_IP} via $(echo ${2} |sed -e s'!/.*!!') dev tun0
+ fi
+ if [ -n "${3}" ]; then
+ ip -n peer${1} link set mtu ${3} dev tun${1}
+ fi
+ ip -n peer${1} link set tun${1} up
+}
+
+add_peer() {
+ if [ "${PROTO}" == "UDP" ]; then
+ if [ ${1} -eq 0 ]; then
+ ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 ${UDP_PEERS_FILE}
+
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \
+ data64.key
+ done
+ else
+ RADDR=$(awk "NR == ${1} {print \$2}" ${UDP_PEERS_FILE})
+ RPORT=$(awk "NR == ${1} {print \$3}" ${UDP_PEERS_FILE})
+ LPORT=$(awk "NR == ${1} {print \$5}" ${UDP_PEERS_FILE})
+ ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} ${1} ${LPORT} \
+ ${RADDR} ${RPORT}
+ ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} ${1} 1 0 ${ALG} 1 \
+ data64.key
+ fi
+ else
+ if [ ${1} -eq 0 ]; then
+ (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${TCP_PEERS_FILE} && {
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \
+ ${ALG} 0 data64.key
+ done
+ }) &
+ sleep 5
+ else
+ ip netns exec peer${1} ${OVPN_CLI} connect tun${1} ${1} 10.10.${1}.1 1 \
+ data64.key
+ fi
+ fi
+}
+
+cleanup() {
+ # some ovpn-cli processes sleep in background so they need manual poking
+ killall $(basename ${OVPN_CLI}) 2>/dev/null || true
+
+ # netns peer0 is deleted without erasing ifaces first
+ for p in $(seq 1 10); do
+ ip -n peer${p} link set tun${p} down 2>/dev/null || true
+ ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true
+ done
+ for p in $(seq 1 10); do
+ ip -n peer0 link del veth${p} 2>/dev/null || true
+ done
+ for p in $(seq 0 10); do
+ ip netns del peer${p} 2>/dev/null || true
+ done
+}
+
+if [ "${PROTO}" == "UDP" ]; then
+ NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')}
+else
+ NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')}
+fi
+
+
diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config
new file mode 100644
index 000000000000..42699740936d
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/config
@@ -0,0 +1,10 @@
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_AES=y
+CONFIG_CRYPTO_CHACHA20POLY1305=y
+CONFIG_CRYPTO_GCM=y
+CONFIG_DST_CACHE=y
+CONFIG_INET=y
+CONFIG_NET=y
+CONFIG_NET_UDP_TUNNEL=y
+CONFIG_OVPN=m
+CONFIG_STREAM_PARSER=y
diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key
new file mode 100644
index 000000000000..a99e88c4e290
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/data64.key
@@ -0,0 +1,5 @@
+jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3B
+ia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9
+uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6
+KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tE
+BofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w==
diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c
new file mode 100644
index 000000000000..0f3babf19fd0
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c
@@ -0,0 +1,2387 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel accelerator
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <time.h>
+
+#include <linux/ovpn.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+#include <netlink/socket.h>
+#include <netlink/netlink.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/family.h>
+#include <netlink/genl/ctrl.h>
+
+#include <mbedtls/base64.h>
+#include <mbedtls/error.h>
+
+#include <sys/socket.h>
+
+#include "kselftest.h"
+
+/* defines to make checkpatch happy */
+#define strscpy strncpy
+
+/* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we
+ * have to explicitly do it to prevent the kernel from failing upon
+ * parsing of the message
+ */
+#define nla_nest_start(_msg, _type) \
+ nla_nest_start(_msg, (_type) | NLA_F_NESTED)
+
+/* libnl < 3.11.0 does not implement nla_get_uint() */
+uint64_t ovpn_nla_get_uint(struct nlattr *attr)
+{
+ if (nla_len(attr) == sizeof(uint32_t))
+ return nla_get_u32(attr);
+ else
+ return nla_get_u64(attr);
+}
+
+typedef int (*ovpn_nl_cb)(struct nl_msg *msg, void *arg);
+
+enum ovpn_key_direction {
+ KEY_DIR_IN = 0,
+ KEY_DIR_OUT,
+};
+
+#define KEY_LEN (256 / 8)
+#define NONCE_LEN 8
+
+#define PEER_ID_UNDEF 0x00FFFFFF
+#define MAX_PEERS 10
+
+struct nl_ctx {
+ struct nl_sock *nl_sock;
+ struct nl_msg *nl_msg;
+ struct nl_cb *nl_cb;
+
+ int ovpn_dco_id;
+};
+
+enum ovpn_cmd {
+ CMD_INVALID,
+ CMD_NEW_IFACE,
+ CMD_DEL_IFACE,
+ CMD_LISTEN,
+ CMD_CONNECT,
+ CMD_NEW_PEER,
+ CMD_NEW_MULTI_PEER,
+ CMD_SET_PEER,
+ CMD_DEL_PEER,
+ CMD_GET_PEER,
+ CMD_NEW_KEY,
+ CMD_DEL_KEY,
+ CMD_GET_KEY,
+ CMD_SWAP_KEYS,
+ CMD_LISTEN_MCAST,
+};
+
+struct ovpn_ctx {
+ enum ovpn_cmd cmd;
+
+ __u8 key_enc[KEY_LEN];
+ __u8 key_dec[KEY_LEN];
+ __u8 nonce[NONCE_LEN];
+
+ enum ovpn_cipher_alg cipher;
+
+ sa_family_t sa_family;
+
+ unsigned long peer_id;
+ unsigned long lport;
+
+ union {
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+ } remote;
+
+ union {
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+ } peer_ip;
+
+ bool peer_ip_set;
+
+ unsigned int ifindex;
+ char ifname[IFNAMSIZ];
+ enum ovpn_mode mode;
+ bool mode_set;
+
+ int socket;
+ int cli_sockets[MAX_PEERS];
+
+ __u32 keepalive_interval;
+ __u32 keepalive_timeout;
+
+ enum ovpn_key_direction key_dir;
+ enum ovpn_key_slot key_slot;
+ int key_id;
+
+ const char *peers_file;
+};
+
+static int ovpn_nl_recvmsgs(struct nl_ctx *ctx)
+{
+ int ret;
+
+ ret = nl_recvmsgs(ctx->nl_sock, ctx->nl_cb);
+
+ switch (ret) {
+ case -NLE_INTR:
+ fprintf(stderr,
+ "netlink received interrupt due to signal - ignoring\n");
+ break;
+ case -NLE_NOMEM:
+ fprintf(stderr, "netlink out of memory error\n");
+ break;
+ case -NLE_AGAIN:
+ fprintf(stderr,
+ "netlink reports blocking read - aborting wait\n");
+ break;
+ default:
+ if (ret)
+ fprintf(stderr, "netlink reports error (%d): %s\n",
+ ret, nl_geterror(-ret));
+ break;
+ }
+
+ return ret;
+}
+
+static struct nl_ctx *nl_ctx_alloc_flags(struct ovpn_ctx *ovpn, int cmd,
+ int flags)
+{
+ struct nl_ctx *ctx;
+ int err, ret;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ ctx->nl_sock = nl_socket_alloc();
+ if (!ctx->nl_sock) {
+ fprintf(stderr, "cannot allocate netlink socket\n");
+ goto err_free;
+ }
+
+ nl_socket_set_buffer_size(ctx->nl_sock, 8192, 8192);
+
+ ret = genl_connect(ctx->nl_sock);
+ if (ret) {
+ fprintf(stderr, "cannot connect to generic netlink: %s\n",
+ nl_geterror(ret));
+ goto err_sock;
+ }
+
+ /* enable Extended ACK for detailed error reporting */
+ err = 1;
+ setsockopt(nl_socket_get_fd(ctx->nl_sock), SOL_NETLINK, NETLINK_EXT_ACK,
+ &err, sizeof(err));
+
+ ctx->ovpn_dco_id = genl_ctrl_resolve(ctx->nl_sock, OVPN_FAMILY_NAME);
+ if (ctx->ovpn_dco_id < 0) {
+ fprintf(stderr, "cannot find ovpn_dco netlink component: %d\n",
+ ctx->ovpn_dco_id);
+ goto err_free;
+ }
+
+ ctx->nl_msg = nlmsg_alloc();
+ if (!ctx->nl_msg) {
+ fprintf(stderr, "cannot allocate netlink message\n");
+ goto err_sock;
+ }
+
+ ctx->nl_cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!ctx->nl_cb) {
+ fprintf(stderr, "failed to allocate netlink callback\n");
+ goto err_msg;
+ }
+
+ nl_socket_set_cb(ctx->nl_sock, ctx->nl_cb);
+
+ genlmsg_put(ctx->nl_msg, 0, 0, ctx->ovpn_dco_id, 0, flags, cmd, 0);
+
+ if (ovpn->ifindex > 0)
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_IFINDEX, ovpn->ifindex);
+
+ return ctx;
+nla_put_failure:
+err_msg:
+ nlmsg_free(ctx->nl_msg);
+err_sock:
+ nl_socket_free(ctx->nl_sock);
+err_free:
+ free(ctx);
+ return NULL;
+}
+
+static struct nl_ctx *nl_ctx_alloc(struct ovpn_ctx *ovpn, int cmd)
+{
+ return nl_ctx_alloc_flags(ovpn, cmd, 0);
+}
+
+static void nl_ctx_free(struct nl_ctx *ctx)
+{
+ if (!ctx)
+ return;
+
+ nl_socket_free(ctx->nl_sock);
+ nlmsg_free(ctx->nl_msg);
+ nl_cb_put(ctx->nl_cb);
+ free(ctx);
+}
+
+static int ovpn_nl_cb_error(struct sockaddr_nl (*nla)__always_unused,
+ struct nlmsgerr *err, void *arg)
+{
+ struct nlmsghdr *nlh = (struct nlmsghdr *)err - 1;
+ struct nlattr *tb_msg[NLMSGERR_ATTR_MAX + 1];
+ int len = nlh->nlmsg_len;
+ struct nlattr *attrs;
+ int *ret = arg;
+ int ack_len = sizeof(*nlh) + sizeof(int) + sizeof(*nlh);
+
+ *ret = err->error;
+
+ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return NL_STOP;
+
+ if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+ ack_len += err->msg.nlmsg_len - sizeof(*nlh);
+
+ if (len <= ack_len)
+ return NL_STOP;
+
+ attrs = (void *)((uint8_t *)nlh + ack_len);
+ len -= ack_len;
+
+ nla_parse(tb_msg, NLMSGERR_ATTR_MAX, attrs, len, NULL);
+ if (tb_msg[NLMSGERR_ATTR_MSG]) {
+ len = strnlen((char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]),
+ nla_len(tb_msg[NLMSGERR_ATTR_MSG]));
+ fprintf(stderr, "kernel error: %*s\n", len,
+ (char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]));
+ }
+
+ if (tb_msg[NLMSGERR_ATTR_MISS_NEST]) {
+ fprintf(stderr, "missing required nesting type %u\n",
+ nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_NEST]));
+ }
+
+ if (tb_msg[NLMSGERR_ATTR_MISS_TYPE]) {
+ fprintf(stderr, "missing required attribute type %u\n",
+ nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_TYPE]));
+ }
+
+ return NL_STOP;
+}
+
+static int ovpn_nl_cb_finish(struct nl_msg (*msg)__always_unused,
+ void *arg)
+{
+ int *status = arg;
+
+ *status = 0;
+ return NL_SKIP;
+}
+
+static int ovpn_nl_cb_ack(struct nl_msg (*msg)__always_unused,
+ void *arg)
+{
+ int *status = arg;
+
+ *status = 0;
+ return NL_STOP;
+}
+
+static int ovpn_nl_msg_send(struct nl_ctx *ctx, ovpn_nl_cb cb)
+{
+ int status = 1;
+
+ nl_cb_err(ctx->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &status);
+ nl_cb_set(ctx->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish,
+ &status);
+ nl_cb_set(ctx->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_ack, &status);
+
+ if (cb)
+ nl_cb_set(ctx->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, ctx);
+
+ nl_send_auto_complete(ctx->nl_sock, ctx->nl_msg);
+
+ while (status == 1)
+ ovpn_nl_recvmsgs(ctx);
+
+ if (status < 0)
+ fprintf(stderr, "failed to send netlink message: %s (%d)\n",
+ strerror(-status), status);
+
+ return status;
+}
+
+static int ovpn_parse_key(const char *file, struct ovpn_ctx *ctx)
+{
+ int idx_enc, idx_dec, ret = -1;
+ unsigned char *ckey = NULL;
+ __u8 *bkey = NULL;
+ size_t olen = 0;
+ long ckey_len;
+ FILE *fp;
+
+ fp = fopen(file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open: %s\n", file);
+ return -1;
+ }
+
+ /* get file size */
+ fseek(fp, 0L, SEEK_END);
+ ckey_len = ftell(fp);
+ rewind(fp);
+
+ /* if the file is longer, let's just read a portion */
+ if (ckey_len > 256)
+ ckey_len = 256;
+
+ ckey = malloc(ckey_len);
+ if (!ckey)
+ goto err;
+
+ ret = fread(ckey, 1, ckey_len, fp);
+ if (ret != ckey_len) {
+ fprintf(stderr,
+ "couldn't read enough data from key file: %dbytes read\n",
+ ret);
+ goto err;
+ }
+
+ olen = 0;
+ ret = mbedtls_base64_decode(NULL, 0, &olen, ckey, ckey_len);
+ if (ret != MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL) {
+ char buf[256];
+
+ mbedtls_strerror(ret, buf, sizeof(buf));
+ fprintf(stderr, "unexpected base64 error1: %s (%d)\n", buf,
+ ret);
+
+ goto err;
+ }
+
+ bkey = malloc(olen);
+ if (!bkey) {
+ fprintf(stderr, "cannot allocate binary key buffer\n");
+ goto err;
+ }
+
+ ret = mbedtls_base64_decode(bkey, olen, &olen, ckey, ckey_len);
+ if (ret) {
+ char buf[256];
+
+ mbedtls_strerror(ret, buf, sizeof(buf));
+ fprintf(stderr, "unexpected base64 error2: %s (%d)\n", buf,
+ ret);
+
+ goto err;
+ }
+
+ if (olen < 2 * KEY_LEN + NONCE_LEN) {
+ fprintf(stderr,
+ "not enough data in key file, found %zdB but needs %dB\n",
+ olen, 2 * KEY_LEN + NONCE_LEN);
+ goto err;
+ }
+
+ switch (ctx->key_dir) {
+ case KEY_DIR_IN:
+ idx_enc = 0;
+ idx_dec = 1;
+ break;
+ case KEY_DIR_OUT:
+ idx_enc = 1;
+ idx_dec = 0;
+ break;
+ default:
+ goto err;
+ }
+
+ memcpy(ctx->key_enc, bkey + KEY_LEN * idx_enc, KEY_LEN);
+ memcpy(ctx->key_dec, bkey + KEY_LEN * idx_dec, KEY_LEN);
+ memcpy(ctx->nonce, bkey + 2 * KEY_LEN, NONCE_LEN);
+
+ ret = 0;
+
+err:
+ fclose(fp);
+ free(bkey);
+ free(ckey);
+
+ return ret;
+}
+
+static int ovpn_parse_cipher(const char *cipher, struct ovpn_ctx *ctx)
+{
+ if (strcmp(cipher, "aes") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_AES_GCM;
+ else if (strcmp(cipher, "chachapoly") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_CHACHA20_POLY1305;
+ else if (strcmp(cipher, "none") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_NONE;
+ else
+ return -ENOTSUP;
+
+ return 0;
+}
+
+static int ovpn_parse_key_direction(const char *dir, struct ovpn_ctx *ctx)
+{
+ int in_dir;
+
+ in_dir = strtoll(dir, NULL, 10);
+ switch (in_dir) {
+ case KEY_DIR_IN:
+ case KEY_DIR_OUT:
+ ctx->key_dir = in_dir;
+ break;
+ default:
+ fprintf(stderr,
+ "invalid key direction provided. Can be 0 or 1 only\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto)
+{
+ struct sockaddr_storage local_sock = { 0 };
+ struct sockaddr_in6 *in6;
+ struct sockaddr_in *in;
+ int ret, s, sock_type;
+ size_t sock_len;
+
+ if (proto == IPPROTO_UDP)
+ sock_type = SOCK_DGRAM;
+ else if (proto == IPPROTO_TCP)
+ sock_type = SOCK_STREAM;
+ else
+ return -EINVAL;
+
+ s = socket(family, sock_type, 0);
+ if (s < 0) {
+ perror("cannot create socket");
+ return -1;
+ }
+
+ switch (family) {
+ case AF_INET:
+ in = (struct sockaddr_in *)&local_sock;
+ in->sin_family = family;
+ in->sin_port = htons(ctx->lport);
+ in->sin_addr.s_addr = htonl(INADDR_ANY);
+ sock_len = sizeof(*in);
+ break;
+ case AF_INET6:
+ in6 = (struct sockaddr_in6 *)&local_sock;
+ in6->sin6_family = family;
+ in6->sin6_port = htons(ctx->lport);
+ in6->sin6_addr = in6addr_any;
+ sock_len = sizeof(*in6);
+ break;
+ default:
+ return -1;
+ }
+
+ int opt = 1;
+
+ ret = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+
+ if (ret < 0) {
+ perror("setsockopt for SO_REUSEADDR");
+ return ret;
+ }
+
+ ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
+ if (ret < 0) {
+ perror("setsockopt for SO_REUSEPORT");
+ return ret;
+ }
+
+ if (family == AF_INET6) {
+ opt = 0;
+ if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt,
+ sizeof(opt))) {
+ perror("failed to set IPV6_V6ONLY");
+ return -1;
+ }
+ }
+
+ ret = bind(s, (struct sockaddr *)&local_sock, sock_len);
+ if (ret < 0) {
+ perror("cannot bind socket");
+ goto err_socket;
+ }
+
+ ctx->socket = s;
+ ctx->sa_family = family;
+ return 0;
+
+err_socket:
+ close(s);
+ return -1;
+}
+
+static int ovpn_udp_socket(struct ovpn_ctx *ctx, sa_family_t family)
+{
+ return ovpn_socket(ctx, family, IPPROTO_UDP);
+}
+
+static int ovpn_listen(struct ovpn_ctx *ctx, sa_family_t family)
+{
+ int ret;
+
+ ret = ovpn_socket(ctx, family, IPPROTO_TCP);
+ if (ret < 0)
+ return ret;
+
+ ret = listen(ctx->socket, 10);
+ if (ret < 0) {
+ perror("listen");
+ close(ctx->socket);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ovpn_accept(struct ovpn_ctx *ctx)
+{
+ socklen_t socklen;
+ int ret;
+
+ socklen = sizeof(ctx->remote);
+ ret = accept(ctx->socket, (struct sockaddr *)&ctx->remote, &socklen);
+ if (ret < 0) {
+ perror("accept");
+ goto err;
+ }
+
+ fprintf(stderr, "Connection received!\n");
+
+ switch (socklen) {
+ case sizeof(struct sockaddr_in):
+ case sizeof(struct sockaddr_in6):
+ break;
+ default:
+ fprintf(stderr, "error: expecting IPv4 or IPv6 connection\n");
+ close(ret);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ return ret;
+err:
+ close(ctx->socket);
+ return ret;
+}
+
+static int ovpn_connect(struct ovpn_ctx *ovpn)
+{
+ socklen_t socklen;
+ int s, ret;
+
+ s = socket(ovpn->remote.in4.sin_family, SOCK_STREAM, 0);
+ if (s < 0) {
+ perror("cannot create socket");
+ return -1;
+ }
+
+ switch (ovpn->remote.in4.sin_family) {
+ case AF_INET:
+ socklen = sizeof(struct sockaddr_in);
+ break;
+ case AF_INET6:
+ socklen = sizeof(struct sockaddr_in6);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ ret = connect(s, (struct sockaddr *)&ovpn->remote, socklen);
+ if (ret < 0) {
+ perror("connect");
+ goto err;
+ }
+
+ fprintf(stderr, "connected\n");
+
+ ovpn->socket = s;
+
+ return 0;
+err:
+ close(s);
+ return ret;
+}
+
+static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_NEW);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket);
+
+ if (!is_tcp) {
+ switch (ovpn->remote.in4.sin_family) {
+ case AF_INET:
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV4,
+ ovpn->remote.in4.sin_addr.s_addr);
+ NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+ ovpn->remote.in4.sin_port);
+ break;
+ case AF_INET6:
+ NLA_PUT(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV6,
+ sizeof(ovpn->remote.in6.sin6_addr),
+ &ovpn->remote.in6.sin6_addr);
+ NLA_PUT_U32(ctx->nl_msg,
+ OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID,
+ ovpn->remote.in6.sin6_scope_id);
+ NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+ ovpn->remote.in6.sin6_port);
+ break;
+ default:
+ fprintf(stderr,
+ "Invalid family for remote socket address\n");
+ goto nla_put_failure;
+ }
+ }
+
+ if (ovpn->peer_ip_set) {
+ switch (ovpn->peer_ip.in4.sin_family) {
+ case AF_INET:
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_VPN_IPV4,
+ ovpn->peer_ip.in4.sin_addr.s_addr);
+ break;
+ case AF_INET6:
+ NLA_PUT(ctx->nl_msg, OVPN_A_PEER_VPN_IPV6,
+ sizeof(struct in6_addr),
+ &ovpn->peer_ip.in6.sin6_addr);
+ break;
+ default:
+ fprintf(stderr, "Invalid family for peer address\n");
+ goto nla_put_failure;
+ }
+ }
+
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_set_peer(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_SET);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_INTERVAL,
+ ovpn->keepalive_interval);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_TIMEOUT,
+ ovpn->keepalive_timeout);
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_del_peer(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_DEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused)
+{
+ struct nlattr *pattrs[OVPN_A_PEER_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+ __u16 rport = 0, lport = 0;
+
+ nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!attrs[OVPN_A_PEER]) {
+ fprintf(stderr, "no packet content in netlink message\n");
+ return NL_SKIP;
+ }
+
+ nla_parse(pattrs, OVPN_A_PEER_MAX, nla_data(attrs[OVPN_A_PEER]),
+ nla_len(attrs[OVPN_A_PEER]), NULL);
+
+ if (pattrs[OVPN_A_PEER_ID])
+ fprintf(stderr, "* Peer %u\n",
+ nla_get_u32(pattrs[OVPN_A_PEER_ID]));
+
+ if (pattrs[OVPN_A_PEER_SOCKET_NETNSID])
+ fprintf(stderr, "\tsocket NetNS ID: %d\n",
+ nla_get_s32(pattrs[OVPN_A_PEER_SOCKET_NETNSID]));
+
+ if (pattrs[OVPN_A_PEER_VPN_IPV4]) {
+ char buf[INET_ADDRSTRLEN];
+
+ inet_ntop(AF_INET, nla_data(pattrs[OVPN_A_PEER_VPN_IPV4]),
+ buf, sizeof(buf));
+ fprintf(stderr, "\tVPN IPv4: %s\n", buf);
+ }
+
+ if (pattrs[OVPN_A_PEER_VPN_IPV6]) {
+ char buf[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6, nla_data(pattrs[OVPN_A_PEER_VPN_IPV6]),
+ buf, sizeof(buf));
+ fprintf(stderr, "\tVPN IPv6: %s\n", buf);
+ }
+
+ if (pattrs[OVPN_A_PEER_LOCAL_PORT])
+ lport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_LOCAL_PORT]));
+
+ if (pattrs[OVPN_A_PEER_REMOTE_PORT])
+ rport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_REMOTE_PORT]));
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV6]) {
+ void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV6];
+ char buf[INET6_ADDRSTRLEN];
+ int scope_id = -1;
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) {
+ void *p = pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID];
+
+ scope_id = nla_get_u32(p);
+ }
+
+ inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tRemote: %s:%hu (scope-id: %u)\n", buf, rport,
+ scope_id);
+
+ if (pattrs[OVPN_A_PEER_LOCAL_IPV6]) {
+ void *ip = pattrs[OVPN_A_PEER_LOCAL_IPV6];
+
+ inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+ }
+ }
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV4]) {
+ void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV4];
+ char buf[INET_ADDRSTRLEN];
+
+ inet_ntop(AF_INET, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tRemote: %s:%hu\n", buf, rport);
+
+ if (pattrs[OVPN_A_PEER_LOCAL_IPV4]) {
+ void *p = pattrs[OVPN_A_PEER_LOCAL_IPV4];
+
+ inet_ntop(AF_INET, nla_data(p), buf, sizeof(buf));
+ fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+ }
+ }
+
+ if (pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]) {
+ void *p = pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL];
+
+ fprintf(stderr, "\tKeepalive interval: %u sec\n",
+ nla_get_u32(p));
+ }
+
+ if (pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])
+ fprintf(stderr, "\tKeepalive timeout: %u sec\n",
+ nla_get_u32(pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]));
+
+ if (pattrs[OVPN_A_PEER_VPN_RX_BYTES])
+ fprintf(stderr, "\tVPN RX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_VPN_TX_BYTES])
+ fprintf(stderr, "\tVPN TX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_VPN_RX_PACKETS])
+ fprintf(stderr, "\tVPN RX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_VPN_TX_PACKETS])
+ fprintf(stderr, "\tVPN TX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_LINK_RX_BYTES])
+ fprintf(stderr, "\tLINK RX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_LINK_TX_BYTES])
+ fprintf(stderr, "\tLINK TX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_LINK_RX_PACKETS])
+ fprintf(stderr, "\tLINK RX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_LINK_TX_PACKETS])
+ fprintf(stderr, "\tLINK TX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_PACKETS]));
+
+ return NL_SKIP;
+}
+
+static int ovpn_get_peer(struct ovpn_ctx *ovpn)
+{
+ int flags = 0, ret = -1;
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+
+ if (ovpn->peer_id == PEER_ID_UNDEF)
+ flags = NLM_F_DUMP;
+
+ ctx = nl_ctx_alloc_flags(ovpn, OVPN_CMD_PEER_GET, flags);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (ovpn->peer_id != PEER_ID_UNDEF) {
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, attr);
+ }
+
+ ret = ovpn_nl_msg_send(ctx, ovpn_handle_peer);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_new_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf, *key_dir;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_NEW);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_KEY_ID, ovpn->key_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_CIPHER_ALG, ovpn->cipher);
+
+ key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_ENCRYPT_DIR);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_enc);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+ nla_nest_end(ctx->nl_msg, key_dir);
+
+ key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_DECRYPT_DIR);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_dec);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+ nla_nest_end(ctx->nl_msg, key_dir);
+
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_del_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_DEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_handle_key(struct nl_msg *msg, void (*arg)__always_unused)
+{
+ struct nlattr *kattrs[OVPN_A_KEYCONF_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+
+ nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!attrs[OVPN_A_KEYCONF]) {
+ fprintf(stderr, "no packet content in netlink message\n");
+ return NL_SKIP;
+ }
+
+ nla_parse(kattrs, OVPN_A_KEYCONF_MAX, nla_data(attrs[OVPN_A_KEYCONF]),
+ nla_len(attrs[OVPN_A_KEYCONF]), NULL);
+
+ if (kattrs[OVPN_A_KEYCONF_PEER_ID])
+ fprintf(stderr, "* Peer %u\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_PEER_ID]));
+ if (kattrs[OVPN_A_KEYCONF_SLOT]) {
+ fprintf(stderr, "\t- Slot: ");
+ switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])) {
+ case OVPN_KEY_SLOT_PRIMARY:
+ fprintf(stderr, "primary\n");
+ break;
+ case OVPN_KEY_SLOT_SECONDARY:
+ fprintf(stderr, "secondary\n");
+ break;
+ default:
+ fprintf(stderr, "invalid (%u)\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT]));
+ break;
+ }
+ }
+ if (kattrs[OVPN_A_KEYCONF_KEY_ID])
+ fprintf(stderr, "\t- Key ID: %u\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_KEY_ID]));
+ if (kattrs[OVPN_A_KEYCONF_CIPHER_ALG]) {
+ fprintf(stderr, "\t- Cipher: ");
+ switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])) {
+ case OVPN_CIPHER_ALG_NONE:
+ fprintf(stderr, "none\n");
+ break;
+ case OVPN_CIPHER_ALG_AES_GCM:
+ fprintf(stderr, "aes-gcm\n");
+ break;
+ case OVPN_CIPHER_ALG_CHACHA20_POLY1305:
+ fprintf(stderr, "chacha20poly1305\n");
+ break;
+ default:
+ fprintf(stderr, "invalid (%u)\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG]));
+ break;
+ }
+ }
+
+ return NL_SKIP;
+}
+
+static int ovpn_get_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_GET);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, ovpn_handle_key);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_swap_keys(struct ovpn_ctx *ovpn)
+{
+ struct nl_ctx *ctx;
+ struct nlattr *kc;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_SWAP);
+ if (!ctx)
+ return -ENOMEM;
+
+ kc = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, kc);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+/* Helper function used to easily add attributes to a rtnl message */
+static int ovpn_addattr(struct nlmsghdr *n, int maxlen, int type,
+ const void *data, int alen)
+{
+ int len = RTA_LENGTH(alen);
+ struct rtattr *rta;
+
+ if ((int)(NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len)) > maxlen) {
+ fprintf(stderr, "%s: rtnl: message exceeded bound of %d\n",
+ __func__, maxlen);
+ return -EMSGSIZE;
+ }
+
+ rta = nlmsg_tail(n);
+ rta->rta_type = type;
+ rta->rta_len = len;
+
+ if (!data)
+ memset(RTA_DATA(rta), 0, alen);
+ else
+ memcpy(RTA_DATA(rta), data, alen);
+
+ n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+
+ return 0;
+}
+
+static struct rtattr *ovpn_nest_start(struct nlmsghdr *msg, size_t max_size,
+ int attr)
+{
+ struct rtattr *nest = nlmsg_tail(msg);
+
+ if (ovpn_addattr(msg, max_size, attr, NULL, 0) < 0)
+ return NULL;
+
+ return nest;
+}
+
+static void ovpn_nest_end(struct nlmsghdr *msg, struct rtattr *nest)
+{
+ nest->rta_len = (uint8_t *)nlmsg_tail(msg) - (uint8_t *)nest;
+}
+
+#define RT_SNDBUF_SIZE (1024 * 2)
+#define RT_RCVBUF_SIZE (1024 * 4)
+
+/* Open RTNL socket */
+static int ovpn_rt_socket(void)
+{
+ int sndbuf = RT_SNDBUF_SIZE, rcvbuf = RT_RCVBUF_SIZE, fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (fd < 0) {
+ fprintf(stderr, "%s: cannot open netlink socket\n", __func__);
+ return fd;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf,
+ sizeof(sndbuf)) < 0) {
+ fprintf(stderr, "%s: SO_SNDBUF\n", __func__);
+ close(fd);
+ return -1;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf,
+ sizeof(rcvbuf)) < 0) {
+ fprintf(stderr, "%s: SO_RCVBUF\n", __func__);
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+/* Bind socket to Netlink subsystem */
+static int ovpn_rt_bind(int fd, uint32_t groups)
+{
+ struct sockaddr_nl local = { 0 };
+ socklen_t addr_len;
+
+ local.nl_family = AF_NETLINK;
+ local.nl_groups = groups;
+
+ if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
+ fprintf(stderr, "%s: cannot bind netlink socket: %d\n",
+ __func__, errno);
+ return -errno;
+ }
+
+ addr_len = sizeof(local);
+ if (getsockname(fd, (struct sockaddr *)&local, &addr_len) < 0) {
+ fprintf(stderr, "%s: cannot getsockname: %d\n", __func__,
+ errno);
+ return -errno;
+ }
+
+ if (addr_len != sizeof(local)) {
+ fprintf(stderr, "%s: wrong address length %d\n", __func__,
+ addr_len);
+ return -EINVAL;
+ }
+
+ if (local.nl_family != AF_NETLINK) {
+ fprintf(stderr, "%s: wrong address family %d\n", __func__,
+ local.nl_family);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+typedef int (*ovpn_parse_reply_cb)(struct nlmsghdr *msg, void *arg);
+
+/* Send Netlink message and run callback on reply (if specified) */
+static int ovpn_rt_send(struct nlmsghdr *payload, pid_t peer,
+ unsigned int groups, ovpn_parse_reply_cb cb,
+ void *arg_cb)
+{
+ int len, rem_len, fd, ret, rcv_len;
+ struct sockaddr_nl nladdr = { 0 };
+ struct nlmsgerr *err;
+ struct nlmsghdr *h;
+ char buf[1024 * 16];
+ struct iovec iov = {
+ .iov_base = payload,
+ .iov_len = payload->nlmsg_len,
+ };
+ struct msghdr nlmsg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ nladdr.nl_family = AF_NETLINK;
+ nladdr.nl_pid = peer;
+ nladdr.nl_groups = groups;
+
+ payload->nlmsg_seq = time(NULL);
+
+ /* no need to send reply */
+ if (!cb)
+ payload->nlmsg_flags |= NLM_F_ACK;
+
+ fd = ovpn_rt_socket();
+ if (fd < 0) {
+ fprintf(stderr, "%s: can't open rtnl socket\n", __func__);
+ return -errno;
+ }
+
+ ret = ovpn_rt_bind(fd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "%s: can't bind rtnl socket\n", __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ ret = sendmsg(fd, &nlmsg, 0);
+ if (ret < 0) {
+ fprintf(stderr, "%s: rtnl: error on sendmsg()\n", __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ /* prepare buffer to store RTNL replies */
+ memset(buf, 0, sizeof(buf));
+ iov.iov_base = buf;
+
+ while (1) {
+ /*
+ * iov_len is modified by recvmsg(), therefore has to be initialized before
+ * using it again
+ */
+ iov.iov_len = sizeof(buf);
+ rcv_len = recvmsg(fd, &nlmsg, 0);
+ if (rcv_len < 0) {
+ if (errno == EINTR || errno == EAGAIN) {
+ fprintf(stderr, "%s: interrupted call\n",
+ __func__);
+ continue;
+ }
+ fprintf(stderr, "%s: rtnl: error on recvmsg()\n",
+ __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ if (rcv_len == 0) {
+ fprintf(stderr,
+ "%s: rtnl: socket reached unexpected EOF\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (nlmsg.msg_namelen != sizeof(nladdr)) {
+ fprintf(stderr,
+ "%s: sender address length: %u (expected %zu)\n",
+ __func__, nlmsg.msg_namelen, sizeof(nladdr));
+ ret = -EIO;
+ goto out;
+ }
+
+ h = (struct nlmsghdr *)buf;
+ while (rcv_len >= (int)sizeof(*h)) {
+ len = h->nlmsg_len;
+ rem_len = len - sizeof(*h);
+
+ if (rem_len < 0 || len > rcv_len) {
+ if (nlmsg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "%s: truncated message\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+ fprintf(stderr, "%s: malformed message: len=%d\n",
+ __func__, len);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (h->nlmsg_type == NLMSG_DONE) {
+ ret = 0;
+ goto out;
+ }
+
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ err = (struct nlmsgerr *)NLMSG_DATA(h);
+ if (rem_len < (int)sizeof(struct nlmsgerr)) {
+ fprintf(stderr, "%s: ERROR truncated\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (err->error) {
+ fprintf(stderr, "%s: (%d) %s\n",
+ __func__, err->error,
+ strerror(-err->error));
+ ret = err->error;
+ goto out;
+ }
+
+ ret = 0;
+ if (cb) {
+ int r = cb(h, arg_cb);
+
+ if (r <= 0)
+ ret = r;
+ }
+ goto out;
+ }
+
+ if (cb) {
+ int r = cb(h, arg_cb);
+
+ if (r <= 0) {
+ ret = r;
+ goto out;
+ }
+ } else {
+ fprintf(stderr, "%s: RTNL: unexpected reply\n",
+ __func__);
+ }
+
+ rcv_len -= NLMSG_ALIGN(len);
+ h = (struct nlmsghdr *)((uint8_t *)h +
+ NLMSG_ALIGN(len));
+ }
+
+ if (nlmsg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "%s: message truncated\n", __func__);
+ continue;
+ }
+
+ if (rcv_len) {
+ fprintf(stderr, "%s: rtnl: %d not parsed bytes\n",
+ __func__, rcv_len);
+ ret = -1;
+ goto out;
+ }
+ }
+out:
+ close(fd);
+
+ return ret;
+}
+
+struct ovpn_link_req {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[256];
+};
+
+static int ovpn_new_iface(struct ovpn_ctx *ovpn)
+{
+ struct rtattr *linkinfo, *data;
+ struct ovpn_link_req req = { 0 };
+ int ret = -1;
+
+ fprintf(stdout, "Creating interface %s with mode %u\n", ovpn->ifname,
+ ovpn->mode);
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ req.n.nlmsg_type = RTM_NEWLINK;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_IFNAME, ovpn->ifname,
+ strlen(ovpn->ifname) + 1) < 0)
+ goto err;
+
+ linkinfo = ovpn_nest_start(&req.n, sizeof(req), IFLA_LINKINFO);
+ if (!linkinfo)
+ goto err;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_INFO_KIND, OVPN_FAMILY_NAME,
+ strlen(OVPN_FAMILY_NAME) + 1) < 0)
+ goto err;
+
+ if (ovpn->mode_set) {
+ data = ovpn_nest_start(&req.n, sizeof(req), IFLA_INFO_DATA);
+ if (!data)
+ goto err;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_OVPN_MODE,
+ &ovpn->mode, sizeof(uint8_t)) < 0)
+ goto err;
+
+ ovpn_nest_end(&req.n, data);
+ }
+
+ ovpn_nest_end(&req.n, linkinfo);
+
+ req.i.ifi_family = AF_PACKET;
+
+ ret = ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+err:
+ return ret;
+}
+
+static int ovpn_del_iface(struct ovpn_ctx *ovpn)
+{
+ struct ovpn_link_req req = { 0 };
+
+ fprintf(stdout, "Deleting interface %s ifindex %u\n", ovpn->ifname,
+ ovpn->ifindex);
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+ req.n.nlmsg_flags = NLM_F_REQUEST;
+ req.n.nlmsg_type = RTM_DELLINK;
+
+ req.i.ifi_family = AF_PACKET;
+ req.i.ifi_index = ovpn->ifindex;
+
+ return ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+}
+
+static int nl_seq_check(struct nl_msg (*msg)__always_unused,
+ void (*arg)__always_unused)
+{
+ return NL_OK;
+}
+
+struct mcast_handler_args {
+ const char *group;
+ int id;
+};
+
+static int mcast_family_handler(struct nl_msg *msg, void *arg)
+{
+ struct mcast_handler_args *grp = arg;
+ struct nlattr *tb[CTRL_ATTR_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *mcgrp;
+ int rem_mcgrp;
+
+ nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!tb[CTRL_ATTR_MCAST_GROUPS])
+ return NL_SKIP;
+
+ nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) {
+ struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1];
+
+ nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX,
+ nla_data(mcgrp), nla_len(mcgrp), NULL);
+
+ if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] ||
+ !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID])
+ continue;
+ if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]),
+ grp->group, nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME])))
+ continue;
+ grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]);
+ break;
+ }
+
+ return NL_SKIP;
+}
+
+static int mcast_error_handler(struct sockaddr_nl (*nla)__always_unused,
+ struct nlmsgerr *err, void *arg)
+{
+ int *ret = arg;
+
+ *ret = err->error;
+ return NL_STOP;
+}
+
+static int mcast_ack_handler(struct nl_msg (*msg)__always_unused, void *arg)
+{
+ int *ret = arg;
+
+ *ret = 0;
+ return NL_STOP;
+}
+
+static int ovpn_handle_msg(struct nl_msg *msg, void *arg)
+{
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+ struct nlmsghdr *nlh = nlmsg_hdr(msg);
+ char ifname[IF_NAMESIZE];
+ int *ret = arg;
+ __u32 ifindex;
+
+ fprintf(stderr, "received message from ovpn-dco\n");
+
+ *ret = -1;
+
+ if (!genlmsg_valid_hdr(nlh, 0)) {
+ fprintf(stderr, "invalid header\n");
+ return NL_STOP;
+ }
+
+ if (nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL)) {
+ fprintf(stderr, "received bogus data from ovpn-dco\n");
+ return NL_STOP;
+ }
+
+ if (!attrs[OVPN_A_IFINDEX]) {
+ fprintf(stderr, "no ifindex in this message\n");
+ return NL_STOP;
+ }
+
+ ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]);
+ if (!if_indextoname(ifindex, ifname)) {
+ fprintf(stderr, "cannot resolve ifname for ifindex: %u\n",
+ ifindex);
+ return NL_STOP;
+ }
+
+ switch (gnlh->cmd) {
+ case OVPN_CMD_PEER_DEL_NTF:
+ fprintf(stdout, "received CMD_PEER_DEL_NTF\n");
+ break;
+ case OVPN_CMD_KEY_SWAP_NTF:
+ fprintf(stdout, "received CMD_KEY_SWAP_NTF\n");
+ break;
+ default:
+ fprintf(stderr, "received unknown command: %d\n", gnlh->cmd);
+ return NL_STOP;
+ }
+
+ *ret = 0;
+ return NL_OK;
+}
+
+static int ovpn_get_mcast_id(struct nl_sock *sock, const char *family,
+ const char *group)
+{
+ struct nl_msg *msg;
+ struct nl_cb *cb;
+ int ret, ctrlid;
+ struct mcast_handler_args grp = {
+ .group = group,
+ .id = -ENOENT,
+ };
+
+ msg = nlmsg_alloc();
+ if (!msg)
+ return -ENOMEM;
+
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!cb) {
+ ret = -ENOMEM;
+ goto out_fail_cb;
+ }
+
+ ctrlid = genl_ctrl_resolve(sock, "nlctrl");
+
+ genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0);
+
+ ret = -ENOBUFS;
+ NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+ ret = nl_send_auto_complete(sock, msg);
+ if (ret < 0)
+ goto nla_put_failure;
+
+ ret = 1;
+
+ nl_cb_err(cb, NL_CB_CUSTOM, mcast_error_handler, &ret);
+ nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, mcast_ack_handler, &ret);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, mcast_family_handler, &grp);
+
+ while (ret > 0)
+ nl_recvmsgs(sock, cb);
+
+ if (ret == 0)
+ ret = grp.id;
+ nla_put_failure:
+ nl_cb_put(cb);
+ out_fail_cb:
+ nlmsg_free(msg);
+ return ret;
+}
+
+static int ovpn_listen_mcast(void)
+{
+ struct nl_sock *sock;
+ struct nl_cb *cb;
+ int mcid, ret;
+
+ sock = nl_socket_alloc();
+ if (!sock) {
+ fprintf(stderr, "cannot allocate netlink socket\n");
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ nl_socket_set_buffer_size(sock, 8192, 8192);
+
+ ret = genl_connect(sock);
+ if (ret < 0) {
+ fprintf(stderr, "cannot connect to generic netlink: %s\n",
+ nl_geterror(ret));
+ goto err_free;
+ }
+
+ mcid = ovpn_get_mcast_id(sock, OVPN_FAMILY_NAME, OVPN_MCGRP_PEERS);
+ if (mcid < 0) {
+ fprintf(stderr, "cannot get mcast group: %s\n",
+ nl_geterror(mcid));
+ goto err_free;
+ }
+
+ ret = nl_socket_add_membership(sock, mcid);
+ if (ret) {
+ fprintf(stderr, "failed to join mcast group: %d\n", ret);
+ goto err_free;
+ }
+
+ ret = 1;
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check, NULL);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, &ret);
+ nl_cb_err(cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &ret);
+
+ while (ret == 1) {
+ int err = nl_recvmsgs(sock, cb);
+
+ if (err < 0) {
+ fprintf(stderr,
+ "cannot receive netlink message: (%d) %s\n",
+ err, nl_geterror(-err));
+ ret = -1;
+ break;
+ }
+ }
+
+ nl_cb_put(cb);
+err_free:
+ nl_socket_free(sock);
+ return ret;
+}
+
+static void usage(const char *cmd)
+{
+ fprintf(stderr,
+ "Usage %s <command> <iface> [arguments..]\n",
+ cmd);
+ fprintf(stderr, "where <command> can be one of the following\n\n");
+
+ fprintf(stderr, "* new_iface <iface> [mode]: create new ovpn interface\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tmode:\n");
+ fprintf(stderr, "\t\t- P2P for peer-to-peer mode (i.e. client)\n");
+ fprintf(stderr, "\t\t- MP for multi-peer mode (i.e. server)\n");
+
+ fprintf(stderr, "* del_iface <iface>: delete ovpn interface\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+
+ fprintf(stderr,
+ "* listen <iface> <lport> <peers_file> [ipv6]: listen for incoming peer TCP connections\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: TCP port to listen to\n");
+ fprintf(stderr,
+ "\tpeers_file: file containing one peer per line: Line format:\n");
+ fprintf(stderr, "\t\t<peer_id> <vpnaddr>\n");
+ fprintf(stderr,
+ "\tipv6: whether the socket should listen to the IPv6 wildcard address\n");
+
+ fprintf(stderr,
+ "* connect <iface> <peer_id> <raddr> <rport> [key_file]: start connecting peer of TCP-based VPN session\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the connecting peer\n");
+ fprintf(stderr, "\traddr: peer IP address to connect to\n");
+ fprintf(stderr, "\trport: peer TCP port to connect to\n");
+ fprintf(stderr,
+ "\tkey_file: file containing the symmetric key for encryption\n");
+
+ fprintf(stderr,
+ "* new_peer <iface> <peer_id> <lport> <raddr> <rport> [vpnaddr]: add new peer\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: local UDP port to bind to\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID to be used in data packets to/from this peer\n");
+ fprintf(stderr, "\traddr: peer IP address\n");
+ fprintf(stderr, "\trport: peer UDP port\n");
+ fprintf(stderr, "\tvpnaddr: peer VPN IP\n");
+
+ fprintf(stderr,
+ "* new_multi_peer <iface> <lport> <peers_file>: add multiple peers as listed in the file\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: local UDP port to bind to\n");
+ fprintf(stderr,
+ "\tpeers_file: text file containing one peer per line. Line format:\n");
+ fprintf(stderr, "\t\t<peer_id> <raddr> <rport> <vpnaddr>\n");
+
+ fprintf(stderr,
+ "* set_peer <iface> <peer_id> <keepalive_interval> <keepalive_timeout>: set peer attributes\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+ fprintf(stderr,
+ "\tkeepalive_interval: interval for sending ping messages\n");
+ fprintf(stderr,
+ "\tkeepalive_timeout: time after which a peer is timed out\n");
+
+ fprintf(stderr, "* del_peer <iface> <peer_id>: delete peer\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to delete\n");
+
+ fprintf(stderr, "* get_peer <iface> [peer_id]: retrieve peer(s) status\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID of the peer to query. All peers are returned if omitted\n");
+
+ fprintf(stderr,
+ "* new_key <iface> <peer_id> <slot> <key_id> <cipher> <key_dir> <key_file>: set data channel key\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID of the peer to configure the key for\n");
+ fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+ fprintf(stderr, "\tkey_id: an ID from 0 to 7\n");
+ fprintf(stderr,
+ "\tcipher: cipher to use, supported: aes (AES-GCM), chachapoly (CHACHA20POLY1305)\n");
+ fprintf(stderr,
+ "\tkey_dir: key direction, must 0 on one host and 1 on the other\n");
+ fprintf(stderr, "\tkey_file: file containing the pre-shared key\n");
+
+ fprintf(stderr,
+ "* del_key <iface> <peer_id> [slot]: erase existing data channel key\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+ fprintf(stderr, "\tslot: slot to erase. PRIMARY if omitted\n");
+
+ fprintf(stderr,
+ "* get_key <iface> <peer_id> <slot>: retrieve non sensible key data\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to query\n");
+ fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+
+ fprintf(stderr,
+ "* swap_keys <iface> <peer_id>: swap content of primary and secondary key slots\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+
+ fprintf(stderr,
+ "* listen_mcast: listen to ovpn netlink multicast messages\n");
+}
+
+static int ovpn_parse_remote(struct ovpn_ctx *ovpn, const char *host,
+ const char *service, const char *vpnip)
+{
+ int ret;
+ struct addrinfo *result;
+ struct addrinfo hints = {
+ .ai_family = ovpn->sa_family,
+ .ai_socktype = SOCK_DGRAM,
+ .ai_protocol = IPPROTO_UDP
+ };
+
+ if (host) {
+ ret = getaddrinfo(host, service, &hints, &result);
+ if (ret) {
+ fprintf(stderr, "getaddrinfo on remote error: %s\n",
+ gai_strerror(ret));
+ return -1;
+ }
+
+ if (!(result->ai_family == AF_INET &&
+ result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+ !(result->ai_family == AF_INET6 &&
+ result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&ovpn->remote, result->ai_addr, result->ai_addrlen);
+ }
+
+ if (vpnip) {
+ ret = getaddrinfo(vpnip, NULL, &hints, &result);
+ if (ret) {
+ fprintf(stderr, "getaddrinfo on vpnip error: %s\n",
+ gai_strerror(ret));
+ return -1;
+ }
+
+ if (!(result->ai_family == AF_INET &&
+ result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+ !(result->ai_family == AF_INET6 &&
+ result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&ovpn->peer_ip, result->ai_addr, result->ai_addrlen);
+ ovpn->sa_family = result->ai_family;
+
+ ovpn->peer_ip_set = true;
+ }
+
+ ret = 0;
+out:
+ freeaddrinfo(result);
+ return ret;
+}
+
+static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id,
+ const char *raddr, const char *rport,
+ const char *vpnip)
+{
+ ovpn->peer_id = strtoul(peer_id, NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ return ovpn_parse_remote(ovpn, raddr, rport, vpnip);
+}
+
+static int ovpn_parse_key_slot(const char *arg, struct ovpn_ctx *ovpn)
+{
+ int slot = strtoul(arg, NULL, 10);
+
+ if (errno == ERANGE || slot < 1 || slot > 2) {
+ fprintf(stderr, "key slot out of range\n");
+ return -1;
+ }
+
+ switch (slot) {
+ case 1:
+ ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+ break;
+ case 2:
+ ovpn->key_slot = OVPN_KEY_SLOT_SECONDARY;
+ break;
+ }
+
+ return 0;
+}
+
+static int ovpn_send_tcp_data(int socket)
+{
+ uint16_t len = htons(1000);
+ uint8_t buf[1002];
+ int ret;
+
+ memcpy(buf, &len, sizeof(len));
+ memset(buf + sizeof(len), 0x86, sizeof(buf) - sizeof(len));
+
+ ret = send(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+
+ fprintf(stdout, "Sent %u bytes over TCP socket\n", ret);
+
+ return ret > 0 ? 0 : ret;
+}
+
+static int ovpn_recv_tcp_data(int socket)
+{
+ uint8_t buf[1002];
+ uint16_t len;
+ int ret;
+
+ ret = recv(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+
+ if (ret < 2) {
+ fprintf(stderr, ">>>> Error while reading TCP data: %d\n", ret);
+ return ret;
+ }
+
+ memcpy(&len, buf, sizeof(len));
+ len = ntohs(len);
+
+ fprintf(stdout, ">>>> Received %u bytes over TCP socket, header: %u\n",
+ ret, len);
+
+ return 0;
+}
+
+static enum ovpn_cmd ovpn_parse_cmd(const char *cmd)
+{
+ if (!strcmp(cmd, "new_iface"))
+ return CMD_NEW_IFACE;
+
+ if (!strcmp(cmd, "del_iface"))
+ return CMD_DEL_IFACE;
+
+ if (!strcmp(cmd, "listen"))
+ return CMD_LISTEN;
+
+ if (!strcmp(cmd, "connect"))
+ return CMD_CONNECT;
+
+ if (!strcmp(cmd, "new_peer"))
+ return CMD_NEW_PEER;
+
+ if (!strcmp(cmd, "new_multi_peer"))
+ return CMD_NEW_MULTI_PEER;
+
+ if (!strcmp(cmd, "set_peer"))
+ return CMD_SET_PEER;
+
+ if (!strcmp(cmd, "del_peer"))
+ return CMD_DEL_PEER;
+
+ if (!strcmp(cmd, "get_peer"))
+ return CMD_GET_PEER;
+
+ if (!strcmp(cmd, "new_key"))
+ return CMD_NEW_KEY;
+
+ if (!strcmp(cmd, "del_key"))
+ return CMD_DEL_KEY;
+
+ if (!strcmp(cmd, "get_key"))
+ return CMD_GET_KEY;
+
+ if (!strcmp(cmd, "swap_keys"))
+ return CMD_SWAP_KEYS;
+
+ if (!strcmp(cmd, "listen_mcast"))
+ return CMD_LISTEN_MCAST;
+
+ return CMD_INVALID;
+}
+
+/* Send process to background and waits for signal.
+ *
+ * This helper is called at the end of commands
+ * creating sockets, so that the latter stay alive
+ * along with the process that created them.
+ *
+ * A signal is expected to be delivered in order to
+ * terminate the waiting processes
+ */
+static void ovpn_waitbg(void)
+{
+ daemon(1, 1);
+ pause();
+}
+
+static int ovpn_run_cmd(struct ovpn_ctx *ovpn)
+{
+ char peer_id[10], vpnip[INET6_ADDRSTRLEN], laddr[128], lport[10];
+ char raddr[128], rport[10];
+ int n, ret;
+ FILE *fp;
+
+ switch (ovpn->cmd) {
+ case CMD_NEW_IFACE:
+ ret = ovpn_new_iface(ovpn);
+ break;
+ case CMD_DEL_IFACE:
+ ret = ovpn_del_iface(ovpn);
+ break;
+ case CMD_LISTEN:
+ ret = ovpn_listen(ovpn, ovpn->sa_family);
+ if (ret < 0) {
+ fprintf(stderr, "cannot listen on TCP socket\n");
+ return ret;
+ }
+
+ fp = fopen(ovpn->peers_file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open file: %s\n",
+ ovpn->peers_file);
+ return -1;
+ }
+
+ int num_peers = 0;
+
+ while ((n = fscanf(fp, "%s %s\n", peer_id, vpnip)) == 2) {
+ struct ovpn_ctx peer_ctx = { 0 };
+
+ if (num_peers == MAX_PEERS) {
+ fprintf(stderr, "max peers reached!\n");
+ return -E2BIG;
+ }
+
+ peer_ctx.ifindex = ovpn->ifindex;
+ peer_ctx.sa_family = ovpn->sa_family;
+
+ peer_ctx.socket = ovpn_accept(ovpn);
+ if (peer_ctx.socket < 0) {
+ fprintf(stderr, "cannot accept connection!\n");
+ return -1;
+ }
+
+ /* store peer sockets to test TCP I/O */
+ ovpn->cli_sockets[num_peers] = peer_ctx.socket;
+
+ ret = ovpn_parse_new_peer(&peer_ctx, peer_id, NULL,
+ NULL, vpnip);
+ if (ret < 0) {
+ fprintf(stderr, "error while parsing line\n");
+ return -1;
+ }
+
+ ret = ovpn_new_peer(&peer_ctx, true);
+ if (ret < 0) {
+ fprintf(stderr,
+ "cannot add peer to VPN: %s %s\n",
+ peer_id, vpnip);
+ return ret;
+ }
+ num_peers++;
+ }
+
+ for (int i = 0; i < num_peers; i++) {
+ ret = ovpn_recv_tcp_data(ovpn->cli_sockets[i]);
+ if (ret < 0)
+ break;
+ }
+ ovpn_waitbg();
+ break;
+ case CMD_CONNECT:
+ ret = ovpn_connect(ovpn);
+ if (ret < 0) {
+ fprintf(stderr, "cannot connect TCP socket\n");
+ return ret;
+ }
+
+ ret = ovpn_new_peer(ovpn, true);
+ if (ret < 0) {
+ fprintf(stderr, "cannot add peer to VPN\n");
+ close(ovpn->socket);
+ return ret;
+ }
+
+ if (ovpn->cipher != OVPN_CIPHER_ALG_NONE) {
+ ret = ovpn_new_key(ovpn);
+ if (ret < 0) {
+ fprintf(stderr, "cannot set key\n");
+ return ret;
+ }
+ }
+
+ ret = ovpn_send_tcp_data(ovpn->socket);
+ ovpn_waitbg();
+ break;
+ case CMD_NEW_PEER:
+ ret = ovpn_udp_socket(ovpn, AF_INET6);
+ if (ret < 0)
+ return ret;
+
+ ret = ovpn_new_peer(ovpn, false);
+ ovpn_waitbg();
+ break;
+ case CMD_NEW_MULTI_PEER:
+ ret = ovpn_udp_socket(ovpn, AF_INET6);
+ if (ret < 0)
+ return ret;
+
+ fp = fopen(ovpn->peers_file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open file: %s\n",
+ ovpn->peers_file);
+ return -1;
+ }
+
+ while ((n = fscanf(fp, "%s %s %s %s %s %s\n", peer_id, laddr,
+ lport, raddr, rport, vpnip)) == 6) {
+ struct ovpn_ctx peer_ctx = { 0 };
+
+ peer_ctx.ifindex = ovpn->ifindex;
+ peer_ctx.socket = ovpn->socket;
+ peer_ctx.sa_family = AF_UNSPEC;
+
+ ret = ovpn_parse_new_peer(&peer_ctx, peer_id, raddr,
+ rport, vpnip);
+ if (ret < 0) {
+ fprintf(stderr, "error while parsing line\n");
+ return -1;
+ }
+
+ ret = ovpn_new_peer(&peer_ctx, false);
+ if (ret < 0) {
+ fprintf(stderr,
+ "cannot add peer to VPN: %s %s %s %s\n",
+ peer_id, raddr, rport, vpnip);
+ return ret;
+ }
+ }
+ ovpn_waitbg();
+ break;
+ case CMD_SET_PEER:
+ ret = ovpn_set_peer(ovpn);
+ break;
+ case CMD_DEL_PEER:
+ ret = ovpn_del_peer(ovpn);
+ break;
+ case CMD_GET_PEER:
+ if (ovpn->peer_id == PEER_ID_UNDEF)
+ fprintf(stderr, "List of peers connected to: %s\n",
+ ovpn->ifname);
+
+ ret = ovpn_get_peer(ovpn);
+ break;
+ case CMD_NEW_KEY:
+ ret = ovpn_new_key(ovpn);
+ break;
+ case CMD_DEL_KEY:
+ ret = ovpn_del_key(ovpn);
+ break;
+ case CMD_GET_KEY:
+ ret = ovpn_get_key(ovpn);
+ break;
+ case CMD_SWAP_KEYS:
+ ret = ovpn_swap_keys(ovpn);
+ break;
+ case CMD_LISTEN_MCAST:
+ ret = ovpn_listen_mcast();
+ break;
+ case CMD_INVALID:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[])
+{
+ int ret;
+
+ /* no args required for LISTEN_MCAST */
+ if (ovpn->cmd == CMD_LISTEN_MCAST)
+ return 0;
+
+ /* all commands need an ifname */
+ if (argc < 3)
+ return -EINVAL;
+
+ strscpy(ovpn->ifname, argv[2], IFNAMSIZ - 1);
+ ovpn->ifname[IFNAMSIZ - 1] = '\0';
+
+ /* all commands, except NEW_IFNAME, needs an ifindex */
+ if (ovpn->cmd != CMD_NEW_IFACE) {
+ ovpn->ifindex = if_nametoindex(ovpn->ifname);
+ if (!ovpn->ifindex) {
+ fprintf(stderr, "cannot find interface: %s\n",
+ strerror(errno));
+ return -1;
+ }
+ }
+
+ switch (ovpn->cmd) {
+ case CMD_NEW_IFACE:
+ if (argc < 4)
+ break;
+
+ if (!strcmp(argv[3], "P2P")) {
+ ovpn->mode = OVPN_MODE_P2P;
+ } else if (!strcmp(argv[3], "MP")) {
+ ovpn->mode = OVPN_MODE_MP;
+ } else {
+ fprintf(stderr, "Cannot parse iface mode: %s\n",
+ argv[3]);
+ return -1;
+ }
+ ovpn->mode_set = true;
+ break;
+ case CMD_DEL_IFACE:
+ break;
+ case CMD_LISTEN:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ ovpn->peers_file = argv[4];
+
+ ovpn->sa_family = AF_INET;
+ if (argc > 5 && !strcmp(argv[5], "ipv6"))
+ ovpn->sa_family = AF_INET6;
+ break;
+ case CMD_CONNECT:
+ if (argc < 6)
+ return -EINVAL;
+
+ ovpn->sa_family = AF_INET;
+
+ ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5],
+ NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Cannot parse remote peer data\n");
+ return -1;
+ }
+
+ if (argc > 6) {
+ ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+ ovpn->key_id = 0;
+ ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM;
+ ovpn->key_dir = KEY_DIR_OUT;
+
+ ret = ovpn_parse_key(argv[6], ovpn);
+ if (ret)
+ return -1;
+ }
+ break;
+ case CMD_NEW_PEER:
+ if (argc < 7)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[4], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ const char *vpnip = (argc > 7) ? argv[7] : NULL;
+
+ ret = ovpn_parse_new_peer(ovpn, argv[3], argv[5], argv[6],
+ vpnip);
+ if (ret < 0)
+ return -1;
+ break;
+ case CMD_NEW_MULTI_PEER:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ ovpn->peers_file = argv[4];
+ break;
+ case CMD_SET_PEER:
+ if (argc < 6)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ovpn->keepalive_interval = strtoul(argv[4], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr,
+ "keepalive interval value out of range\n");
+ return -1;
+ }
+
+ ovpn->keepalive_timeout = strtoul(argv[5], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr,
+ "keepalive interval value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_DEL_PEER:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_GET_PEER:
+ ovpn->peer_id = PEER_ID_UNDEF;
+ if (argc > 3) {
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ }
+ break;
+ case CMD_NEW_KEY:
+ if (argc < 9)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return -1;
+
+ ovpn->key_id = strtoul(argv[5], NULL, 10);
+ if (errno == ERANGE || ovpn->key_id > 2) {
+ fprintf(stderr, "key ID out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_cipher(argv[6], ovpn);
+ if (ret < 0)
+ return -1;
+
+ ret = ovpn_parse_key_direction(argv[7], ovpn);
+ if (ret < 0)
+ return -1;
+
+ ret = ovpn_parse_key(argv[8], ovpn);
+ if (ret)
+ return -1;
+ break;
+ case CMD_DEL_KEY:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return ret;
+ break;
+ case CMD_GET_KEY:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return ret;
+ break;
+ case CMD_SWAP_KEYS:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_LISTEN_MCAST:
+ break;
+ case CMD_INVALID:
+ break;
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ struct ovpn_ctx ovpn;
+ int ret;
+
+ if (argc < 2) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ memset(&ovpn, 0, sizeof(ovpn));
+ ovpn.sa_family = AF_UNSPEC;
+ ovpn.cipher = OVPN_CIPHER_ALG_NONE;
+
+ ovpn.cmd = ovpn_parse_cmd(argv[1]);
+ if (ovpn.cmd == CMD_INVALID) {
+ fprintf(stderr, "Error: unknown command.\n\n");
+ usage(argv[0]);
+ return -1;
+ }
+
+ ret = ovpn_parse_cmd_args(&ovpn, argc, argv);
+ if (ret < 0) {
+ fprintf(stderr, "Error: invalid arguments.\n\n");
+ if (ret == -EINVAL)
+ usage(argv[0]);
+ return ret;
+ }
+
+ ret = ovpn_run_cmd(&ovpn);
+ if (ret)
+ fprintf(stderr, "Cannot execute command: %s (%d)\n",
+ strerror(-ret), ret);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt
new file mode 100644
index 000000000000..d753eebe8716
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt
@@ -0,0 +1,5 @@
+1 5.5.5.2
+2 5.5.5.3
+3 5.5.5.4
+4 5.5.5.5
+5 5.5.5.6
diff --git a/tools/testing/selftests/net/ovpn/test-chachapoly.sh b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
new file mode 100755
index 000000000000..32504079a2b8
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+ALG="chachapoly"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
new file mode 100755
index 000000000000..093d44772ffd
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+
+source test-close-socket.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh
new file mode 100755
index 000000000000..5e48a8b67928
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+#set -x
+set -e
+
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ setup_ns ${p} 5.5.5.$((${p} + 1))/24
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ add_peer ${p}
+done
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+done
+
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/test-float.sh b/tools/testing/selftests/net/ovpn/test-float.sh
new file mode 100755
index 000000000000..ba5d725e18b0
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-float.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+FLOAT="1"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-large-mtu.sh b/tools/testing/selftests/net/ovpn/test-large-mtu.sh
new file mode 100755
index 000000000000..ce2a2cb64f72
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-large-mtu.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+MTU="1500"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-tcp.sh b/tools/testing/selftests/net/ovpn/test-tcp.sh
new file mode 100755
index 000000000000..ba3f1f315a34
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh
new file mode 100755
index 000000000000..e8acdc303307
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+#set -x
+set -e
+
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ setup_ns ${p} 5.5.5.$((${p} + 1))/24 ${MTU}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ add_peer ${p}
+done
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+ ip netns exec peer0 ping -qfc 500 -s 3000 -w 3 5.5.5.$((${p} + 1))
+done
+
+# ping LAN behind client 1
+ip netns exec peer0 ping -qfc 500 -w 3 ${LAN_IP}
+
+if [ "$FLOAT" == "1" ]; then
+ # make clients float..
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p}
+ ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p}
+ done
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1
+ done
+fi
+
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+echo "Adding secondary key and then swap:"
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 data64.key
+ ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} ${p} 2 1 ${ALG} 1 data64.key
+ ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} ${p}
+done
+
+sleep 1
+
+echo "Querying all peers:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0
+ip netns exec peer1 ${OVPN_CLI} get_peer tun1
+
+echo "Querying peer 1:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1
+
+echo "Querying non-existent peer 10:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 10 || true
+
+echo "Deleting peer 1:"
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1
+ip netns exec peer1 ${OVPN_CLI} del_peer tun1 1
+
+echo "Querying keys:"
+for p in $(seq 2 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 1
+ ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 2
+done
+
+echo "Deleting peer while sending traffic:"
+(ip netns exec peer2 ping -qf -w 4 5.5.5.1)&
+sleep 2
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2
+# following command fails in TCP mode
+# (both ends get conn reset when one peer disconnects)
+ip netns exec peer2 ${OVPN_CLI} del_peer tun2 2 || true
+
+echo "Deleting keys:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 1
+ ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 2
+done
+
+echo "Setting timeout to 3s MP:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 0 0
+done
+# wait for peers to timeout
+sleep 5
+
+echo "Setting timeout to 3s P2P:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 3 3
+done
+sleep 5
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt
new file mode 100644
index 000000000000..e9773ddf875c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/udp_peers.txt
@@ -0,0 +1,6 @@
+1 10.10.1.1 1 10.10.1.2 1 5.5.5.2
+2 10.10.2.1 1 10.10.2.2 1 5.5.5.3
+3 10.10.3.1 1 10.10.3.2 1 5.5.5.4
+4 fd00:0:0:4::1 1 fd00:0:0:4::2 1 5.5.5.5
+5 fd00:0:0:5::1 1 fd00:0:0:5::2 1 5.5.5.6
+6 fd00:0:0:6::1 1 fd00:0:0:6::2 1 5.5.5.7
diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile
new file mode 100644
index 000000000000..ff54641493e9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_INCLUDES := \
+ defaults.sh \
+ ksft_runner.sh \
+ set_sysctls.py \
+ ../../kselftest/ktap_helpers.sh \
+# end of TEST_INCLUDES
+
+TEST_PROGS := $(wildcard *.pkt)
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config
new file mode 100644
index 000000000000..c4a19a785521
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/config
@@ -0,0 +1,11 @@
+CONFIG_HZ=1000
+CONFIG_HZ_1000=y
+CONFIG_IPV6=y
+CONFIG_NET_NS=y
+CONFIG_NET_SCH_FIFO=y
+CONFIG_NET_SCH_FQ=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYN_COOKIES=y
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_TUN=y
diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh
new file mode 100755
index 000000000000..37edd3dc3b07
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/defaults.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Set standard production config values that relate to TCP behavior.
+
+# Flush old cached data (fastopen cookies).
+ip tcp_metrics flush all > /dev/null 2>&1
+
+# TCP min, default, and max receive and send buffer sizes.
+sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))"
+sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304"
+
+# TCP timestamps.
+sysctl -q net.ipv4.tcp_timestamps=1
+
+# TCP SYN(ACK) retry thresholds
+sysctl -q net.ipv4.tcp_syn_retries=5
+sysctl -q net.ipv4.tcp_synack_retries=5
+
+# TCP Forward RTO-Recovery, RFC 5682.
+sysctl -q net.ipv4.tcp_frto=2
+
+# TCP Selective Acknowledgements (SACK)
+sysctl -q net.ipv4.tcp_sack=1
+
+# TCP Duplicate Selective Acknowledgements (DSACK)
+sysctl -q net.ipv4.tcp_dsack=1
+
+# TCP FACK (Forward Acknowldgement)
+sysctl -q net.ipv4.tcp_fack=0
+
+# TCP reordering degree ("dupthresh" threshold for entering Fast Recovery).
+sysctl -q net.ipv4.tcp_reordering=3
+
+# TCP congestion control.
+sysctl -q net.ipv4.tcp_congestion_control=cubic
+
+# TCP slow start after idle.
+sysctl -q net.ipv4.tcp_slow_start_after_idle=0
+
+# TCP RACK and TLP.
+sysctl -q net.ipv4.tcp_early_retrans=4 net.ipv4.tcp_recovery=1
+
+# TCP method for deciding when to defer sending to accumulate big TSO packets.
+sysctl -q net.ipv4.tcp_tso_win_divisor=3
+
+# TCP Explicit Congestion Notification (ECN)
+sysctl -q net.ipv4.tcp_ecn=0
+
+sysctl -q net.ipv4.tcp_pacing_ss_ratio=200
+sysctl -q net.ipv4.tcp_pacing_ca_ratio=120
+sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1
+
+sysctl -q net.ipv4.tcp_fastopen=0x3
+# Use TFO_COOKIE in ksft_runner.sh for this key.
+sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4
+
+sysctl -q net.ipv4.tcp_syncookies=1
+
+# Override the default qdisc on the tun device.
+# Many tests fail with timing errors if the default
+# is FQ and that paces their flows.
+tc qdisc add dev tun0 root pfifo
+
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
new file mode 100755
index 000000000000..b34e5cf0112e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh"
+
+declare -A ip_args=(
+ [ipv4]="--ip_version=ipv4
+ --local_ip=192.168.0.1
+ --gateway_ip=192.168.0.1
+ --netmask_ip=255.255.0.0
+ --remote_ip=192.0.2.1
+ -D TFO_COOKIE=3021b9d889017eeb
+ -D TFO_COOKIE_ZERO=b7c12350a90dc8f5
+ -D CMSG_LEVEL_IP=SOL_IP
+ -D CMSG_TYPE_RECVERR=IP_RECVERR"
+ [ipv6]="--ip_version=ipv6
+ --mtu=1520
+ --local_ip=fd3d:0a0b:17d6::1
+ --gateway_ip=fd3d:0a0b:17d6:8888::1
+ --remote_ip=fd3d:fa7b:d17d::1
+ -D TFO_COOKIE=c1d1e9742a47a9bc
+ -D TFO_COOKIE_ZERO=82af1a8f9a205c34
+ -D CMSG_LEVEL_IP=SOL_IPV6
+ -D CMSG_TYPE_RECVERR=IPV6_RECVERR"
+)
+
+if [ $# -ne 1 ]; then
+ ktap_exit_fail_msg "usage: $0 <script>"
+ exit "$KSFT_FAIL"
+fi
+script="$(basename $1)"
+
+if [ -z "$(which packetdrill)" ]; then
+ ktap_skip_all "packetdrill not found in PATH"
+ exit "$KSFT_SKIP"
+fi
+
+declare -a optargs
+failfunc=ktap_test_fail
+
+if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
+ optargs+=('--tolerance_usecs=14000')
+ failfunc=ktap_test_xfail
+fi
+
+ip_versions=$(grep -E '^--ip_version=' $script | cut -d '=' -f 2)
+if [[ -z $ip_versions ]]; then
+ ip_versions="ipv4 ipv6"
+elif [[ ! "$ip_versions" =~ ^ipv[46]$ ]]; then
+ ktap_exit_fail_msg "Too many or unsupported --ip_version: $ip_versions"
+ exit "$KSFT_FAIL"
+fi
+
+ktap_print_header
+ktap_set_plan $(echo $ip_versions | wc -w)
+
+for ip_version in $ip_versions; do
+ unshare -n packetdrill ${ip_args[$ip_version]} ${optargs[@]} $script > /dev/null \
+ && ktap_test_pass $ip_version || $failfunc $ip_version
+done
+
+ktap_finished
diff --git a/tools/testing/selftests/net/packetdrill/set_sysctls.py b/tools/testing/selftests/net/packetdrill/set_sysctls.py
new file mode 100755
index 000000000000..5ddf456ae973
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/set_sysctls.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Sets sysctl values and writes a file that restores them.
+
+The arguments are of the form "<proc-file>=<val>" separated by spaces.
+The program first reads the current value of the proc-file and creates
+a shell script named "/tmp/sysctl_restore_${PACKETDRILL_PID}.sh" which
+restores the values when executed. It then sets the new values.
+
+PACKETDRILL_PID is set by packetdrill to the pid of itself, so a .pkt
+file could restore sysctls by running `/tmp/sysctl_restore_${PPID}.sh`
+at the end.
+"""
+
+import os
+import subprocess
+import sys
+
+filename = '/tmp/sysctl_restore_%s.sh' % os.environ['PACKETDRILL_PID']
+
+# Open file for restoring sysctl values
+restore_file = open(filename, 'w')
+print('#!/bin/bash', file=restore_file)
+
+for a in sys.argv[1:]:
+ sysctl = a.split('=')
+ # sysctl[0] contains the proc-file name, sysctl[1] the new value
+
+ # read current value and add restore command to file
+ cur_val = subprocess.check_output(['cat', sysctl[0]], universal_newlines=True)
+ print('echo "%s" > %s' % (cur_val.strip(), sysctl[0]), file=restore_file)
+
+ # set new value
+ cmd = 'echo "%s" > %s' % (sysctl[1], sysctl[0])
+ os.system(cmd)
+
+os.system('chmod u+x %s' % filename)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt
new file mode 100644
index 000000000000..38535701656e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking accept.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0...0.200 accept(3, ..., ...) = 4
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +.1 write(4, ..., 2000) = 2000
+ +0 > P. 1:2001(2000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt
new file mode 100644
index 000000000000..3692ef102381
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking connect.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+
+ +.1...0.200 connect(3, ..., ...) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.1 < S. 0:0(0) ack 1 win 5792 <mss 1460,nop,wscale 2,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
new file mode 100644
index 000000000000..657e42ca65b5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking read.
+
+--tolerance_usecs=10000
+--mss=1000
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+
+ +0...0.100 read(4, ..., 2000) = 2000
+ +.1 < P. 1:2001(2000) ack 1 win 257
+ +0 > . 1:1(0) ack 2001
+
+ +.1...0.200 read(4, ..., 2000) = 2000
+ +.1 < P. 2001:4001(2000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001
+
+ +.1 < P. 4001:6001(2000) ack 1 win 257
+ +0 > . 1:1(0) ack 6001
+ +0...0.000 read(4, ..., 1000) = 1000
+ +0...0.000 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt
new file mode 100644
index 000000000000..cec5a0725d95
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking write.
+--tolerance_usecs=10000
+
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_min_tso_segs=10
+`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 50000 <mss 1000,nop,wscale 0>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 50000
+ +0 accept(3, ..., ...) = 4
+
+// Kernel doubles our value -> sk->sk_sndbuf is set to 42000
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [21000], 4) = 0
+ +0 getsockopt(4, SOL_SOCKET, SO_SNDBUF, [42000], [4]) = 0
+
+// A write of 60000 does not block.
+ +0...0.300 write(4, ..., 61000) = 61000 // this write() blocks
+
+ +.1 < . 1:1(0) ack 10001 win 50000
+
+ +.1 < . 1:1(0) ack 30001 win 50000
+
+// This ACK should wakeup the write(). An ACK of 35001 does not.
+ +.1 < . 1:1(0) ack 36001 win 50000
+
+// Reset to sysctls defaults.
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt
new file mode 100644
index 000000000000..8514d6bdbb6d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test basic connection teardown where local process closes first:
+// the local process calls close() first, so we send a FIN, and receive an ACK.
+// Then we receive a FIN and ACK it.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +.01...0.011 connect(3, ..., ...) = 0
+ +0 > S 0:0(0) <...>
+ +0 < S. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
+
+ +0 write(3, ..., 1000) = 1000
+ +0 > P. 1:1001(1000) ack 1
+ +0 < . 1:1(0) ack 1001 win 257
+
+ +0 close(3) = 0
+ +0 > F. 1001:1001(0) ack 1
+ +0 < . 1:1(0) ack 1002 win 257
+
+ +0 < F. 1:1(0) ack 1002 win 257
+ +0 > . 1002:1002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt
new file mode 100644
index 000000000000..04103134bd99
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test to make sure no RST is being sent when close()
+// is called on a socket with SYN_SENT state.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <...>
+
+// Application decideds to close the socket in SYN_SENT state
+// Make sure no RST is sent after close().
+ +0 close(3) = 0
+
+// Receive syn-ack to trigger the send side packet examination:
+// If a RESET were sent right after close(), it would have failed with
+// a mismatched timestamp.
+ +.1 < S. 0:0(0) ack 1 win 32000 <mss 1460,nop,wscale 7>
+ +0 > R 1:1(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt
new file mode 100644
index 000000000000..5f3a2914213a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+// Verify behavior for the sequence: remote side sends FIN, then we close().
+// Since the remote side (client) closes first, we test our LAST_ACK code path.
+
+`./defaults.sh`
+
+// Initialize a server socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+// Client closes first.
+ +.01 < F. 1:1(0) ack 1 win 257
+ +0 > . 1:1(0) ack 2
+
+// App notices that client closed.
+ +0 read(4, ..., 1000) = 0
+
+// Then we close.
+ +.01 close(4) = 0
+ +0 > F. 1:1(0) ack 2
+
+// Client ACKs our FIN.
+ +.01 < . 2:2(0) ack 2 win 257
+
+// Verify that we send RST in response to any incoming segments
+// (because the kernel no longer has any record of this socket).
+ +.01 < . 2:2(0) ack 2 win 257
+ +0 > R 2:2(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt
new file mode 100644
index 000000000000..eef01d5f1118
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +.1 < . 1:1(0) ack 1 win 32792
+
+
+ +0 accept(3, ..., ...) = 4
+ +0 < . 1:1001(1000) ack 1 win 32792
+ +0 > . 1:1(0) ack 1001
+ +0 read(4, ..., 1000) = 1000
+
+// resend the payload + a FIN
+ +0 < F. 1:1001(1000) ack 1 win 32792
+// Why do we have a delay and no dsack ?
+ +0~+.04 > . 1:1(0) ack 1002
+
+ +0 close(4) = 0
+
+// According to RFC 2525, section 2.17
+// we should _not_ send an RST here, because there was no data to consume.
+ +0 > F. 1:1(0) ack 1002
diff --git a/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
new file mode 100644
index 000000000000..c790d0af635e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test various DSACK (RFC 2883) behaviors.
+
+--mss=1000
+
+`./defaults.sh`
+
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+// First SACK range.
+ +0 < P. 1001:2001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop, nop, sack 1001:2001>
+
+// Check SACK coalescing (contiguous sequence).
+ +0 < P. 2001:3001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 1001:3001>
+
+// Check we have two SACK ranges for non contiguous sequences.
+ +0 < P. 4001:5001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 4001:5001 1001:3001>
+
+// Three ranges.
+ +0 < P. 7001:8001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 7001:8001 4001:5001 1001:3001>
+
+// DSACK (1001:3001) + SACK (6001:7001)
+ +0 < P. 1:6001(6000) ack 1 win 1024
+ +0 > . 1:1(0) ack 6001 <nop,nop,sack 1001:3001 7001:8001>
+
+// DSACK (7001:8001)
+ +0 < P. 6001:8001(2000) ack 1 win 1024
+ +0 > . 1:1(0) ack 8001 <nop,nop,sack 7001:8001>
+
+// DSACK for an older segment.
+ +0 < P. 1:1001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 8001 <nop,nop,sack 1:1001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt
new file mode 100644
index 000000000000..643baf3267cf
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test ECN: verify that Linux TCP ECN sending code uses ECT0 (not ECT1).
+//
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1 # fully enabled
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+
+// ECN handshake: send EW flags in SYN packet, E flag in SYN-ACK response
++.002 ... 0.004 connect(4, ..., ...) = 0
+
+ +0 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.002 < SE. 0:0(0) ack 1 win 32767 <mss 1000,nop,wscale 6,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
+
+// Write 1 MSS.
++.002 write(4, ..., 1000) = 1000
+// Send 1 MSS with ect0.
+ +0 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt
new file mode 100644
index 000000000000..f95b9b3c9fa1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk. The large chunk itself should be packetized as
+// usual.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+ +0 write(4, ..., 10400) = 10400
+
+// Write another 10040B chunk with no coalescing options.
+ +0 send(4, ..., 10400, MSG_EOR) = 10400
+
+// Write a 2KB chunk. This chunk should not be appended to the packets created
+// the previous chunk.
+ +0 write(4, ..., 2000) = 2000
+
+ +0 > P. 1:10001(10000) ack 1
++.001 < . 1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+ +0 > P. 10001:20801(10800) ack 1
++.001 < . 1:1(0) ack 20801 win 514
+// This 2KB packet should be sent alone.
+ +0 > P. 20801:22801(2000) ack 1
++.001 < . 1:1(0) ack 22801 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt
new file mode 100644
index 000000000000..2ff66075288e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk. Also, when packets are retransmitted, they
+// will not be coalesce into the same skb.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+ +0 write(4, ..., 10400) = 10400
+
+// Write 10 400B chunks with no coalescing options.
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+// This chunk should not be appended to the skbs created for the previous chunk.
+ +0 write(4, ..., 10000) = 10000
+
+ +0 > P. 1:10001(10000) ack 1
++.001 < . 1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+ +0 > P. 10001:10801(800) ack 1
+// The 9 remaining 400B chunks should be sent as individual packets.
+ +0 > P. 10801:11201(400) ack 1
+ +0 > P. 11201:11601(400) ack 1
+ +0 > P. 11601:12001(400) ack 1
+ +0 > P. 12001:12401(400) ack 1
+ +0 > P. 12401:12801(400) ack 1
+ +0 > P. 12801:13201(400) ack 1
+ +0 > P. 13201:13601(400) ack 1
+ +0 > P. 13601:14001(400) ack 1
+ +0 > P. 14001:14401(400) ack 1
+// The last 10KB chunk should be sent separately.
+ +0 > P. 14401:24401(10000) ack 1
+
++.001 < . 1:1(0) ack 10401 win 514
++.001 < . 1:1(0) ack 10801 win 514
++.001 < . 1:1(0) ack 11201 win 514
++.001 < . 1:1(0) ack 11601 win 514
++.001 < . 1:1(0) ack 12001 win 514 <sack 13201:14401,nop,nop>
+// TCP should fill the hole but no coalescing should happen, and all
+// retransmissions should be sent out as individual packets.
+
+// Note : This is timeout based retransmit.
+// Do not put +0 here or flakes will come back.
++.004~+.008 > P. 12001:12401(400) ack 1
+
++.001 < . 1:1(0) ack 12401 win 514 <sack 13201:14401,nop,nop>
+ +0 > P. 12401:12801(400) ack 1
+ +0 > P. 12801:13201(400) ack 1
++.001 < . 1:1(0) ack 12801 win 514 <sack 13201:14401,nop,nop>
++.001 < . 1:1(0) ack 14401 win 514
++.001 < . 1:1(0) ack 24401 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt
new file mode 100644
index 000000000000..77039c5aac39
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+ +0 write(4, ..., 10400) = 10400
+
+// Write a 400B chunk with no coalescing options.
+ +0 send(4, ..., 400, MSG_EOR) = 400
+
+// This chunk should not be appended to the skbs created for the previous chunk.
+ +0 write(4, ..., 10000) = 10000
+
+ +0 > P. 1:10001(10000) ack 1
++.001 < . 1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+ +0 > P. 10001:10801(800) ack 1
+ +0 > P. 10801:20801(10000) ack 1
++.001 < . 1:1(0) ack 10401 win 514
++.001 < . 1:1(0) ack 10801 win 514
++.001 < . 1:1(0) ack 20801 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt
new file mode 100644
index 000000000000..dd5a06250595
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk even though we have 10 back-to-back small
+// writes.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+ +0 write(4, ..., 10400) = 10400
+
+// Write 10 400B chunks with no coalescing options.
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+// This chunk should not be appended to the skbs created for the previous chunk.
+ +0 write(4, ..., 10000) = 10000
+
+ +0 > P. 1:10001(10000) ack 1
++.001 < . 1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+ +0 > P. 10001:10801(800) ack 1
+// The 9 remaining 400B chunks should be sent as individual packets.
+ +0 > P. 10801:11201(400) ack 1
+ +0 > P. 11201:11601(400) ack 1
+ +0 > P. 11601:12001(400) ack 1
+ +0 > P. 12001:12401(400) ack 1
+ +0 > P. 12401:12801(400) ack 1
+ +0 > P. 12801:13201(400) ack 1
+ +0 > P. 13201:13601(400) ack 1
+ +0 > P. 13601:14001(400) ack 1
+ +0 > P. 14001:14401(400) ack 1
+// The last 10KB chunk should be sent separately.
+ +0 > P. 14401:24401(10000) ack 1
+
++.001 < . 1:1(0) ack 10401 win 514
++.001 < . 1:1(0) ack 10801 win 514
++.001 < . 1:1(0) ack 11201 win 514
++.001 < . 1:1(0) ack 11601 win 514
++.001 < . 1:1(0) ack 12001 win 514
++.001 < . 1:1(0) ack 12401 win 514
++.001 < . 1:1(0) ack 12801 win 514
++.001 < . 1:1(0) ack 13201 win 514
++.001 < . 1:1(0) ack 13601 win 514
++.001 < . 1:1(0) ack 14001 win 514
++.001 < . 1:1(0) ack 14401 win 514
++.001 < . 1:1(0) ack 24401 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt
new file mode 100644
index 000000000000..0d3c8077e830
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation.
+// In this variant we test a simple case where in-flight == ssthresh
+// all the way through recovery, so during fast recovery we send one segment
+// for each segment SACKed/ACKed.
+
+// Set up config.
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// RTT 100ms
+ +.1 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Send 10 data segments.
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// Lost packet 1:1001.
+ +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop>
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop>
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:4001,nop,nop>
+// Enter fast recovery.
+ +0 > . 1:1001(1000) ack 1
+ +.01 %{
+assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state
+assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd
+assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh
+}%
+
+// Write some more, which we will send 1 MSS at a time,
+// as in-flight segments are SACKed or ACKed.
+ +.01 write(4, ..., 7000) = 7000
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:5001,nop,nop>
+ +0 > . 10001:11001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:6001,nop,nop>
+ +0 > . 11001:12001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:7001,nop,nop>
+ +0 > . 12001:13001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:8001,nop,nop>
+ +0 > . 13001:14001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:9001,nop,nop>
+ +0 > . 14001:15001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:10001,nop,nop>
+ +0 > . 15001:16001(1000) ack 1
+
+ +.02 < . 1:1(0) ack 10001 win 320
+ +0 > P. 16001:17001(1000) ack 1
+// Leave fast recovery.
+ +.01 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd
+assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh
+}%
+
+ +.03 < . 1:1(0) ack 12001 win 320
+ +.02 < . 1:1(0) ack 14001 win 320
+ +.02 < . 1:1(0) ack 16001 win 320
+ +.02 < . 1:1(0) ack 17001 win 320
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt
new file mode 100644
index 000000000000..7842a10b6967
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation. The sender sends 20 packets. Packet
+// 1 to 4, and 11 to 16 are dropped.
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+ +.01 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Write 20 data segments.
+ +0 write(4, ..., 20000) = 20000
+ +0 > P. 1:10001(10000) ack 1
+
+// Receive first DUPACK, entering PRR part
+ +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop>
+ +0 > . 10001:11001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop>
+ +0 > . 11001:12001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop>
+ +0 > . 1:1001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop>
+ +0 > . 2001:3001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop>
+ +0 > . 3001:4001(1000) ack 1
+// Enter PRR CRB
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:11001,nop,nop>
+ +0 > . 12001:13001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:12001,nop,nop>
+ +0 > . 13001:14001(1000) ack 1
+// Enter PRR slow start
+ +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:12001,nop,nop>
+ +0 > P. 14001:16001(2000) ack 1
++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:12001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 > . 16001:17001(1000) ack 1
+// inflight reaches ssthresh, goes into packet conservation mode
++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:13001,nop,nop>
+ +0 > . 17001:18001(1000) ack 1
++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:14001,nop,nop>
+ +0 > . 18001:19001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt
new file mode 100644
index 000000000000..b66d7644c3b6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation. The sender sends 20 packets. Packet
+// 1 to 4 are lost. The sender writes another 10 packets.
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+ +.01 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Send 20 data segments.
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// Lost packet 1,2,3,4
+ +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop>
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop>
+ +0 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop>
+ +0 > . 1:1001(1000) ack 1
+ +0 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop>
+ +0 > . 2001:3001(1000) ack 1
+ +0 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop>
+ +0 > . 3001:4001(1000) ack 1
+
+// Receiver ACKs all data.
+ +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:10001,nop,nop>
+ +0 < . 1:1(0) ack 2001 win 320 <sack 4001:10001,nop,nop>
+ +0 < . 1:1(0) ack 3001 win 320 <sack 4001:10001,nop,nop>
+ +0 < . 1:1(0) ack 10001 win 320
+
+// Writes another 10 packets, which the ssthresh*mss amount
+// should be sent right away
+ +.01 write(4, ..., 10000) = 10000
+ +0 > . 10001:17001(7000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt
new file mode 100644
index 000000000000..8e87bfecabb5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation.
+// In this variant we verify that the sender uses SACK info on an ACK
+// below snd_una.
+
+// Set up config.
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 8>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// RTT 10ms
+ +.01 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Send 10 data segments.
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// Lost packet 1:1001,4001:5001,7001:8001.
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop>
+ +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop>
+ +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001 8001:9001,nop,nop>
+ +0 > . 1:1001(1000) ack 1
+
++.012 < . 1:1(0) ack 4001 win 320 <sack 8001:9001,nop,nop>
+ +0 > . 4001:7001(3000) ack 1
+
+ +0 write(4, ..., 10000) = 10000
+
+// The following ACK was reordered - delayed so that it arrives with
+// an ACK field below snd_una. Here we check that the newly-SACKed
+// 2MSS at 5001:7001 cause us to send out 2 more MSS.
++.002 < . 1:1(0) ack 3001 win 320 <sack 5001:7001,nop,nop>
+ +0 > . 7001:8001(1000) ack 1
+ +0 > . 10001:11001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt
new file mode 100644
index 000000000000..32aff9bc4052
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test TFO_SERVER_COOKIE_NOT_REQD flag on receiving
+// SYN with data but without Fast Open cookie option.
+
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x202`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Since TFO_SERVER_COOKIE_NOT_REQD, a TFO socket will be created with
+// the data accepted.
+ +0 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 read(4, ..., 1024) = 1000
+
+// Data After SYN will be accepted too.
+ +0 < . 1001:2001(1000) ack 1 win 5840
+ +0 > . 1:1(0) ack 2001
+
+// Should change the implementation later to set the SYN flag as well.
+ +0 read(4, ..., 1024) = 1000
+ +0 write(4, ..., 1000) = 1000
+ +0 > P. 1:1001(1000) ack 2001
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt
new file mode 100644
index 000000000000..649997a58099
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test TFO_SERVER_WO_SOCKOPT1 without setsockopt(TCP_FASTOPEN)
+
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x402`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+ +0 read(4, ..., 512) = 10
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt
new file mode 100644
index 000000000000..4a00e0d994f2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Server w/o TCP_FASTOPEN socket option
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,FO TFO_COOKIE>
+
+// Data is ignored since TCP_FASTOPEN is not set on the listener
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable)
+
+// The above should block until ack comes in below.
+ +0 < . 1:31(30) ack 1 win 5840
+ +0 accept(3, ..., ...) = 4
+
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+ +0 read(4, ..., 512) = 30
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt
new file mode 100644
index 000000000000..345ed26ff7f8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test that TFO-enabled server would not respond SYN-ACK with any TFO option
+// when receiving a pure SYN-data. It should respond a pure SYN-ack.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6>
+ +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 100
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+ +0 close(3) = 0
+
+// Test ECN-setup SYN with ECN disabled because this has happened in reality
+ +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6>
+ +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 100
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+ +0 close(3) = 0
+
+// Test ECN-setup SYN w/ ECN enabled
+ +0 `sysctl -q net.ipv4.tcp_ecn=2`
+ +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6>
+ +0 > SE. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 100
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+ +0 close(3) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt
new file mode 100644
index 000000000000..98e6f84497cd
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test TFO server with SYN that has TFO cookie and data.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+ +0 read(4, ..., 512) = 10
+ +0 write(4, ..., 100) = 100
+ +0 > P. 1:101(100) ack 11
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt
new file mode 100644
index 000000000000..95b1047ffdd5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test zero-payload packet w/ valid TFO cookie - a TFO socket will
+// still be created and accepted but read() will not return until a
+// later pkt with 10 byte.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+
+// A TFO socket is created and is writable.
+ +0 write(4, ..., 100) = 100
+ +0 > P. 1:101(100) ack 1
+ +0...0.300 read(4, ..., 512) = 10
+ +.3 < P. 1:11(10) ack 1 win 5840
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt
new file mode 100644
index 000000000000..f75efd51ed0c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// A reproducer case for a TFO SYNACK RTO undo bug in:
+// 794200d66273 ("tcp: undo cwnd on Fast Open spurious SYNACK retransmit")
+// This sequence that tickles this bug is:
+// - Fast Open server receives TFO SYN with data, sends SYNACK
+// - (client receives SYNACK and sends ACK, but ACK is lost)
+// - server app sends some data packets
+// - (N of the first data packets are lost)
+// - server receives client ACK that has a TS ECR matching first SYNACK,
+// and also SACKs suggesting the first N data packets were lost
+// - server performs undo of SYNACK RTO, then immediately enters recovery
+// - buggy behavior in 794200d66273 then performed an undo that caused
+// the connection to be in a bad state, in CA_Open with retrans_out != 0
+
+// Check that outbound TS Val ticks are as we would expect with 1000 usec per
+// timestamp tick:
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:1000(1000) win 65535 <mss 1012,sackOK,TS val 1000 ecr 0,wscale 7,nop,nop,nop,FO TFO_COOKIE>
+ +0 > S. 0:0(0) ack 1001 <mss 1460,sackOK,TS val 2000 ecr 1000,nop,wscale 8>
+ +0 accept(3, ..., ...) = 4
+
+// Application writes more data
+ +.010 write(4, ..., 10000) = 10000
+ +0 > P. 1:5001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000>
+ +0 > P. 5001:10001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000>
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +0 < . 1001:1001(0) ack 1 win 257 <TS val 1010 ecr 2000,sack 2001:5001>
+ +0 > P. 1:2001(2000) ack 1001 <nop,nop,TS val 2010 ecr 1010>
+ +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }%
+ +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }%
+
+ +0 < . 1001:1001(0) ack 1 win 257 <TS val 1011 ecr 2000,sack 2001:6001>
+ +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }%
+ +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt
new file mode 100644
index 000000000000..c3cb0e8bdcf8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Test the Experimental Option
+//
+// SYN w/ FOEXP w/o cookie must generates SYN+ACK w/ FOEXP
+// w/ a valid cookie, and the cookie must be the same one
+// with one generated by IANA FO
+
+`./defaults.sh`
+
+// Request a TFO cookie by Experimental Option
+// This must generate the same TFO_COOKIE
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,FOEXP TFO_COOKIE>
+
+ +0 close(3) = 0
+
+// Test if FOEXP with a valid cookie creates a TFO socket
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP TFO_COOKIE>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+ +0 read(4, ..., 512) = 10
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt
new file mode 100644
index 000000000000..dc09f8d9a381
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a FIN pkt with the ACK bit to a TFO socket.
+// The socket will go to TCP_CLOSE_WAIT state and data can be
+// read until the socket is closed, at which time a FIN will be sent.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// FIN is acked and the socket goes to TCP_CLOSE_WAIT state
+// in tcp_fin() called from tcp_data_queue().
+ +0 < F. 11:11(0) ack 1 win 32792
+ +0 > . 1:1(0) ack 12
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 %{ assert tcpi_state == TCP_CLOSE_WAIT, tcpi_state }%
+
+ +0 read(4, ..., 512) = 10
+ +0 close(4) = 0
+ +0 > F. 1:1(0) ack 12
+ * > F. 1:1(0) ack 12
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt
new file mode 100644
index 000000000000..d5543672e2bd
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send an ICMP host_unreachable pkt to a pending SYN_RECV req.
+//
+// If it's a TFO req, the ICMP error will cause it to switch
+// to TCP_CLOSE state but remains in the acceptor queue.
+
+--ip_version=ipv4
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// Out-of-window icmp is ignored but accounted.
+ +0 `nstat > /dev/null`
+ +0 < icmp unreachable [5000:6000(1000)]
+ +0 `nstat | grep TcpExtOutOfWindowIcmps > /dev/null`
+
+// Valid ICMP unreach.
+ +0 < icmp unreachable host_unreachable [0:10(10)]
+
+// Unlike the non-TFO case, the req is still there to be accepted.
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+// tcp_done_with_error() in tcp_v4_err() sets sk->sk_state
+// to TCP_CLOSE
+ +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+// The 1st read will succeed and return the data in SYN
+ +0 read(4, ..., 512) = 10
+
+// The 2nd read will fail.
+ +0 read(4, ..., 512) = -1 EHOSTUNREACH (No route to host)
+
+// But is no longer writable because it's in TCP_CLOSE state.
+ +0 write(4, ..., 100) = -1 EPIPE (Broken Pipe)
+
+// inbound pkt will trigger RST because the socket has been moved
+// off the TCP hash tables.
+ +0 < . 1:1(0) ack 1 win 32792
+ +0 > R 1:1(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt
new file mode 100644
index 000000000000..040d5547ed80
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a RST to a TFO socket after it has been accepted.
+//
+// First read() will return all the data and this is consistent
+// with the non-TFO case. Second read will return -1
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }%
+
+// 1st read will return the data from SYN.
+// tcp_reset() sets sk->sk_err to ECONNRESET for SYN_RECV.
+ +0 < R. 11:11(0) win 32792
+ +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+// This one w/o ACK bit will cause the same effect.
+// +0 < R 11:11(0) win 32792
+// See Step 2 in tcp_validate_incoming().
+
+// found_ok_skb in tcp_recvmsg_locked()
+ +0 read(4, ..., 512) = 10
+
+// !copied && sk->sk_err -> sock_error(sk)
+ +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer)
+ +0 close(4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt
new file mode 100644
index 000000000000..7f9de6c66cbd
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a RST to a TFO socket before it is accepted.
+//
+// The socket won't go away and after it's accepted the data
+// in the SYN pkt can still be read. But that's about all that
+// the acceptor can do with the socket.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// 1st read will return the data from SYN.
+ +0 < R. 11:11(0) win 257
+
+// This one w/o ACK bit will cause the same effect.
+// +0 < R 11:11(0) win 257
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+ +0 read(4, ..., 512) = 10
+ +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer)
+ +0 close(4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt
new file mode 100644
index 000000000000..548a87701b5d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a RST to a TFO socket after it is accepted.
+//
+// The socket will change to TCP_CLOSE state with pending data so
+// write() will fail. Pending data can be still be read and close()
+// won't trigger RST if data is not read
+//
+// 565b7b2d2e63 ("tcp: do not send reset to already closed sockets")
+// https://lore.kernel.org/netdev/4C1A2502.1030502@openvz.org/
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop, FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }%
+
+// tcp_done() sets sk->sk_state to TCP_CLOSE and clears tp->fastopen_rsk
+ +0 < R. 11:11(0) win 32792
+ +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+ +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer)
+ +0 close(4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt
new file mode 100644
index 000000000000..20090bf77655
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a RST to a fully established socket with pending data before
+// it is accepted.
+//
+// The socket with pending data won't go away and can still be accepted
+// with data read. But it will be in TCP_CLOSE state.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Invalid cookie, so accept() fails.
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO aaaaaaaaaaaaaaaa,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK, FO TFO_COOKIE,nop,nop>
+
+ +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable)
+
+// Complete 3WHS and send data and RST
+ +0 < . 1:1(0) ack 1 win 32792
+ +0 < . 1:11(10) ack 1 win 32792
+ +0 < R. 11:11(0) win 32792
+
+// A valid reset won't make the fully-established socket go away.
+// It's just that the acceptor will get a dead, unusable socket
+// in TCP_CLOSE state.
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+ +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+ +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer)
+ +0 read(4, ..., 512) = 10
+ +0 read(4, ..., 512) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt
new file mode 100644
index 000000000000..9f52d7de3436
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Test the server cookie is generated by aes64 encoding of remote and local
+// IP addresses with a master key specified via sockopt TCP_FASTOPEN_KEY
+//
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen_key=00000000-00000000-00000000-00000000`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+// Set a key of a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 (big endian).
+// This would produce a cookie of TFO_COOKIE like many other
+// tests (which the same key but set via sysctl).
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY,
+ "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Request a valid cookie TFO_COOKIE
+ +0 < S 1428932:1428942(10) win 10000 <mss 1012,nop,nop,FO,sackOK,TS val 1 ecr 0,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1428933 <mss 1460,sackOK,TS val 10000 ecr 1,nop,wscale 8,FO TFO_COOKIE,nop,nop>
+ +0 < . 1:1(0) ack 1 win 257 <nop,nop,TS val 2 ecr 10000>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+
+ +0 close(4) = 0
+ +0 > F. 1:1(0) ack 1 <nop,nop,TS val 10001 ecr 2>
+ +0 < F. 1:1(0) ack 2 win 257 <nop,nop,TS val 3 ecr 10001>
+ +0 > . 2:2(0) ack 2 <nop,nop,TS val 10002 ecr 3>
+
+ +0 close(3) = 0
+
+// Restart the listener
+ +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Test setting the key in the listen state, and produces an identical cookie
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY,
+ "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0
+
+ +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7>
+ +0 > S. 0:0(0) ack 6815001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 < . 1001:1001(0) ack 1 win 257 <nop,nop,TS val 12 ecr 10000>
+ +0 read(4, ..., 8192) = 1000
+
+ +0 close(4) = 0
+ +0 > F. 1:1(0) ack 1001 <nop,nop,TS val 10101 ecr 12>
+ +0 < F. 1001:1001(0) ack 2 win 257 <nop,nop,TS val 112 ecr 10101>
+ +0 > . 2:2(0) ack 1002 <nop,nop,TS val 10102 ecr 112>
+
+ +0 close(3) = 0
+
+// Restart the listener
+ +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Test invalid key length (must be 16 bytes)
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 0) = -1 (Invalid Argument)
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 3) = -1 (Invalid Argument)
+
+// Previous cookie won't be accepted b/c this listener uses the global key (0-0-0-0)
+ +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7>
+ +0 > S. 0:0(0) ack 6814001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8,FO TFO_COOKIE_ZERO,nop,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt
new file mode 100644
index 000000000000..e82e06da44c9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Close a listener socket with pending TFO child.
+// This will trigger RST pkt to go out.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// RST pkt is generated for each not-yet-accepted TFO child.
+// inet_csk_listen_stop() -> inet_child_forget() -> tcp_disconnect()
+// -> tcp_need_reset() is true for SYN_RECV
+ +0 close(3) = 0
+ +0 > R. 1:1(0) ack 11
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt
new file mode 100644
index 000000000000..2a148bb14cbf
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,nop,nop,sackOK,nop,nop,FO TFO_COOKIE>
+ +0 > S. 0:0(0) ack 11 win 65535 <mss 1460,nop,nop,sackOK>
+
+// sk->sk_state is TCP_SYN_RECV
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }%
+
+// tcp_disconnect() sets sk->sk_state to TCP_CLOSE
+ +0 connect(4, AF_UNSPEC, ...) = 0
+ +0 > R. 1:1(0) ack 11 win 65535
+ +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+// connect() sets sk->sk_state to TCP_SYN_SENT
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation is now in progress)
+ +0 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 %{ assert tcpi_state == TCP_SYN_SENT, tcpi_state }%
+
+// tp->fastopen_rsk must be NULL
+ +1 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt
new file mode 100644
index 000000000000..09fb63f78a0e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Close a TFO socket with unread data.
+// This will trigger a RST pkt.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }%
+
+// data_was_unread == true in __tcp_close()
+ +0 close(4) = 0
+ +0 > R. 1:1(0) ack 11
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
new file mode 100644
index 000000000000..e13f0eee9795
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP_INQ and TCP_CM_INQ on the client side.
+
+--mss=1000
+
+`./defaults.sh
+`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Connect to the server and enable TCP_INQ.
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 setsockopt(3, SOL_TCP, TCP_INQ, [1], 4) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 5792 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 7>
+ +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700>
+
+// Now we have 10K of data ready on the socket.
+ +0 < . 1:10001(10000) ack 1 win 514
+ +0 > . 1:1(0) ack 10001 <nop,nop,TS val 200 ecr 700>
+
+// We read 1K and we should have 9K ready to read.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{..., 1000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=9000}]}, 0) = 1000
+// We read 9K and we should have no further data ready to read.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{..., 9000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=0}]}, 0) = 9000
+
+// Server sends more data and closes the connections.
+ +0 < F. 10001:20001(10000) ack 1 win 514
+ +0 > . 1:1(0) ack 20002 <nop,nop,TS val 200 ecr 700>
+
+// We read 10K and we should have one "fake" byte because the connection is
+// closed.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{..., 10000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=1}]}, 0) = 10000
+// Now, receive EOF.
+ +0 read(3, ..., 2000) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
new file mode 100644
index 000000000000..14dd5f813d50
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP_INQ and TCP_CM_INQ on the server side.
+
+--mss=1000
+
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+// Accept the connection and enable TCP_INQ.
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_TCP, TCP_INQ, [1], 4) = 0
+
+// Now we have 10K of data ready on the socket.
+ +0 < . 1:10001(10000) ack 1 win 514
+ +0 > . 1:1(0) ack 10001
+
+// We read 2K and we should have 8K ready to read.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 2000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=8000}]}, 0) = 2000
+// We read 8K and we should have no further data ready to read.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 8000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=0}]}, 0) = 8000
+// Client sends more data and closes the connections.
+ +0 < F. 10001:20001(10000) ack 1 win 514
+ +0 > . 1:1(0) ack 20002
+
+// We read 10K and we should have one "fake" byte because the connection is
+// closed.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 10000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=1}]}, 0) = 10000
+// Now, receive error.
+ +0 read(3, ..., 2000) = -1 ENOTCONN (Transport endpoint is not connected)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt
new file mode 100644
index 000000000000..96b01eb5b7a4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test RFC 3042 "Limited Transmit": "sending a new data segment in
+// response to each of the first two duplicate acknowledgments that
+// arrive at the sender".
+// This variation tests a receiver that doesn't support SACK.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Write some data, and send the initial congestion window.
+ +0 write(4, ..., 15000) = 15000
+ +0 > P. 1:10001(10000) ack 1
+
+// Limited transmit: on first dupack, send a new data segment.
+ +.11 < . 1:1(0) ack 1 win 320
+ +0 > . 10001:11001(1000) ack 1
+
+// Limited transmit: on second dupack, send a new data segment.
+ +.01 < . 1:1(0) ack 1 win 320
+ +0 > . 11001:12001(1000) ack 1
+
+// It turned out to be reordering, not loss.
+// We have one packet newly acked (1001:3001 were DUP-ACK'd)
+// So we revert state back to Open. Slow start cwnd from 10 to 11
+// and send 11 - 9 = 2 packets
+ +.01 < . 1:1(0) ack 3001 win 320
+ +0 > P. 12001:14001(2000) ack 1
+
+ +.02 < . 1:1(0) ack 5001 win 320
+ +0 > P. 14001:15001(1000) ack 1
+
+// Client gradually ACKs all data.
+ +.02 < . 1:1(0) ack 7001 win 320
+ +.02 < . 1:1(0) ack 9001 win 320
+ +.02 < . 1:1(0) ack 11001 win 320
+ +.02 < . 1:1(0) ack 13001 win 320
+ +.02 < . 1:1(0) ack 15001 win 320
+
+// Clean up.
+ +.17 close(4) = 0
+ +0 > F. 15001:15001(0) ack 1
+ +.1 < F. 1:1(0) ack 15002 win 257
+ +0 > . 15002:15002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt
new file mode 100644
index 000000000000..642da51ec3a4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test RFC 3042 "Limited Transmit": "sending a new data segment in
+// response to each of the first two duplicate acknowledgments that
+// arrive at the sender".
+// This variation tests a receiver that supports SACK.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Write some data, and send the initial congestion window.
+ +0 write(4, ..., 15000) = 15000
+ +0 > P. 1:10001(10000) ack 1
+
+// Limited transmit: on first dupack, send a new data segment.
+ +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop>
+ +0 > . 10001:11001(1000) ack 1
+
+// Limited transmit: on second dupack, send a new data segment.
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop>
+ +0 > . 11001:12001(1000) ack 1
+
+// It turned out to be reordering, not loss.
+ +.01 < . 1:1(0) ack 3001 win 320
+ +0 > P. 12001:14001(2000) ack 1
+
+ +.02 < . 1:1(0) ack 5001 win 320
+ +0 > P. 14001:15001(1000) ack 1
+
+// Client gradually ACKs all data.
+ +.02 < . 1:1(0) ack 7001 win 320
+ +.02 < . 1:1(0) ack 9001 win 320
+ +.02 < . 1:1(0) ack 11001 win 320
+ +.02 < . 1:1(0) ack 13001 win 320
+ +.02 < . 1:1(0) ack 15001 win 320
+
+// Clean up.
+ +.17 close(4) = 0
+ +0 > F. 15001:15001(0) ack 1
+ +.1 < F. 1:1(0) ack 15002 win 257
+ +0 > . 15002:15002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt
new file mode 100644
index 000000000000..25dfef95d3f8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test what happens when client does not provide MD5 on SYN,
+// but then does on the ACK that completes the three-way handshake.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// Ooh, weird: client provides MD5 option on the ACK:
+ +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop>
+ +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop>
+
+// The TCP listener refcount should be 2, but on buggy kernels it can be 0:
+ +0 `grep " 0A " /proc/net/tcp /proc/net/tcp6 | grep ":1F90"`
+
+// Now here comes the legit ACK:
+ +.01 < . 1:1(0) ack 1 win 514
+
+// Make sure the connection is OK:
+ +0 accept(3, ..., ...) = 4
+
+ +.01 write(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt
new file mode 100644
index 000000000000..7adae7a9ef4a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// This is a test inspired by an Android client app using SSL. This
+// test verifies using TCP_NODELAY would save application latency
+// (Perhaps even better with TCP_NAGLE).
+//
+`./defaults.sh
+ethtool -K tun0 tso off gso off
+./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+
+ +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < S. 0:0(0) ack 1 win 5792 <mss 974,nop,nop,sackOK,nop,wscale 7>
+ +0 > . 1:1(0) ack 1
+
+// SSL handshake (resumed session)
+ +0 write(4, ..., 517) = 517
+ +0 > P. 1:518(517) ack 1
+ +.1 < . 1:1(0) ack 518 win 229
+
+ +0 < P. 1:144(143) ack 1 win 229
+ +0 > . 518:518(0) ack 144
+ +0 read(4, ..., 1000) = 143
+
+// Application POST header (51B) and body (2002B)
+ +0 write(4, ..., 51) = 51
+ +0 > P. 518:569(51) ack 144
+ +.03 write(4, ..., 2002) = 2002
+ +0 > . 569:1543(974) ack 144
+ +0 > P. 1543:2517(974) ack 144
+// Without disabling Nagle, this packet will not happen until the remote ACK.
+ +0 > P. 2517:2571(54) ack 144
+
+ +.1 < . 1:1(0) ack 2571 win 229
+
+// Reset sysctls
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt
new file mode 100644
index 000000000000..fa9c01813996
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test the MSG_MORE flag will correctly corks the tiny writes
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+// Disable Nagle by default on this socket.
+ +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+
+// Test the basic case: MSG_MORE overwrites TCP_NODELAY and enables Nagle.
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 40}], msg_flags=0}, MSG_MORE) = 40
+ +.21~+.215 > P. 1:41(40) ack 1
+ +.01 < . 1:1(0) ack 41 win 257
+
+// Test unsetting MSG_MORE releases the packet
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 100}], msg_flags=0}, MSG_MORE) = 100
++.005 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 160}], msg_flags=0}, MSG_MORE) = 160
+ +.01 sendmsg(4, {msg_name(...)=...,
+ msg_iov(3)=[{..., 100}, {..., 200}, {..., 195}],
+ msg_flags=0}, MSG_MORE) = 495
++.008 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 5}], msg_flags=0}, 0) = 5
+ +0 > P. 41:801(760) ack 1
+ +.02 < . 1:1(0) ack 801 win 257
+
+
+// Test >MSS write will unleash MSS packets but hold on the remaining data.
+ +.1 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 3100}], msg_flags=0}, MSG_MORE) = 3100
+ +0 > . 801:3801(3000) ack 1
++.003 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 50}], msg_flags=0}, MSG_MORE) = 50
+
+ +.01 < . 1:1(0) ack 2801 win 257
+// Err... we relase the remaining right after the ACK? note that PUSH is reset
+ +0 > . 3801:3951(150) ack 1
+
+// Test we'll hold on the subsequent writes when inflight (3801:3951) > 0
++.001 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 1}], msg_flags=0}, MSG_MORE) = 1
++.002 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 2}], msg_flags=0}, MSG_MORE) = 2
++.003 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 3}], msg_flags=0}, MSG_MORE) = 3
++.004 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 4}], msg_flags=0}, MSG_MORE) = 4
+ +.02 < . 1:1(0) ack 3951 win 257
+ +0 > . 3951:3961(10) ack 1
+ +.02 < . 1:1(0) ack 3961 win 257
+
+
+// Test the case a MSG_MORE send followed by a write flushes the data
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 20}], msg_flags=0}, MSG_MORE) = 20
+ +.05 write(4, ..., 20) = 20
+ +0 > P. 3961:4001(40) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt
new file mode 100644
index 000000000000..0ddec5f7dc1a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP_CORK and TCP_NODELAY sockopt behavior
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+// Set TCP_CORK sockopt to hold small packets
+ +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0
+
+ +0 write(4, ..., 40) = 40
+ +.05 write(4, ..., 40) = 40
+
+// Unset TCP_CORK should push pending bytes out
+ +.01 setsockopt(4, SOL_TCP, TCP_CORK, [0], 4) = 0
+ +0 > P. 1:81(80) ack 1
+ +.01 < . 1:1(0) ack 81 win 257
+
+// Set TCP_CORK sockopt to hold small packets
+ +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0
+
+ +0 write(4, ..., 40) = 40
+ +.05 write(4, ..., 40) = 40
+
+// Set TCP_NODELAY sockopt should push pending bytes out
+ +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+ +0 > P. 81:161(80) ack 1
+ +.01 < . 1:1(0) ack 161 win 257
+
+// Set MSG_MORE to hold small packets
+ +0 send(4, ..., 40, MSG_MORE) = 40
+ +.05 send(4, ..., 40, MSG_MORE) = 40
+
+// Set TCP_NODELAY sockopt should push pending bytes out
+ +.01 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+ +0 > . 161:241(80) ack 1
+ +.01 < . 1:1(0) ack 241 win 257
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt
new file mode 100644
index 000000000000..09aabc775e80
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
+
+// Test that a not-yet-accepted socket does not change
+// its initial sk_rcvbuf (tcp_rmem[1]) when receiving ooo packets.
+
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 < . 2001:41001(39000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001>
+ +0 < . 41001:101001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001>
+ +0 < . 1:1001(1000) ack 1 win 257
+ +0 > . 1:1(0) ack 1001 <nop,nop,sack 2001:101001>
+ +0 < . 1001:2001(1000) ack 1 win 257
+ +0 > . 1:1(0) ack 101001
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }%
+
+ +0 close(4) = 0
+ +0 close(3) = 0
+
+// Test that ooo packets for accepted sockets do increase sk_rcvbuf
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < . 2001:41001(39000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001>
+ +0 < . 41001:101001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001>
+
+ +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }%
+
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
new file mode 100644
index 000000000000..7e6bc5fb0c8d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
+
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < . 2001:11001(9000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001>
+
+// check that ooo packet properly updates tcpi_rcv_mss
+ +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }%
+
+ +0 < . 11001:21001(10000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001>
+
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
new file mode 100644
index 000000000000..3848b419e68c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:4001(4000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +0 < P. 4001:54001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +1 < P. 5001:55001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +0 < P. 70001:80001(10000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+ +0 read(4, ..., 100000) = 4000
+
+// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd
+ +0 < P. 4001:54001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 54001 win 0
+
+// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times.
++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
new file mode 100644
index 000000000000..f575c0ff89da
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:20001(20000) ack 1 win 257
+ +.04 > . 1:1(0) ack 20001 win 18000
+
+ +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 20001 win 18000
+
+ +0 read(4, ..., 20000) = 20000
+// A too big packet is accepted if the receive queue is empty
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 80001 win 0
+
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt
new file mode 100644
index 000000000000..47550df124ce
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Test SYN+ACK RTX with 1s RTO.
+//
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_rto_max_ms=1000`
+
+//
+// Test 1: TFO SYN+ACK
+//
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 1000 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// RTO must be capped to 1s
+ +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+ +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+ +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 < . 11:11(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+ +0 close(4) = 0
+ +0 close(3) = 0
+
+
+//
+// Test 2: non-TFO SYN+ACK
+//
+ +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 1000 <mss 1460,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+
+// RTO must be capped to 1s
+ +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+
+ +0 < . 1:1(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+
+ +0 close(4) = 0
+ +0 close(3) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt
new file mode 100644
index 000000000000..310ef31518da
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+// Verify that setsockopt calls that force a route refresh do not
+// cause problems matching SACKs with packets in the write queue.
+// This variant tests IP_TOS.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_IP, IP_MTU_DISCOVER, [IP_PMTUDISC_DONT], 1) = 0
+ +0...0.010 connect(3, ..., ...) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 65535 <mss 1460,nop,wscale 2,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
+
+ +.01 write(3, ..., 5840) = 5840
+ +0 > P. 1:5841(5840) ack 1
+ +.01 < . 1:1(0) ack 5841 win 65535
+
+ +.01 write(3, ..., 5840) = 5840
+ +0 > P. 5841:11681(5840) ack 1
+ +.01 < . 1:1(0) ack 11681 win 65535
+
+ +.01 write(3, ..., 14600) = 14600
+ +0 > P. 11681:26281(14600) ack 1
+
+// Try the socket option that we know can force a route refresh.
+ +0 setsockopt(3, SOL_IP, IP_TOS, [4], 1) = 0
+// Then revert to avoid routing/mangling/etc implications of that setting.
+ +0 setsockopt(3, SOL_IP, IP_TOS, [0], 1) = 0
+
+// Verify that we do not retransmit the SACKed segments.
+ +.01 < . 1:1(0) ack 13141 win 65535 <sack 16061:17521 20441:26281,nop,nop>
+ +0 > . 13141:16061(2920) ack 1
+ +0 > P. 17521:20441(2920) ack 1
+ +.01 < . 1:1(0) ack 26281 win 65535
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt
new file mode 100644
index 000000000000..f185e1ac57ea
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb.
+// This variant tests non-FACK SACK with SACKs coming in the order
+// 2 6 8 3 9, to test what happens when we get a new SACKed range
+// (for packet 3) that is on the right of an existing SACKed range
+// (for packet 2).
+
+`./defaults.sh`
+
+// Establish a connection and send 10 MSS.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+ +.1 < . 1:1(0) ack 1 win 257 <sack 2001:3001,nop,nop>
++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001,nop,nop>
++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001 8001:9001,nop,nop>
+
+// 3 SACKed packets, so we enter Fast Recovery.
+ +0 > . 1:1001(1000) ack 1
+ +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }%
+ +0 %{ assert tcpi_lost == 6, tcpi_lost }%
+
+// SACK for 3001:4001.
+// This SACK for an adjacent range causes the sender to
+// shift the newly-SACKed range onto the previous skb.
++.007 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:9001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 %{ assert tcpi_lost == 5, tcpi_lost }%
+ +0 %{ assert tcpi_reordering == 6, tcpi_reordering }% // 8001:9001 -> 3001:4001 is 6
+
+// SACK for 9001:10001.
+ +.01 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop>
+ +0 %{ assert tcpi_lost == 5, tcpi_lost }%
+
+// ACK for 1:1001 as packets from t=0.303 arrive.
++.083 < . 1:1(0) ack 1001 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop>
+ +0 %{ assert tcpi_lost == 4,tcpi_lost }%
+
+// ACK for 1:4001 as packets from t=0.310 arrive.
++.017 < . 1:1(0) ack 4001 win 257 <sack 6001:7001 8001:10001,nop,nop>
+ +0 %{ assert tcpi_lost == 3,tcpi_lost }%
+
+// ACK for 1:7001 as packets from t=0.320 arrive.
+ +.01 < . 1:1(0) ack 7001 win 257 <sack 8001:10001,nop,nop>
+
+// ACK for all data as packets from t=0.403 arrive.
+ +.1 < . 1:1(0) ack 10001 win 257
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_unacked == 0, tcpi_unacked
+assert tcpi_sacked == 0, tcpi_sacked
+assert tcpi_lost == 0, tcpi_lost
+assert tcpi_retrans == 0, tcpi_retrans
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt
new file mode 100644
index 000000000000..0093b4973934
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb.
+// This variant tests the case where we mark packets 0-4 lost, then
+// get a SACK for 3, and then a SACK for 4.
+
+`./defaults.sh`
+
+// Establish a connection and send 10 MSS.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// SACK for 7001:8001. Using RACK we delay the fast retransmit.
+ +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop>
+// RACK reordering timer
++.027 > . 1:1001(1000) ack 1
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state
+assert tcpi_lost == 7, tcpi_lost # RACK thinks 1:7001 are lost
+assert tcpi_reordering == 3, tcpi_reordering
+}%
+
+// SACK for 3001:4001.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:4001 7001:8001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 %{
+assert tcpi_lost == 6, tcpi_lost # since 3001:4001 is no longer lost
+assert tcpi_reordering == 5, tcpi_reordering # 7001:8001 -> 3001:4001
+}%
+
+// SACK for 4001:5001.
+// This SACK for an adjacent range causes the sender to
+// shift the newly-SACKed range onto the previous skb.
+// It uses the RFC3517 algorithm to mark 1:3001 lost
+// because >=3 higher-sequence packets are SACKed.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:8001,nop,nop>
+ +0 > . 2001:3001(1000) ack 1
+ +0 %{
+assert tcpi_lost == 5,tcpi_lost # SACK/RFC3517 thinks 1:3001 are lost
+}%
+
+// SACK for 8001:9001.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:9001,nop,nop>
+
+// SACK for 9001:10001.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:10001,nop,nop>
+ +0 > . 5001:6001(1000) ack 1
+
+// To simplify clean-up, say we get an ACK for all data.
+ +.1 < . 1:1(0) ack 10001 win 257
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_unacked == 0, tcpi_unacked
+assert tcpi_sacked == 0, tcpi_sacked
+assert tcpi_lost == 0, tcpi_lost
+assert tcpi_retrans == 0, tcpi_retrans
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt
new file mode 100644
index 000000000000..980a832dc81c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb.
+// This variant tests the case where we mark packets 0-4 lost, then
+// get a SACK for 5, and then a SACK for 6.
+
+`./defaults.sh`
+
+// Establish a connection and send 10 MSS.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// SACK for 7001:8001. Using RACK we delay a fast retransmit.
+ +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop>
++.027 > . 1:1001(1000) ack 1
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state
+assert tcpi_lost == 7,tcpi_lost # RACK thinks 1:7001 are lost
+assert tcpi_reordering == 3, tcpi_reordering
+}%
+
+// SACK for 5001:6001.
+ +0 < . 1:1(0) ack 1 win 257 <sack 5001:6001 7001:8001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 %{
+assert tcpi_lost == 6, tcpi_lost
+assert tcpi_reordering == 3, tcpi_reordering # 7001:8001 -> 5001:6001 is 3
+}%
+
+// SACK for 6001:7001.
+// This SACK for an adjacent range causes the sender to
+// shift the newly-SACKed range onto the previous skb.
+ +0 < . 1:1(0) ack 1 win 257 <sack 5001:8001,nop,nop>
+ +0 > . 2001:3001(1000) ack 1
+ +0 %{ assert tcpi_lost == 5, tcpi_lost }%
+
+// SACK for 8001:9001.
+ +0 < . 1:1(0) ack 1 win 257 <sack 5001:9001,nop,nop>
+ +0 > . 3001:4001(1000) ack 1
+
+// SACK for 9001:10001.
+ +0 < . 1:1(0) ack 1 win 257 <sack 5001:10001,nop,nop>
+ +0 > . 4001:5001(1000) ack 1
+
+// To simplify clean-up, say we get an ACK for all data.
+ +.1 < . 1:1(0) ack 10001 win 257
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_unacked == 0, tcpi_unacked
+assert tcpi_sacked == 0, tcpi_sacked
+assert tcpi_lost == 0, tcpi_lost
+assert tcpi_retrans == 0, tcpi_retrans
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt
new file mode 100644
index 000000000000..6740859a1360
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+// Simplest possible test of open() and then sendfile().
+// We write some zeroes into a file (since packetdrill expects payloads
+// to be all zeroes) and then open() the file, then use sendfile()
+// and verify that the correct number of zeroes goes out.
+
+`./defaults.sh
+/bin/rm -f /tmp/testfile
+/bin/dd bs=1 count=5 if=/dev/zero of=/tmp/testfile status=none
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 open("/tmp/testfile", O_RDONLY) = 5
+ +0 sendfile(4, 5, [0], 5) = 5
+ +0 > P. 1:6(5) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt
new file mode 100644
index 000000000000..795c476d222d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow.
+// In this variant, the receiver ACKs every packet.
+
+// Set up config. To keep things simple, disable the
+// mechanism that defers sending in order to send bigger TSO packets.
+`./defaults.sh
+sysctl -q net.ipv4.tcp_tso_win_divisor=100`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 30000) = 30000
+ +0 > P. 1:10001(10000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 1001 win 257
+ +0 > P. 10001:12001(2000) ack 1
+
+ +0 < . 1:1(0) ack 2001 win 257
+ +0 > P. 12001:14001(2000) ack 1
+
++.005 < . 1:1(0) ack 3001 win 257
+ +0 > P. 14001:16001(2000) ack 1
+
+ +0 < . 1:1(0) ack 4001 win 257
+ +0 > P. 16001:18001(2000) ack 1
+
++.005 < . 1:1(0) ack 5001 win 257
+ +0 > P. 18001:20001(2000) ack 1
+
+ +0 < . 1:1(0) ack 6001 win 257
+ +0 > P. 20001:22001(2000) ack 1
+
++.005 < . 1:1(0) ack 7001 win 257
+ +0 > P. 22001:24001(2000) ack 1
+
+ +0 < . 1:1(0) ack 8001 win 257
+ +0 > P. 24001:26001(2000) ack 1
+
++.005 < . 1:1(0) ack 9001 win 257
+ +0 > P. 26001:28001(2000) ack 1
+
+ +0 < . 1:1(0) ack 10001 win 257
+ +0 > P. 28001:30001(2000) ack 1
+
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt
new file mode 100644
index 000000000000..9212ae1fd0f2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when an outstanding flight of packets is
+// less than the current cwnd, and not big enough to bump up cwnd.
+//
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Set up config.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+
+// Only send 5 packets.
+ +0 write(4, ..., 5000) = 5000
+ +0 > P. 1:5001(5000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 2001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 4001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 5001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt
new file mode 100644
index 000000000000..416c901ddf51
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when an outstanding flight of packets is
+// less than the current cwnd, but still big enough that in slow
+// start we want to increase our cwnd a little.
+//
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Set up config.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+
+// Only send 6 packets.
+ +0 write(4, ..., 6000) = 6000
+ +0 > P. 1:6001(6000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 2001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 4001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 6001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt
new file mode 100644
index 000000000000..a894b7d4559c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow.
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Set up config. To keep things simple, disable the
+// mechanism that defers sending in order to send bigger TSO packets.
+`./defaults.sh
+sysctl -q net.ipv4.tcp_tso_win_divisor=100`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 30000) = 30000
+ +0 > P. 1:10001(10000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2001 win 257
+ +0 > P. 10001:14001(4000) ack 1
+
++.005 < . 1:1(0) ack 4001 win 257
+ +0 > P. 14001:18001(4000) ack 1
+
++.005 < . 1:1(0) ack 6001 win 257
+ +0 > P. 18001:22001(4000) ack 1
+
++.005 < . 1:1(0) ack 8001 win 257
+ +0 > P. 22001:26001(4000) ack 1
+
++.005 < . 1:1(0) ack 10001 win 257
+ +0 > P. 26001:30001(4000) ack 1
+
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt
new file mode 100644
index 000000000000..065fae9e9abd
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow.
+// In this variant, the receiver sends one ACK per 4 packets.
+
+// Set up config. To keep things simple, disable the
+// mechanism that defers sending in order to send bigger TSO packets.
+`./defaults.sh
+sysctl -q net.ipv4.tcp_tso_win_divisor=100`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 30000) = 30000
+ +0 > P. 1:10001(10000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +.11 < . 1:1(0) ack 4001 win 257
+ +0 > P. 10001:18001(8000) ack 1
+
+ +.01 < . 1:1(0) ack 8001 win 257
+ +0 > P. 18001:26001(8000) ack 1
+
++.005 < . 1:1(0) ack 10001 win 257
+ +0 > P. 26001:30001(4000) ack 1
+
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt
new file mode 100644
index 000000000000..11b213be1138
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start after idle
+// This test expects tso size to be at least initial cwnd * mss
+
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \
+ /proc/sys/net/ipv4/tcp_min_tso_segs=10`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 511
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 26000) = 26000
+ +0 > P. 1:5001(5000) ack 1
+ +0 > P. 5001:10001(5000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +.1 < . 1:1(0) ack 10001 win 511
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+ +0 > P. 10001:20001(10000) ack 1
+ +0 > P. 20001:26001(6000) ack 1
+
+ +.1 < . 1:1(0) ack 26001 win 511
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+ +2 write(4, ..., 20000) = 20000
+// If slow start after idle works properly, we should send 5 MSS here (cwnd/2)
+ +0 > P. 26001:31001(5000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+// Reset sysctls
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt
new file mode 100644
index 000000000000..577ed8c8852c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start after window update
+// This test expects tso size to be at least initial cwnd * mss
+
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \
+ /proc/sys/net/ipv4/tcp_min_tso_segs=10`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 511
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 26000) = 26000
+ +0 > P. 1:5001(5000) ack 1
+ +0 > P. 5001:10001(5000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +.1 < . 1:1(0) ack 10001 win 511
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+ +0 > P. 10001:20001(10000) ack 1
+ +0 > P. 20001:26001(6000) ack 1
+
+ +.1 < . 1:1(0) ack 26001 win 0
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+ +0 write(4, ..., 20000) = 20000
+// 1st win0 probe
++.3~+.310 > . 26000:26000(0) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+// 2nd win0 probe
++.6~+.620 > . 26000:26000(0) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+// 3rd win0 probe
++1.2~+1.240 > . 26000:26000(0) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+ +.9 < . 1:1(0) ack 26001 win 511
+ +0 > P. 26001:31001(5000) ack 1
+
+// Reset sysctls
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt
new file mode 100644
index 000000000000..869f32c35a2a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when application-limited: in this case,
+// with IW10, if we don't fully use our cwnd but instead
+// send just 9 packets, then cwnd should grow to twice that
+// value, or 18 packets.
+
+// Set up config.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 9000) = 9000
+ +0 > P. 1:9001(9000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 4001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 6001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 8001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 9001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt
new file mode 100644
index 000000000000..0f77b7955db6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when application-limited: in this case,
+// with IW10, if we send exactly 10 packets then cwnd should grow to 20.
+
+// Set up config.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 4001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 6001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 8001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 10001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt
new file mode 100644
index 000000000000..7e9c83d617c2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow, even if TSQ triggers.
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Note we use FQ/pacing to check if TCP Small Queues is not hurting
+
+`./defaults.sh
+tc qdisc replace dev tun0 root fq
+sysctl -q net/ipv4/tcp_pacing_ss_ratio=200
+sysctl -e -q net.ipv4.tcp_min_tso_segs=2`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 500
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 40000) = 40000
+// This might change if we cook the initial packet with 10 MSS.
+ +0 > P. 1:2921(2920) ack 1
+ +0 > P. 2921:5841(2920) ack 1
+ +0 > P. 5841:8761(2920) ack 1
+ +0 > P. 8761:11681(2920) ack 1
+ +0 > P. 11681:14601(2920) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2921 win 500
+ +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+
+// Note: after this commit : "net_sched: sch_fq: account for schedule/timers drifts"
+// FQ notices that this packet missed the 'time to send next packet' computed
+// when prior packet (11681:14601(2920)) was sent.
+// So FQ will allow following packet to be sent a bit earlier (quantum/2)
+// (FQ commit allows an application/cwnd limited flow to get at most quantum/2 extra credit)
+ +0 > P. 14601:17521(2920) ack 1
+
++.003 < . 1:1(0) ack 5841 win 500
+ +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+
++.001 > P. 17521:20441(2920) ack 1
+
++.001 < . 1:1(0) ack 8761 win 500
+ +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+
+// remaining packets are delivered at a constant rate.
++.007 > P. 20441:23361(2920) ack 1
+
++.002 < . 1:1(0) ack 11681 win 500
+ +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
++.001 < . 1:1(0) ack 14601 win 500
+
++.004 > P. 23361:26281(2920) ack 1
+
++.007 > P. 26281:29201(2920) ack 1
+
+ +0 %{ assert tcpi_snd_cwnd == 20, 'cwnd=%d' % tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt
new file mode 100644
index 000000000000..0cbd43253236
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+`./defaults.sh`
+
+// Initialize a server socket
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_IP, IP_FREEBIND, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Connection should get accepted
+ +0 < S 0:0(0) win 32972 <mss 1460,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <...>
+ +0 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+
+ +0 pipe([5, 6]) = 0
+ +0 < U. 1:101(100) ack 1 win 257 urg 100
+ +0 splice(4, NULL, 6, NULL, 99, 0) = 99
+ +0 splice(4, NULL, 6, NULL, 1, 0) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt
new file mode 100644
index 000000000000..8940726a3ec2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP fastopen behavior with NULL as buffer pointer, but a non-zero
+// buffer length.
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0`
+
+// Cache warmup: send a Fast Open cookie request
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress)
++0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO,nop,nop>
++0 < S. 123:123(0) ack 1 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6,FO abcd1234,nop,nop>
++0 > . 1:1(0) ack 1
++0 close(3) = 0
++0 > F. 1:1(0) ack 1
++0 < F. 1:1(0) ack 2 win 92
++0 > . 2:2(0) ack 2
+
+// Test with MSG_FASTOPEN without TCP_FASTOPEN_CONNECT.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 sendto(4, NULL, 1, MSG_FASTOPEN, ..., ...) = -1
++0 close(4) = 0
+
+// Test with TCP_FASTOPEN_CONNECT without MSG_FASTOPEN.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5
++0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 setsockopt(5, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
++0 connect(5, ..., ...) = 0
++0 sendto(5, NULL, 1, 0, ..., ...) = -1
++0 close(5) = 0
+
+// Test with both TCP_FASTOPEN_CONNECT and MSG_FASTOPEN.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 6
++0 fcntl(6, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 setsockopt(6, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
++0 connect(6, ..., ...) = 0
++0 sendto(6, NULL, 1, MSG_FASTOPEN, ..., ...) = -1
++0 close(6) = 0
+
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
new file mode 100644
index 000000000000..454441e7ecff
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we correctly skip zero-length IOVs.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(4)=[{..., 0}, {..., 40}, {..., 0}, {..., 20}],
+ msg_flags=0}, 0) = 60
+ +0 > P. 1:61(60) ack 1
+ +.01 < . 1:1(0) ack 61 win 257
+
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(4)=[{..., 0}, {..., 0}, {..., 0}, {..., 0}],
+ msg_flags=0}, MSG_ZEROCOPY) = 0
+
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(4)=[{..., 0}, {..., 10}, {..., 0}, {..., 50}],
+ msg_flags=0}, MSG_ZEROCOPY) = 60
+ +0 > P. 61:121(60) ack 1
+ +.01 < . 1:1(0) ack 121 win 257
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt
new file mode 100644
index 000000000000..59f5903f285c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test kernel behavior with NULL as buffer pointer
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.2 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, NULL, 1000) = -1 EFAULT (Bad address)
+ +0 send(4, NULL, 1000, 0) = -1 EFAULT (Bad address)
+ +0 sendto(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address)
+
+ +0 < . 1:1001(1000) ack 1 win 200
+ +0 read(4, NULL, 1000) = -1 EFAULT (Bad address)
+ +0 recv(4, NULL, 1000, 0) = -1 EFAULT (Bad address)
+ +0 recvfrom(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt
new file mode 100644
index 000000000000..d7fdb43a8e89
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test tcpi_last_data_recv for active session
+`./defaults.sh`
+
+// Create a socket and set it to non-blocking.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
++0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.030 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8>
++0 > . 1:1(0) ack 1
+
++1 %{ assert 990 <= tcpi_last_data_recv <= 1010, tcpi_last_data_recv }%
+
++0 < . 1:1001(1000) ack 1 win 300
++0 > . 1:1(0) ack 1001
+
++0 %{ assert tcpi_last_data_recv <= 10, tcpi_last_data_recv }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt
new file mode 100644
index 000000000000..a9bcd46f6cb6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test rwnd limited time in tcp_info for client side.
+
+`./defaults.sh`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+
+// Server advertises 0 receive window.
+ +.01 < S. 0:0(0) ack 1 win 0 <mss 1000,nop,nop,sackOK>
+
+ +0 > . 1:1(0) ack 1
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+ +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking
+
+// Make sure that initial rwnd limited time is 0.
+ +0 %{ assert tcpi_rwnd_limited == 0, tcpi_rwnd_limited }%
+
+// Receive window limited time starts here.
+ +0 write(3, ..., 1000) = 1000
+
+// Check that rwnd limited time in tcp_info is around 0.1s.
+ +.1 %{ assert 98000 <= tcpi_rwnd_limited <= 110000, tcpi_rwnd_limited }%
+
+// Server opens the receive window.
+ +.1 < . 1:1(0) ack 1 win 2000
+
+// Check that rwnd limited time in tcp_info is around 0.2s.
+ +0 %{ assert 198000 <= tcpi_rwnd_limited <= 210000, tcpi_rwnd_limited }%
+
+ +0 > P. 1:1001(1000) ack 1
+
+// Server advertises a very small receive window.
+ +.03 < . 1:1(0) ack 1001 win 10
+
+// Receive window limited time starts again.
+ +0 write(3, ..., 1000) = 1000
+
+// Server opens the receive window again.
+ +.1 < . 1:1(0) ack 1001 win 2000
+// Check that rwnd limited time in tcp_info is around 0.3s
+// and busy time is 0.3 + 0.03 (server opened small window temporarily).
+ +0 %{ assert 298000 <= tcpi_rwnd_limited <= 310000, tcpi_rwnd_limited;\
+ assert 328000 <= tcpi_busy_time <= 340000, tcpi_busy_time;\
+}%
+
+ +0 > P. 1001:2001(1000) ack 1
+ +.02 < . 1:1(0) ack 2001 win 2000
+ +0 %{ assert 348000 <= tcpi_busy_time <= 360000, tcpi_busy_time }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt
new file mode 100644
index 000000000000..f0de2acd0f8e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test send-buffer-limited time in tcp_info for client side.
+`./defaults.sh`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8>
+ +0 > . 1:1(0) ack 1
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+ +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking
+ +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [10000], 4) = 0
+ +0 getsockopt(3, SOL_SOCKET, SO_SNDBUF, [20000], [4]) = 0
+
+ +.09...0.14 write(3, ..., 150000) = 150000
+
+ +.01 < . 1:1(0) ack 10001 win 10000
+
+ +.01 < . 1:1(0) ack 30001 win 10000
+
+// cwnd goes from 40(60KB) to 80(120KB), and that we hit the tiny sndbuf limit 10KB
+ +.01 < . 1:1(0) ack 70001 win 10000
+
+ +.02 < . 1:1(0) ack 95001 win 10000
+ +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited; \
+ assert 49000 <= tcpi_busy_time <= 52000, tcpi_busy_time; \
+ assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }%
+
+// This ack frees up enough buffer so we are no longer
+// buffer limited (socket flag SOCK_NOSPACE is cleared)
+ +.02 < . 1:1(0) ack 150001 win 10000
+ +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited;\
+ assert 69000 <= tcpi_busy_time <= 73000, tcpi_busy_time;\
+ assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt
new file mode 100644
index 000000000000..2087ec0c746a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that tx timestamping sends timestamps only for
+// the last byte of each sendmsg.
+`./defaults.sh
+`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Establish connection and verify that there was no error.
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 20000 <mss 1000,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+ +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking
+
+ +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING,
+ [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+ +0 write(3, ..., 11000) = 11000
+ +0 > P. 1:10001(10000) ack 1
+ +.01 < . 1:1(0) ack 10001 win 4000
+ +0 > P. 10001:11001(1000) ack 1
+ +.01 < . 1:1(0) ack 11001 win 4000
+
+// Make sure that internal TCP timestamps are not overwritten and we have sane
+// RTT measurement.
+ +0 %{
+assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt
+}%
+
+// SCM_TSTAMP_SCHED for the last byte should be received almost immediately
+// once 10001 is acked at t=20ms.
+// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...)
+// is called after when SYN is acked. So, we expect the last byte of the first
+// chunk to have a timestamp key of 10999 (i.e., 11000 - 1).
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SCHED,
+ ee_data=10999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the last byte should be received almost immediately
+// once 10001 is acked at t=20ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SND,
+ ee_data=10999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the last byte should be received at t=30ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=30000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=10999}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt
new file mode 100644
index 000000000000..876024a31110
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test tx timestamping for partial writes (IPv4).
+`./defaults.sh
+`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Establish connection and verify that there was no error.
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 2000 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7>
+ +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700>
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+
+ +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [1000], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING,
+ [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+// We have a partial write.
+ +0 write(3, ..., 10000) = 2964
+ +0 > . 1:989(988) ack 1 <nop,nop,TS val 110 ecr 700>
+ +0 > P. 989:1977(988) ack 1 <nop,nop,TS val 110 ecr 700>
+ +.01 < . 1:1(0) ack 1977 win 92 <nop,nop,TS val 800 ecr 200>
+ +0 > P. 1977:2965(988) ack 1 <nop,nop,TS val 114 ecr 800>
+ +.01 < . 1:1(0) ack 2965 win 92 <nop,nop,TS val 800 ecr 200>
+
+// Make sure that internal TCP timestamps are not overwritten and we have sane
+// RTT measurement.
+ +0 %{
+assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt
+}%
+
+// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately
+// after the first ack at t=20ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SCHED,
+ ee_data=2963}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the first chunk should be received almost immediately
+// after the first ack at t=20ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SND,
+ ee_data=2963}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the first chunk should be received after the last ack at
+// t=30ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=30000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=2963}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt
new file mode 100644
index 000000000000..84d94780e6be
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test tx timestamping for server-side (IPv4).
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_TIMESTAMPING,
+ [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+// Write two 2KB chunks.
+// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...)
+// is called after when SYN is acked. So, we expect the last byte of the first
+// and the second chunks to have timestamp keys of 1999 (i.e., 2000 - 1) and
+// 3999 (i.e., 4000 - 1) respectively.
+ +0 write(4, ..., 2000) = 2000
+ +0 write(4, ..., 2000) = 2000
+ +0 > P. 1:2001(2000) ack 1
+ +0 > P. 2001:4001(2000) ack 1
+ +.01 < . 1:1(0) ack 2001 win 514
+ +.01 < . 1:1(0) ack 4001 win 514
+
+// Make sure that internal TCP timestamps are not overwritten and we have sane
+// RTT measurement.
+ +0 %{
+assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt
+}%
+
+// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately
+// after write at t=10ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=10000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SCHED,
+ ee_data=1999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the first chunk should be received almost immediately
+// after write at t=10ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=10000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SND,
+ ee_data=1999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SCHED for the second chunk should be received almost immediately
+// after that at t=10ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=10000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SCHED,
+ ee_data=3999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the second chunk should be received almost immediately
+// after that at t=10ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=10000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SND,
+ ee_data=3999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the first chunk should be received at t=20ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=1999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the second chunk should be received at t=30ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=30000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=3999}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt
new file mode 100644
index 000000000000..e61424a7bd0a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we send FIN packet with correct TSval
+--tcp_ts_tick_usecs=1000
+--tolerance_usecs=7000
+
+`./defaults.sh`
+
+// Create a socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Establish a connection.
+ +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0>
+ +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100>
+ +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100>
+ +0 accept(3, ..., ...) = 4
+
+ +1 close(4) = 0
+// Check that FIN TSval is updated properly, one second has passed since last sent packet.
+ +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1200 ecr 200>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt
new file mode 100644
index 000000000000..174ce9a1bfc0
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we reject TS val updates on a packet with invalid ACK sequence
+
+`./defaults.sh
+`
+
+// Create a socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Establish a connection.
+ +.1 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0>
+ +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100>
+ +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100>
+ +0 accept(3, ..., ...) = 4
+
+// bad packet with high tsval (its ACK sequence is above our sndnxt)
+ +0 < F. 1:1(0) ack 9999 win 20000 <nop,nop,TS val 200000 ecr 100>
+
+
+ +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100>
+ +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt
new file mode 100644
index 000000000000..2e3b3bb7493a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we send RST packet with correct TSval
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh`
+
+// Create a socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Establish a connection.
+ +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0>
+ +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100>
+ +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100>
+ +0 accept(3, ..., ...) = 4
+
+ +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100>
+ +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201>
+
+ +1 close(4) = 0
+// Check that RST TSval is updated properly, one second has passed since last sent packet.
+ +0 > R. 1:1(0) ack 1001 <nop,nop,TS val 1200 ecr 201>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
new file mode 100644
index 000000000000..6882b8240a8a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+
+ +0 < S 0:0(0) win 0 <mss 1460>
+ +0 > S. 0:0(0) ack 1 <mss 1460>
+
+ +.1 < . 1:1(0) ack 1 win 65530
+ +0 accept(3, ..., ...) = 4
+
+ +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0
+ +0 write(4, ..., 24) = 24
+ +0 > P. 1:25(24) ack 1
+ +.1 < . 1:1(0) ack 25 win 65530
+ +0 %{ assert tcpi_probes == 0, tcpi_probes; \
+ assert tcpi_backoff == 0, tcpi_backoff }%
+
+// install a qdisc dropping all packets
+ +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0`
+
+ +0 write(4, ..., 24) = 24
+ // When qdisc is congested we retry every 500ms
+ // (TCP_RESOURCE_PROBE_INTERVAL) and therefore
+ // we retry 6 times before hitting 3s timeout.
+ // First verify that the connection is alive:
++3 write(4, ..., 24) = 24
+
+ // Now verify that shortly after that the socket is dead:
++1 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out)
+
+ +0 %{ assert tcpi_probes == 6, tcpi_probes; \
+ assert tcpi_backoff == 0, tcpi_backoff }%
+ +0 close(4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt
new file mode 100644
index 000000000000..2efe02bfba9c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+`./defaults.sh`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +.1 < . 1:1(0) ack 1 win 32792
+
+
+ +0 accept(3, ..., ...) = 4
+
+// Okay, we received nothing, and decide to close this idle socket.
+// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth
+// trying hard to cleanly close this flow, at the price of keeping
+// a TCP structure in kernel for about 1 minute !
+ +2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0
+ +0 close(4) = 0
+
+ +0 > F. 1:1(0) ack 1
+ +.3~+.400 > F. 1:1(0) ack 1
+ +.3~+.400 > F. 1:1(0) ack 1
+ +.6~+.800 > F. 1:1(0) ack 1
+
+// We finally receive something from the peer, but it is way too late
+// Our socket vanished because TCP_USER_TIMEOUT was really small
+ +0 < . 1:2(1) ack 1 win 32792
+ +0 > R 1:1(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt
new file mode 100644
index 000000000000..8bd60226ccfc
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Verify that established connections drop a segment without the ACK flag set.
+
+`./defaults.sh`
+
+// Create a socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Establish a connection.
+ +0 < S 0:0(0) win 20000 <mss 1000,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +.01 < . 1:1(0) ack 1 win 20000
+ +0 accept(3, ..., ...) = 4
+
+// Receive a segment with no flags set, verify that it's not enqueued.
+ +.01 < - 1:1001(1000) win 20000
+ +0 ioctl(4, SIOCINQ, [0]) = 0
+
+// Receive a segment with ACK flag set, verify that it is enqueued.
+ +.01 < . 1:1001(1000) ack 1 win 20000
+ +0 ioctl(4, SIOCINQ, [1000]) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
new file mode 100644
index 000000000000..0a0700afdaa3
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+// basic zerocopy test:
+//
+// send a packet with MSG_ZEROCOPY and receive the notification ID
+// repeat and verify IDs are consecutive
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=1,
+ ee_data=1}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
new file mode 100644
index 000000000000..df91675d2991
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+// batch zerocopy test:
+//
+// send multiple packets, then read one range of all notifications.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_MARK, [666], 4) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=1}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
new file mode 100644
index 000000000000..2963cfcb14df
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+// Minimal client-side zerocopy test
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0...0 connect(4, ..., ...) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+ +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > . 1:1(0) ack 1
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
new file mode 100644
index 000000000000..ea0c2fa73c2d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+// send with MSG_ZEROCOPY on a non-established socket
+//
+// verify that a send in state TCP_CLOSE correctly aborts the zerocopy
+// operation, specifically it does not increment the zerocopy counter.
+//
+// First send on a closed socket and wait for (absent) notification.
+// Then connect and send and verify that notification nr. is zero.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = -1 EPIPE (Broken pipe)
+
+ +0.1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable)
+
+ +0...0 connect(4, ..., ...) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+ +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > . 1:1(0) ack 1
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
new file mode 100644
index 000000000000..4df978a9b82e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+// epoll zerocopy test:
+//
+// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but
+// it is not level-triggered either.
+//
+// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
+// is correctly fired only once, when EPOLLET is set. send another packet with
+// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 epoll_create(1) = 5
+ +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, {events=EPOLLOUT|EPOLLET, fd=4}) = 0
+ +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+// receive only one EPOLLERR for the two sends above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 8001:12001(4000) ack 1
+ +0 < . 1:1(0) ack 12001 win 257
+
+// receive only one EPOLLERR for the third send above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=2}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
new file mode 100644
index 000000000000..36b6edc4858c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// epoll zerocopy test:
+//
+// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but
+// it is not level-triggered either. this tests verify that the same behavior is
+// maintained when we have EPOLLEXCLUSIVE.
+//
+// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
+// is correctly fired only once, when EPOLLET is set. send another packet with
+// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 epoll_create(1) = 5
+ +0 epoll_ctl(5, EPOLL_CTL_ADD, 4,
+ {events=EPOLLOUT|EPOLLET|EPOLLEXCLUSIVE, fd=4}) = 0
+ +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+// receive only one EPOLLERR for the two sends above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 8001:12001(4000) ack 1
+ +0 < . 1:1(0) ack 12001 win 257
+
+// receive only one EPOLLERR for the third send above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=2}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
new file mode 100644
index 000000000000..1bea6f3b4558
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+// epoll zerocopy test:
+//
+// This is a test to confirm that EPOLLERR is only fired once for an FD when
+// EPOLLONESHOT is set.
+//
+// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
+// is correctly fired only once, when EPOLLONESHOT is set. send another packet
+// with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and
+// confirm that EPOLLERR is correctly set.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 epoll_create(1) = 5
+ +0 epoll_ctl(5, EPOLL_CTL_ADD, 4,
+ {events=EPOLLOUT|EPOLLET|EPOLLONESHOT, fd=4}) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+// receive only one EPOLLERR for the two sends above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 8001:12001(4000) ack 1
+ +0 < . 1:1(0) ack 12001 win 257
+
+// receive no EPOLLERR for the third send above.
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+// rearm the FD and verify the EPOLLERR is fired again.
+ +0 epoll_ctl(5, EPOLL_CTL_MOD, 4, {events=EPOLLOUT|EPOLLONESHOT, fd=4}) = 0
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=2}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
new file mode 100644
index 000000000000..e27c21ff5d18
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+// Fastopen client zerocopy test:
+//
+// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the
+// kernel returns the notification ID.
+//
+// Fastopen requires a stored cookie. Create two sockets. The first
+// one will have no data in the initial send. On return 0 the
+// zerocopy notification counter is not incremented. Verify this too.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
+`./defaults.sh`
+
+// Send a FastOpen request, no cookie yet so no data in SYN
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 sendto(3, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 1000 ecr 0,nop,wscale 8,FO,nop,nop>
+ +.01 < S. 123:123(0) ack 1 win 14600 <mss 940,TS val 2000 ecr 1000,sackOK,nop,wscale 6, FO abcd1234,nop,nop>
+ +0 > . 1:1(0) ack 1 <nop,nop,TS val 1001 ecr 2000>
+
+// Read from error queue: no zerocopy notification
+ +1 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable)
+
+ +.01 close(3) = 0
+ +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1002 ecr 2000>
+ +.01 < F. 1:1(0) ack 2 win 92 <nop,nop,TS val 2001 ecr 1002>
+ +0 > . 2:2(0) ack 2 <nop,nop,TS val 1003 ecr 2001>
+
+// Send another Fastopen request, now SYN will have data
+ +.07 `sysctl -q net.ipv4.tcp_timestamps=0`
+ +.1 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5
+ +0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 setsockopt(5, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 sendto(5, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = 500
+ +0 > S 0:500(500) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO abcd1234,nop,nop>
+ +.05 < S. 5678:5678(0) ack 501 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6>
+ +0 > . 501:501(0) ack 1
+
+// Read from error queue: now has first zerocopy notification
+ +0.5 recvmsg(5, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
new file mode 100644
index 000000000000..b1fa77c77dfa
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+// Fastopen server zerocopy test:
+//
+// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the
+// kernel returns the notification ID.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207`
+
+// Set up a TFO server listening socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +.1 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [2], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+
+// Client sends a SYN with data.
+ +.1 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK>
+
+// Server accepts and replies with data.
++.005 accept(3, ..., ...) = 4
+ +0 read(4, ..., 1024) = 1000
+ +0 sendto(4, ..., 1000, MSG_ZEROCOPY, ..., ...) = 1000
+ +0 > P. 1:1001(1000) ack 1001
+ +.05 < . 1001:1001(0) ack 1001 win 32792
+
+// Read from error queue: now has first zerocopy notification
+ +0.1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
+
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
new file mode 100644
index 000000000000..2f5317d0a9fa
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+// tcp_MAX_SKB_FRAGS test
+//
+// Verify that sending an iovec of tcp_MAX_SKB_FRAGS + 1 elements will
+// 1) fit in a single packet without zerocopy
+// 2) spill over into a second packet with zerocopy,
+// because each iovec element becomes a frag
+// 3) the PSH bit is set on an skb when it runs out of fragments
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+
+ // Each pinned zerocopy page is fully accounted to skb->truesize.
+ // This test generates a worst case packet with each frag storing
+ // one byte, but increasing truesize with a page (64KB on PPC).
+ +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [2000000], 4) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ // send an iov of 18 elements: just becomes a linear skb
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}],
+ msg_flags=0}, 0) = 18
+
+ +0 > P. 1:19(18) ack 1
+ +0 < . 1:1(0) ack 19 win 257
+
+ // send a zerocopy iov of 18 elements:
+ +1 sendmsg(4, {msg_name(...)=...,
+ msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}],
+ msg_flags=0}, MSG_ZEROCOPY) = 18
+
+ // verify that it is split in one skb of 17 frags + 1 of 1 frag
+ // verify that both have the PSH bit set
+ +0 > P. 19:36(17) ack 1
+ +0 < . 1:1(0) ack 36 win 257
+
+ +0 > P. 36:37(1) ack 1
+ +0 < . 1:1(0) ack 37 win 257
+
+ +1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
+
+ // send a zerocopy iov of 64 elements:
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(64)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1}],
+ msg_flags=0}, MSG_ZEROCOPY) = 64
+
+ // verify that it is split in skbs with 17 frags
+ +0 > P. 37:54(17) ack 1
+ +0 < . 1:1(0) ack 54 win 257
+
+ +0 > P. 54:71(17) ack 1
+ +0 < . 1:1(0) ack 71 win 257
+
+ +0 > P. 71:88(17) ack 1
+ +0 < . 1:1(0) ack 88 win 257
+
+ +0 > P. 88:101(13) ack 1
+ +0 < . 1:1(0) ack 101 win 257
+
+ +1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=1,
+ ee_data=1}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
new file mode 100644
index 000000000000..9d5272c6b207
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+// small packet zerocopy test:
+//
+// verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy
+// packets of all sizes, including the smallest payload, 1B.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ // send 1B
+ +0 send(4, ..., 1, MSG_ZEROCOPY) = 1
+ +0 > P. 1:2(1) ack 1
+ +0 < . 1:1(0) ack 2 win 257
+
+ +1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
+
+ // send 1B again
+ +0 send(4, ..., 1, MSG_ZEROCOPY) = 1
+ +0 > P. 2:3(1) ack 1
+ +0 < . 1:1(0) ack 3 win 257
+
+ +1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=1,
+ ee_data=1}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 5175c0c83a23..a3323c21f001 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -197,9 +197,14 @@
#
# - pmtu_ipv6_route_change
# Same as above but with IPv6
+#
+# - pmtu_ipv4_mp_exceptions
+# Use the same topology as in pmtu_ipv4, but add routeable addresses
+# on host A and B on lo reachable via both routers. Host A and B
+# addresses have multipath routes to each other, b_r1 mtu = 1500.
+# Check that PMTU exceptions are created for both paths.
source lib.sh
-source net_helper.sh
PAUSE_ON_FAIL=no
VERBOSE=0
@@ -266,7 +271,8 @@ tests="
list_flush_ipv4_exception ipv4: list and flush cached exceptions 1
list_flush_ipv6_exception ipv6: list and flush cached exceptions 1
pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1
- pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1"
+ pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1
+ pmtu_ipv4_mp_exceptions ipv4: PMTU multipath nh exceptions 1"
# Addressing and routing for tests with routers: four network segments, with
# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
@@ -343,6 +349,9 @@ tunnel6_a_addr="fd00:2::a"
tunnel6_b_addr="fd00:2::b"
tunnel6_mask="64"
+host4_a_addr="192.168.99.99"
+host4_b_addr="192.168.88.88"
+
dummy6_0_prefix="fc00:1000::"
dummy6_1_prefix="fc00:1001::"
dummy6_mask="64"
@@ -681,13 +690,7 @@ setup_xfrm() {
}
setup_nettest_xfrm() {
- if ! which nettest >/dev/null; then
- PATH=$PWD:$PATH
- if ! which nettest >/dev/null; then
- echo "'nettest' command not found; skipping tests"
- return 1
- fi
- fi
+ check_gen_prog "nettest"
[ ${1} -eq 6 ] && proto="-6" || proto=""
port=${2}
@@ -990,6 +993,52 @@ setup_ovs_bridge() {
run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2
}
+setup_multipath_new() {
+ # Set up host A with multipath routes to host B host4_b_addr
+ run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo
+ run_cmd ${ns_a} ip nexthop add id 401 via ${prefix4}.${a_r1}.2 dev veth_A-R1
+ run_cmd ${ns_a} ip nexthop add id 402 via ${prefix4}.${a_r2}.2 dev veth_A-R2
+ run_cmd ${ns_a} ip nexthop add id 403 group 401/402
+ run_cmd ${ns_a} ip route add ${host4_b_addr} src ${host4_a_addr} nhid 403
+
+ # Set up host B with multipath routes to host A host4_a_addr
+ run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo
+ run_cmd ${ns_b} ip nexthop add id 401 via ${prefix4}.${b_r1}.2 dev veth_B-R1
+ run_cmd ${ns_b} ip nexthop add id 402 via ${prefix4}.${b_r2}.2 dev veth_B-R2
+ run_cmd ${ns_b} ip nexthop add id 403 group 401/402
+ run_cmd ${ns_b} ip route add ${host4_a_addr} src ${host4_b_addr} nhid 403
+}
+
+setup_multipath_old() {
+ # Set up host A with multipath routes to host B host4_b_addr
+ run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo
+ run_cmd ${ns_a} ip route add ${host4_b_addr} \
+ src ${host4_a_addr} \
+ nexthop via ${prefix4}.${a_r1}.2 weight 1 \
+ nexthop via ${prefix4}.${a_r2}.2 weight 1
+
+ # Set up host B with multipath routes to host A host4_a_addr
+ run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo
+ run_cmd ${ns_b} ip route add ${host4_a_addr} \
+ src ${host4_b_addr} \
+ nexthop via ${prefix4}.${b_r1}.2 weight 1 \
+ nexthop via ${prefix4}.${b_r2}.2 weight 1
+}
+
+setup_multipath() {
+ if [ "$USE_NH" = "yes" ]; then
+ setup_multipath_new
+ else
+ setup_multipath_old
+ fi
+
+ # Set up routers with routes to dummies
+ run_cmd ${ns_r1} ip route add ${host4_a_addr} via ${prefix4}.${a_r1}.1
+ run_cmd ${ns_r2} ip route add ${host4_a_addr} via ${prefix4}.${a_r2}.1
+ run_cmd ${ns_r1} ip route add ${host4_b_addr} via ${prefix4}.${b_r1}.1
+ run_cmd ${ns_r2} ip route add ${host4_b_addr} via ${prefix4}.${b_r2}.1
+}
+
setup() {
[ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
@@ -1040,10 +1089,11 @@ cleanup() {
cleanup_all_ns
- ip link del veth_A-C 2>/dev/null
- ip link del veth_A-R1 2>/dev/null
- cleanup_del_ovs_internal
- cleanup_del_ovs_vswitchd
+ [ -e "/sys/class/net/veth_A-C" ] && ip link del veth_A-C
+ [ -e "/sys/class/net/veth_A-R1" ] && ip link del veth_A-R1
+ [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_internal
+ [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_vswitchd
+
rm -f "$tmpoutfile"
}
@@ -1082,23 +1132,15 @@ link_get_mtu() {
}
route_get_dst_exception() {
- ns_cmd="${1}"
- dst="${2}"
- dsfield="${3}"
-
- if [ -z "${dsfield}" ]; then
- dsfield=0
- fi
+ ns_cmd="${1}"; shift
- ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}"
+ ${ns_cmd} ip route get "$@"
}
route_get_dst_pmtu_from_exception() {
- ns_cmd="${1}"
- dst="${2}"
- dsfield="${3}"
+ ns_cmd="${1}"; shift
- mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")"
+ mtu_parse "$(route_get_dst_exception "${ns_cmd}" "$@")"
}
check_pmtu_value() {
@@ -1241,10 +1283,10 @@ test_pmtu_ipv4_dscp_icmp_exception() {
run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}"
# Check that exceptions have been created with the correct PMTU
- pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
+ pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")"
check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
- pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")"
check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
}
@@ -1291,9 +1333,9 @@ test_pmtu_ipv4_dscp_udp_exception() {
UDP:"${dst2}":50000,tos="${dsfield}"
# Check that exceptions have been created with the correct PMTU
- pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
+ pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")"
check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
- pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")"
check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
}
@@ -1447,7 +1489,7 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
size=$(du -sb $tmpoutfile)
size=${size%%/tmp/*}
- [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1
+ [ $size -ne 1048576 ] && err "File size $size mismatches expected value in locally bridged vxlan test" && return 1
done
rm -f "$tmpoutfile"
@@ -2062,7 +2104,7 @@ check_running() {
pid=${1}
cmd=${2}
- [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ]
+ [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "${cmd}" ]
}
test_cleanup_vxlanX_exception() {
@@ -2335,6 +2377,36 @@ test_pmtu_ipv6_route_change() {
test_pmtu_ipvX_route_change 6
}
+test_pmtu_ipv4_mp_exceptions() {
+ setup namespaces routing multipath || return $ksft_skip
+
+ trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
+ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
+ "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
+
+ # Set up initial MTU values
+ mtu "${ns_a}" veth_A-R1 2000
+ mtu "${ns_r1}" veth_R1-A 2000
+ mtu "${ns_r1}" veth_R1-B 1500
+ mtu "${ns_b}" veth_B-R1 1500
+
+ mtu "${ns_a}" veth_A-R2 2000
+ mtu "${ns_r2}" veth_R2-A 2000
+ mtu "${ns_r2}" veth_R2-B 1500
+ mtu "${ns_b}" veth_B-R2 1500
+
+ # Ping and expect two nexthop exceptions for two routes
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -c 1 -s 1800 "${host4_b_addr}"
+
+ # Check that exceptions have been created with the correct PMTU
+ pmtu_a_R1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R1)"
+ pmtu_a_R2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R2)"
+
+ check_pmtu_value "1500" "${pmtu_a_R1}" "exceeding MTU (veth_A-R1)" || return 1
+ check_pmtu_value "1500" "${pmtu_a_R2}" "exceeding MTU (veth_A-R2)" || return 1
+}
+
usage() {
echo
echo "$0 [OPTIONS] [TEST]..."
diff --git a/tools/testing/selftests/net/proc_net_pktgen.c b/tools/testing/selftests/net/proc_net_pktgen.c
new file mode 100644
index 000000000000..fab3b5c2e25d
--- /dev/null
+++ b/tools/testing/selftests/net/proc_net_pktgen.c
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * proc_net_pktgen: kselftest for /proc/net/pktgen interface
+ *
+ * Copyright (c) 2025 Peter Seiderer <ps.report@gmx.net>
+ *
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "kselftest_harness.h"
+
+static const char ctrl_cmd_stop[] = "stop";
+static const char ctrl_cmd_start[] = "start";
+static const char ctrl_cmd_reset[] = "reset";
+
+static const char wrong_ctrl_cmd[] = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789";
+
+static const char thr_cmd_add_loopback_0[] = "add_device lo@0";
+static const char thr_cmd_rm_loopback_0[] = "rem_device_all";
+
+static const char wrong_thr_cmd[] = "forsureawrongcommand";
+static const char legacy_thr_cmd[] = "max_before_softirq";
+
+static const char wrong_dev_cmd[] = "forsurewrongcommand";
+static const char dev_cmd_min_pkt_size_0[] = "min_pkt_size";
+static const char dev_cmd_min_pkt_size_1[] = "min_pkt_size ";
+static const char dev_cmd_min_pkt_size_2[] = "min_pkt_size 0";
+static const char dev_cmd_min_pkt_size_3[] = "min_pkt_size 1";
+static const char dev_cmd_min_pkt_size_4[] = "min_pkt_size 100";
+static const char dev_cmd_min_pkt_size_5[] = "min_pkt_size=1001";
+static const char dev_cmd_min_pkt_size_6[] = "min_pkt_size =2002";
+static const char dev_cmd_min_pkt_size_7[] = "min_pkt_size= 3003";
+static const char dev_cmd_min_pkt_size_8[] = "min_pkt_size = 4004";
+static const char dev_cmd_max_pkt_size_0[] = "max_pkt_size 200";
+static const char dev_cmd_pkt_size_0[] = "pkt_size 300";
+static const char dev_cmd_imix_weights_0[] = "imix_weights 0,7 576,4 1500,1";
+static const char dev_cmd_imix_weights_1[] = "imix_weights 101,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20";
+static const char dev_cmd_imix_weights_2[] = "imix_weights 100,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20 121,21";
+static const char dev_cmd_imix_weights_3[] = "imix_weights";
+static const char dev_cmd_imix_weights_4[] = "imix_weights ";
+static const char dev_cmd_imix_weights_5[] = "imix_weights 0";
+static const char dev_cmd_imix_weights_6[] = "imix_weights 0,";
+static const char dev_cmd_debug_0[] = "debug 1";
+static const char dev_cmd_debug_1[] = "debug 0";
+static const char dev_cmd_frags_0[] = "frags 100";
+static const char dev_cmd_delay_0[] = "delay 100";
+static const char dev_cmd_delay_1[] = "delay 2147483647";
+static const char dev_cmd_rate_0[] = "rate 0";
+static const char dev_cmd_rate_1[] = "rate 100";
+static const char dev_cmd_ratep_0[] = "ratep 0";
+static const char dev_cmd_ratep_1[] = "ratep 200";
+static const char dev_cmd_udp_src_min_0[] = "udp_src_min 1";
+static const char dev_cmd_udp_dst_min_0[] = "udp_dst_min 2";
+static const char dev_cmd_udp_src_max_0[] = "udp_src_max 3";
+static const char dev_cmd_udp_dst_max_0[] = "udp_dst_max 4";
+static const char dev_cmd_clone_skb_0[] = "clone_skb 1";
+static const char dev_cmd_clone_skb_1[] = "clone_skb 0";
+static const char dev_cmd_count_0[] = "count 100";
+static const char dev_cmd_src_mac_count_0[] = "src_mac_count 100";
+static const char dev_cmd_dst_mac_count_0[] = "dst_mac_count 100";
+static const char dev_cmd_burst_0[] = "burst 0";
+static const char dev_cmd_node_0[] = "node 100";
+static const char dev_cmd_xmit_mode_0[] = "xmit_mode start_xmit";
+static const char dev_cmd_xmit_mode_1[] = "xmit_mode netif_receive";
+static const char dev_cmd_xmit_mode_2[] = "xmit_mode queue_xmit";
+static const char dev_cmd_xmit_mode_3[] = "xmit_mode nonsense";
+static const char dev_cmd_flag_0[] = "flag UDPCSUM";
+static const char dev_cmd_flag_1[] = "flag !UDPCSUM";
+static const char dev_cmd_flag_2[] = "flag nonsense";
+static const char dev_cmd_dst_min_0[] = "dst_min 101.102.103.104";
+static const char dev_cmd_dst_0[] = "dst 101.102.103.104";
+static const char dev_cmd_dst_max_0[] = "dst_max 201.202.203.204";
+static const char dev_cmd_dst6_0[] = "dst6 2001:db38:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_dst6_min_0[] = "dst6_min 2001:db8:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_dst6_max_0[] = "dst6_max 2001:db8:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_src6_0[] = "src6 2001:db38:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_src_min_0[] = "src_min 101.102.103.104";
+static const char dev_cmd_src_max_0[] = "src_max 201.202.203.204";
+static const char dev_cmd_dst_mac_0[] = "dst_mac 01:02:03:04:05:06";
+static const char dev_cmd_src_mac_0[] = "src_mac 11:12:13:14:15:16";
+static const char dev_cmd_clear_counters_0[] = "clear_counters";
+static const char dev_cmd_flows_0[] = "flows 100";
+static const char dev_cmd_spi_0[] = "spi 100";
+static const char dev_cmd_flowlen_0[] = "flowlen 100";
+static const char dev_cmd_queue_map_min_0[] = "queue_map_min 1";
+static const char dev_cmd_queue_map_max_0[] = "queue_map_max 2";
+static const char dev_cmd_mpls_0[] = "mpls 00000001";
+static const char dev_cmd_mpls_1[] = "mpls 00000001,000000f2";
+static const char dev_cmd_mpls_2[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f";
+static const char dev_cmd_mpls_3[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f,00000f10";
+static const char dev_cmd_vlan_id_0[] = "vlan_id 1";
+static const char dev_cmd_vlan_p_0[] = "vlan_p 1";
+static const char dev_cmd_vlan_cfi_0[] = "vlan_cfi 1";
+static const char dev_cmd_vlan_id_1[] = "vlan_id 4096";
+static const char dev_cmd_svlan_id_0[] = "svlan_id 1";
+static const char dev_cmd_svlan_p_0[] = "svlan_p 1";
+static const char dev_cmd_svlan_cfi_0[] = "svlan_cfi 1";
+static const char dev_cmd_svlan_id_1[] = "svlan_id 4096";
+static const char dev_cmd_tos_0[] = "tos 0";
+static const char dev_cmd_tos_1[] = "tos 0f";
+static const char dev_cmd_tos_2[] = "tos 0ff";
+static const char dev_cmd_traffic_class_0[] = "traffic_class f0";
+static const char dev_cmd_skb_priority_0[] = "skb_priority 999";
+
+FIXTURE(proc_net_pktgen) {
+ int ctrl_fd;
+ int thr_fd;
+ int dev_fd;
+};
+
+FIXTURE_SETUP(proc_net_pktgen) {
+ int r;
+ ssize_t len;
+
+ r = system("modprobe pktgen");
+ ASSERT_EQ(r, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?");
+
+ self->ctrl_fd = open("/proc/net/pktgen/pgctrl", O_RDWR);
+ ASSERT_GE(self->ctrl_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?");
+
+ self->thr_fd = open("/proc/net/pktgen/kpktgend_0", O_RDWR);
+ ASSERT_GE(self->thr_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?");
+
+ len = write(self->thr_fd, thr_cmd_add_loopback_0, sizeof(thr_cmd_add_loopback_0));
+ ASSERT_EQ(len, sizeof(thr_cmd_add_loopback_0)) TH_LOG("device lo@0 already registered?");
+
+ self->dev_fd = open("/proc/net/pktgen/lo@0", O_RDWR);
+ ASSERT_GE(self->dev_fd, 0) TH_LOG("device entry for lo@0 missing?");
+}
+
+FIXTURE_TEARDOWN(proc_net_pktgen) {
+ int ret;
+ ssize_t len;
+
+ ret = close(self->dev_fd);
+ EXPECT_EQ(ret, 0);
+
+ len = write(self->thr_fd, thr_cmd_rm_loopback_0, sizeof(thr_cmd_rm_loopback_0));
+ EXPECT_EQ(len, sizeof(thr_cmd_rm_loopback_0));
+
+ ret = close(self->thr_fd);
+ EXPECT_EQ(ret, 0);
+
+ ret = close(self->ctrl_fd);
+ EXPECT_EQ(ret, 0);
+}
+
+TEST_F(proc_net_pktgen, wrong_ctrl_cmd) {
+ for (int i = 0; i <= sizeof(wrong_ctrl_cmd); i++) {
+ ssize_t len;
+
+ len = write(self->ctrl_fd, wrong_ctrl_cmd, i);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+ }
+}
+
+TEST_F(proc_net_pktgen, ctrl_cmd) {
+ ssize_t len;
+
+ len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop));
+ EXPECT_EQ(len, sizeof(ctrl_cmd_stop));
+
+ len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop) - 1);
+ EXPECT_EQ(len, sizeof(ctrl_cmd_stop) - 1);
+
+ len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start));
+ EXPECT_EQ(len, sizeof(ctrl_cmd_start));
+
+ len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start) - 1);
+ EXPECT_EQ(len, sizeof(ctrl_cmd_start) - 1);
+
+ len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset));
+ EXPECT_EQ(len, sizeof(ctrl_cmd_reset));
+
+ len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset) - 1);
+ EXPECT_EQ(len, sizeof(ctrl_cmd_reset) - 1);
+}
+
+TEST_F(proc_net_pktgen, wrong_thr_cmd) {
+ for (int i = 0; i <= sizeof(wrong_thr_cmd); i++) {
+ ssize_t len;
+
+ len = write(self->thr_fd, wrong_thr_cmd, i);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+ }
+}
+
+TEST_F(proc_net_pktgen, legacy_thr_cmd) {
+ for (int i = 0; i <= sizeof(legacy_thr_cmd); i++) {
+ ssize_t len;
+
+ len = write(self->thr_fd, legacy_thr_cmd, i);
+ if (i < (sizeof(legacy_thr_cmd) - 1)) {
+ /* incomplete command string */
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ /* complete command string without/with trailing '\0' */
+ EXPECT_EQ(len, i);
+ }
+ }
+}
+
+TEST_F(proc_net_pktgen, wrong_dev_cmd) {
+ for (int i = 0; i <= sizeof(wrong_dev_cmd); i++) {
+ ssize_t len;
+
+ len = write(self->dev_fd, wrong_dev_cmd, i);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+ }
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_min_pkt_size) {
+ ssize_t len;
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0));
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0) - 1);
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0) - 1);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1));
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1) - 1);
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1) - 1);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2));
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2) - 1);
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2) - 1);
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_3, sizeof(dev_cmd_min_pkt_size_3));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_3));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_4, sizeof(dev_cmd_min_pkt_size_4));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_4));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_5, sizeof(dev_cmd_min_pkt_size_5));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_5));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_6, sizeof(dev_cmd_min_pkt_size_6));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_6));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_7, sizeof(dev_cmd_min_pkt_size_7));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_7));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_8, sizeof(dev_cmd_min_pkt_size_8));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_8));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_max_pkt_size) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_max_pkt_size_0, sizeof(dev_cmd_max_pkt_size_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_max_pkt_size_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_pkt_size) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_pkt_size_0, sizeof(dev_cmd_pkt_size_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_pkt_size_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_imix_weights) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_imix_weights_0, sizeof(dev_cmd_imix_weights_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_0));
+
+ len = write(self->dev_fd, dev_cmd_imix_weights_1, sizeof(dev_cmd_imix_weights_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_1));
+
+ len = write(self->dev_fd, dev_cmd_imix_weights_2, sizeof(dev_cmd_imix_weights_2));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, E2BIG);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3) - 1);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4) - 1);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5) - 1);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6) - 1);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_debug) {
+ ssize_t len;
+
+ /* debug on */
+ len = write(self->dev_fd, dev_cmd_debug_0, sizeof(dev_cmd_debug_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_debug_0));
+
+ /* debug off */
+ len = write(self->dev_fd, dev_cmd_debug_1, sizeof(dev_cmd_debug_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_debug_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_frags) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_frags_0, sizeof(dev_cmd_frags_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_frags_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_delay) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_delay_0, sizeof(dev_cmd_delay_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_delay_0));
+
+ len = write(self->dev_fd, dev_cmd_delay_1, sizeof(dev_cmd_delay_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_delay_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_rate) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_rate_0, sizeof(dev_cmd_rate_0));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ len = write(self->dev_fd, dev_cmd_rate_1, sizeof(dev_cmd_rate_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_rate_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_ratep) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_ratep_0, sizeof(dev_cmd_ratep_0));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ len = write(self->dev_fd, dev_cmd_ratep_1, sizeof(dev_cmd_ratep_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_ratep_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_src_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_udp_src_min_0, sizeof(dev_cmd_udp_src_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_udp_src_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_dst_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_udp_dst_min_0, sizeof(dev_cmd_udp_dst_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_src_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_udp_src_max_0, sizeof(dev_cmd_udp_src_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_udp_src_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_dst_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_udp_dst_max_0, sizeof(dev_cmd_udp_dst_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_clone_skb) {
+ ssize_t len;
+
+ /* clone_skb on (gives EOPNOTSUPP on lo device) */
+ len = write(self->dev_fd, dev_cmd_clone_skb_0, sizeof(dev_cmd_clone_skb_0));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EOPNOTSUPP);
+
+ /* clone_skb off */
+ len = write(self->dev_fd, dev_cmd_clone_skb_1, sizeof(dev_cmd_clone_skb_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_clone_skb_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_count) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_count_0, sizeof(dev_cmd_count_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_count_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_mac_count) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src_mac_count_0, sizeof(dev_cmd_src_mac_count_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src_mac_count_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_mac_count) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_mac_count_0, sizeof(dev_cmd_dst_mac_count_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_count_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_burst) {
+ ssize_t len;
+
+ /* burst off */
+ len = write(self->dev_fd, dev_cmd_burst_0, sizeof(dev_cmd_burst_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_burst_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_node) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_node_0, sizeof(dev_cmd_node_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_node_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_xmit_mode) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_xmit_mode_0, sizeof(dev_cmd_xmit_mode_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_0));
+
+ len = write(self->dev_fd, dev_cmd_xmit_mode_1, sizeof(dev_cmd_xmit_mode_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_1));
+
+ len = write(self->dev_fd, dev_cmd_xmit_mode_2, sizeof(dev_cmd_xmit_mode_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_2));
+
+ len = write(self->dev_fd, dev_cmd_xmit_mode_3, sizeof(dev_cmd_xmit_mode_3));
+ EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_3));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_flag) {
+ ssize_t len;
+
+ /* flag UDPCSUM on */
+ len = write(self->dev_fd, dev_cmd_flag_0, sizeof(dev_cmd_flag_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_flag_0));
+
+ /* flag UDPCSUM off */
+ len = write(self->dev_fd, dev_cmd_flag_1, sizeof(dev_cmd_flag_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_flag_1));
+
+ /* flag invalid */
+ len = write(self->dev_fd, dev_cmd_flag_2, sizeof(dev_cmd_flag_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_flag_2));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_min_0, sizeof(dev_cmd_dst_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_0, sizeof(dev_cmd_dst_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_max_0, sizeof(dev_cmd_dst_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst6) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst6_0, sizeof(dev_cmd_dst6_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst6_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst6_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst6_min_0, sizeof(dev_cmd_dst6_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst6_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst6_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst6_max_0, sizeof(dev_cmd_dst6_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst6_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src6) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src6_0, sizeof(dev_cmd_src6_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src6_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src_min_0, sizeof(dev_cmd_src_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src_max_0, sizeof(dev_cmd_src_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_mac) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_mac_0, sizeof(dev_cmd_dst_mac_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_mac) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src_mac_0, sizeof(dev_cmd_src_mac_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src_mac_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_clear_counters) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_clear_counters_0, sizeof(dev_cmd_clear_counters_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_clear_counters_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_flows) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_flows_0, sizeof(dev_cmd_flows_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_flows_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_spi) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_spi_0, sizeof(dev_cmd_spi_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_spi_0)) TH_LOG("CONFIG_XFRM not enabled?");
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_flowlen) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_flowlen_0, sizeof(dev_cmd_flowlen_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_flowlen_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_queue_map_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_queue_map_min_0, sizeof(dev_cmd_queue_map_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_queue_map_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_queue_map_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_queue_map_max_0, sizeof(dev_cmd_queue_map_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_queue_map_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_mpls) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_mpls_0, sizeof(dev_cmd_mpls_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_mpls_0));
+
+ len = write(self->dev_fd, dev_cmd_mpls_1, sizeof(dev_cmd_mpls_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_mpls_1));
+
+ len = write(self->dev_fd, dev_cmd_mpls_2, sizeof(dev_cmd_mpls_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_mpls_2));
+
+ len = write(self->dev_fd, dev_cmd_mpls_3, sizeof(dev_cmd_mpls_3));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, E2BIG);
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_vlan_id) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_vlan_id_0, sizeof(dev_cmd_vlan_id_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_0));
+
+ len = write(self->dev_fd, dev_cmd_vlan_p_0, sizeof(dev_cmd_vlan_p_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_vlan_p_0));
+
+ len = write(self->dev_fd, dev_cmd_vlan_cfi_0, sizeof(dev_cmd_vlan_cfi_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_vlan_cfi_0));
+
+ len = write(self->dev_fd, dev_cmd_vlan_id_1, sizeof(dev_cmd_vlan_id_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_svlan_id) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_svlan_id_0, sizeof(dev_cmd_svlan_id_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_0));
+
+ len = write(self->dev_fd, dev_cmd_svlan_p_0, sizeof(dev_cmd_svlan_p_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_svlan_p_0));
+
+ len = write(self->dev_fd, dev_cmd_svlan_cfi_0, sizeof(dev_cmd_svlan_cfi_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_svlan_cfi_0));
+
+ len = write(self->dev_fd, dev_cmd_svlan_id_1, sizeof(dev_cmd_svlan_id_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_1));
+}
+
+
+TEST_F(proc_net_pktgen, dev_cmd_tos) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_tos_0, sizeof(dev_cmd_tos_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_tos_0));
+
+ len = write(self->dev_fd, dev_cmd_tos_1, sizeof(dev_cmd_tos_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_tos_1));
+
+ len = write(self->dev_fd, dev_cmd_tos_2, sizeof(dev_cmd_tos_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_tos_2));
+}
+
+
+TEST_F(proc_net_pktgen, dev_cmd_traffic_class) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_traffic_class_0, sizeof(dev_cmd_traffic_class_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_traffic_class_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_skb_priority) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_skb_priority_0, sizeof(dev_cmd_skb_priority_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_skb_priority_0));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 1a736f700be4..ab8d8b7e6cb0 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -48,17 +48,45 @@
#include <string.h>
#include <sys/mman.h>
#include <sys/socket.h>
+#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "psock_lib.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#define RING_NUM_FRAMES 20
static uint32_t cfg_max_num_members;
+static void loopback_set_up_down(int state_up)
+{
+ struct ifreq ifreq = {};
+ int fd, err;
+
+ fd = socket(AF_PACKET, SOCK_RAW, 0);
+ if (fd < 0) {
+ perror("socket loopback");
+ exit(1);
+ }
+ strcpy(ifreq.ifr_name, "lo");
+ err = ioctl(fd, SIOCGIFFLAGS, &ifreq);
+ if (err) {
+ perror("SIOCGIFFLAGS");
+ exit(1);
+ }
+ if (state_up != !!(ifreq.ifr_flags & IFF_UP)) {
+ ifreq.ifr_flags ^= IFF_UP;
+ err = ioctl(fd, SIOCSIFFLAGS, &ifreq);
+ if (err) {
+ perror("SIOCSIFFLAGS");
+ exit(1);
+ }
+ }
+ close(fd);
+}
+
/* Open a socket in a given fanout mode.
* @return -1 if mode is bad, a valid socket otherwise */
static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
@@ -165,9 +193,9 @@ static void sock_fanout_set_ebpf(int fd)
attr.insns = (unsigned long) prog;
attr.insn_cnt = ARRAY_SIZE(prog);
attr.license = (unsigned long) "GPL";
- attr.log_buf = (unsigned long) log_buf,
- attr.log_size = sizeof(log_buf),
- attr.log_level = 1,
+ attr.log_buf = (unsigned long) log_buf;
+ attr.log_size = sizeof(log_buf);
+ attr.log_level = 1;
pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
if (pfd < 0) {
@@ -251,6 +279,41 @@ static int sock_fanout_read(int fds[], char *rings[], const int expect[])
return 0;
}
+/* Test that creating/joining a fanout group fails for unbound socket without
+ * a specified protocol
+ */
+static void test_unbound_fanout(void)
+{
+ int val, fd0, fd1, err;
+
+ fprintf(stderr, "test: unbound fanout\n");
+ fd0 = socket(PF_PACKET, SOCK_RAW, 0);
+ if (fd0 < 0) {
+ perror("socket packet");
+ exit(1);
+ }
+ /* Try to create a new fanout group. Should fail. */
+ val = (PACKET_FANOUT_HASH << 16) | 1;
+ err = setsockopt(fd0, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val));
+ if (!err) {
+ fprintf(stderr, "ERROR: unbound socket fanout create\n");
+ exit(1);
+ }
+ fd1 = sock_fanout_open(PACKET_FANOUT_HASH, 1);
+ if (fd1 == -1) {
+ fprintf(stderr, "ERROR: failed to open HASH socket\n");
+ exit(1);
+ }
+ /* Try to join an existing fanout group. Should fail. */
+ err = setsockopt(fd0, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val));
+ if (!err) {
+ fprintf(stderr, "ERROR: unbound socket fanout join\n");
+ exit(1);
+ }
+ close(fd0);
+ close(fd1);
+}
+
/* Test illegal mode + flag combination */
static void test_control_single(void)
{
@@ -264,17 +327,22 @@ static void test_control_single(void)
}
/* Test illegal group with different modes or flags */
-static void test_control_group(void)
+static void test_control_group(int toggle)
{
int fds[2];
- fprintf(stderr, "test: control multiple sockets\n");
+ if (toggle)
+ fprintf(stderr, "test: control multiple sockets with link down toggle\n");
+ else
+ fprintf(stderr, "test: control multiple sockets\n");
fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
if (fds[0] == -1) {
fprintf(stderr, "ERROR: failed to open HASH socket\n");
exit(1);
}
+ if (toggle)
+ loopback_set_up_down(0);
if (sock_fanout_open(PACKET_FANOUT_HASH |
PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) {
fprintf(stderr, "ERROR: joined group with wrong flag defrag\n");
@@ -294,6 +362,8 @@ static void test_control_group(void)
fprintf(stderr, "ERROR: failed to join group\n");
exit(1);
}
+ if (toggle)
+ loopback_set_up_down(1);
if (close(fds[1]) || close(fds[0])) {
fprintf(stderr, "ERROR: closing sockets\n");
exit(1);
@@ -488,8 +558,10 @@ int main(int argc, char **argv)
const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } };
int port_off = 2, tries = 20, ret;
+ test_unbound_fanout();
test_control_single();
- test_control_group();
+ test_control_group(0);
+ test_control_group(1);
test_control_group_max_num_members();
test_unique_fanout_group_ids();
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
index 6e4fef560873..067265b0a554 100644
--- a/tools/testing/selftests/net/psock_lib.h
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -22,10 +22,6 @@
#define PORT_BASE 8000
-#ifndef __maybe_unused
-# define __maybe_unused __attribute__ ((__unused__))
-#endif
-
static __maybe_unused void pair_udp_setfilter(int fd)
{
/* the filter below checks for all of the following conditions that
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 404a2ce759ab..7caf3135448d 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -12,7 +12,7 @@
*
* Datapath:
* Open a pair of packet sockets and send resp. receive an a priori known
- * packet pattern accross the sockets and check if it was received resp.
+ * packet pattern across the sockets and check if it was received resp.
* sent correctly. Fanout in combination with RX_RING is currently not
* tested here.
*
@@ -22,6 +22,7 @@
* - TPACKET_V3: RX_RING
*/
+#undef NDEBUG
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
@@ -33,7 +34,6 @@
#include <ctype.h>
#include <fcntl.h>
#include <unistd.h>
-#include <bits/wordsize.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
@@ -46,7 +46,7 @@
#include "psock_lib.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef bug_on
# define bug_on(cond) assert(!(cond))
@@ -785,7 +785,7 @@ static int test_kernel_bit_width(void)
static int test_user_bit_width(void)
{
- return __WORDSIZE;
+ return sizeof(long) * 8;
}
static const char *tpacket_str[] = {
diff --git a/tools/testing/selftests/net/rds/.gitignore b/tools/testing/selftests/net/rds/.gitignore
new file mode 100644
index 000000000000..1c6f04e2aa11
--- /dev/null
+++ b/tools/testing/selftests/net/rds/.gitignore
@@ -0,0 +1 @@
+include.sh
diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile
new file mode 100644
index 000000000000..762845cc973c
--- /dev/null
+++ b/tools/testing/selftests/net/rds/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+
+all:
+ @echo mk_build_dir="$(shell pwd)" > include.sh
+
+TEST_PROGS := run.sh
+
+TEST_FILES := \
+ include.sh \
+ test.py \
+# end of TEST_FILES
+
+EXTRA_CLEAN := \
+ include.sh \
+ /tmp/rds_logs \
+# end of EXTRA_CLEAN
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/rds/README.txt b/tools/testing/selftests/net/rds/README.txt
new file mode 100644
index 000000000000..cbde2951ab13
--- /dev/null
+++ b/tools/testing/selftests/net/rds/README.txt
@@ -0,0 +1,41 @@
+RDS self-tests
+==============
+
+These scripts provide a coverage test for RDS-TCP by creating two
+network namespaces and running rds packets between them. A loopback
+network is provisioned with optional probability of packet loss or
+corruption. A workload of 50000 hashes, each 64 characters in size,
+are passed over an RDS socket on this test network. A passing test means
+the RDS-TCP stack was able to recover properly. The provided config.sh
+can be used to compile the kernel with the necessary gcov options. The
+kernel may optionally be configured to omit the coverage report as well.
+
+USAGE:
+ run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]
+ [-u packet_duplcate]
+
+OPTIONS:
+ -d Log directory. Defaults to tools/testing/selftests/net/rds/rds_logs
+
+ -l Simulates a percentage of packet loss
+
+ -c Simulates a percentage of packet corruption
+
+ -u Simulates a percentage of packet duplication.
+
+EXAMPLE:
+
+ # Create a suitable gcov enabled .config
+ tools/testing/selftests/net/rds/config.sh -g
+
+ # Alternatly create a gcov disabled .config
+ tools/testing/selftests/net/rds/config.sh
+
+ # build the kernel
+ vng --build --config tools/testing/selftests/net/config
+
+ # launch the tests in a VM
+ vng -v --rwdir ./ --run . --user root --cpus 4 -- \
+ "export PYTHONPATH=tools/testing/selftests/net/; tools/testing/selftests/net/rds/run.sh"
+
+An HTML coverage report will be output in tools/testing/selftests/net/rds/rds_logs/coverage/.
diff --git a/tools/testing/selftests/net/rds/config.sh b/tools/testing/selftests/net/rds/config.sh
new file mode 100755
index 000000000000..791c8dbe1095
--- /dev/null
+++ b/tools/testing/selftests/net/rds/config.sh
@@ -0,0 +1,53 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -u
+set -x
+
+unset KBUILD_OUTPUT
+
+GENERATE_GCOV_REPORT=0
+while getopts "g" opt; do
+ case ${opt} in
+ g)
+ GENERATE_GCOV_REPORT=1
+ ;;
+ :)
+ echo "USAGE: config.sh [-g]"
+ exit 1
+ ;;
+ ?)
+ echo "Invalid option: -${OPTARG}."
+ exit 1
+ ;;
+ esac
+done
+
+CONF_FILE="tools/testing/selftests/net/config"
+
+# no modules
+scripts/config --file "$CONF_FILE" --disable CONFIG_MODULES
+
+# enable RDS
+scripts/config --file "$CONF_FILE" --enable CONFIG_RDS
+scripts/config --file "$CONF_FILE" --enable CONFIG_RDS_TCP
+
+if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then
+ # instrument RDS and only RDS
+ scripts/config --file "$CONF_FILE" --enable CONFIG_GCOV_KERNEL
+ scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL
+ scripts/config --file "$CONF_FILE" --enable GCOV_PROFILE_RDS
+else
+ scripts/config --file "$CONF_FILE" --disable CONFIG_GCOV_KERNEL
+ scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL
+ scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_RDS
+fi
+
+# need network namespaces to run tests with veth network interfaces
+scripts/config --file "$CONF_FILE" --enable CONFIG_NET_NS
+scripts/config --file "$CONF_FILE" --enable CONFIG_VETH
+
+# simulate packet loss
+scripts/config --file "$CONF_FILE" --enable CONFIG_NET_SCH_NETEM
+
diff --git a/tools/testing/selftests/net/rds/run.sh b/tools/testing/selftests/net/rds/run.sh
new file mode 100755
index 000000000000..8aee244f582a
--- /dev/null
+++ b/tools/testing/selftests/net/rds/run.sh
@@ -0,0 +1,224 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -u
+
+unset KBUILD_OUTPUT
+
+current_dir="$(realpath "$(dirname "$0")")"
+build_dir="$current_dir"
+
+build_include="$current_dir/include.sh"
+if test -f "$build_include"; then
+ # this include will define "$mk_build_dir" as the location the test was
+ # built. We will need this if the tests are installed in a location
+ # other than the kernel source
+
+ source "$build_include"
+ build_dir="$mk_build_dir"
+fi
+
+# This test requires kernel source and the *.gcda data therein
+# Locate the top level of the kernel source, and the net/rds
+# subfolder with the appropriate *.gcno object files
+ksrc_dir="$(realpath "$build_dir"/../../../../../)"
+kconfig="$ksrc_dir/.config"
+obj_dir="$ksrc_dir/net/rds"
+
+GCOV_CMD=gcov
+
+#check to see if the host has the required packages to generate a gcov report
+check_gcov_env()
+{
+ if ! which "$GCOV_CMD" > /dev/null 2>&1; then
+ echo "Warning: Could not find gcov. "
+ GENERATE_GCOV_REPORT=0
+ return
+ fi
+
+ # the gcov version must match the gcc version
+ GCC_VER=$(gcc -dumpfullversion)
+ GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| awk 'BEGIN {FS="-"}{print $1}')
+ if [ "$GCOV_VER" != "$GCC_VER" ]; then
+ #attempt to find a matching gcov version
+ GCOV_CMD=gcov-$(gcc -dumpversion)
+
+ if ! which "$GCOV_CMD" > /dev/null 2>&1; then
+ echo "Warning: Could not find an appropriate gcov installation. \
+ gcov version must match gcc version"
+ GENERATE_GCOV_REPORT=0
+ return
+ fi
+
+ #recheck version number of found gcov executable
+ GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| \
+ awk 'BEGIN {FS="-"}{print $1}')
+ if [ "$GCOV_VER" != "$GCC_VER" ]; then
+ echo "Warning: Could not find an appropriate gcov installation. \
+ gcov version must match gcc version"
+ GENERATE_GCOV_REPORT=0
+ else
+ echo "Warning: Mismatched gcc and gcov detected. Using $GCOV_CMD"
+ fi
+ fi
+}
+
+# Check to see if the kconfig has the required configs to generate a coverage report
+check_gcov_conf()
+{
+ if ! grep -x "CONFIG_GCOV_PROFILE_RDS=y" "$kconfig" > /dev/null 2>&1; then
+ echo "INFO: CONFIG_GCOV_PROFILE_RDS should be enabled for coverage reports"
+ GENERATE_GCOV_REPORT=0
+ fi
+ if ! grep -x "CONFIG_GCOV_KERNEL=y" "$kconfig" > /dev/null 2>&1; then
+ echo "INFO: CONFIG_GCOV_KERNEL should be enabled for coverage reports"
+ GENERATE_GCOV_REPORT=0
+ fi
+ if grep -x "CONFIG_GCOV_PROFILE_ALL=y" "$kconfig" > /dev/null 2>&1; then
+ echo "INFO: CONFIG_GCOV_PROFILE_ALL should be disabled for coverage reports"
+ GENERATE_GCOV_REPORT=0
+ fi
+
+ if [ "$GENERATE_GCOV_REPORT" -eq 0 ]; then
+ echo "To enable gcov reports, please run "\
+ "\"tools/testing/selftests/net/rds/config.sh -g\" and rebuild the kernel"
+ else
+ # if we have the required kernel configs, proceed to check the environment to
+ # ensure we have the required gcov packages
+ check_gcov_env
+ fi
+}
+
+# Kselftest framework requirement - SKIP code is 4.
+check_conf_enabled() {
+ if ! grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then
+ echo "selftests: [SKIP] This test requires $1 enabled"
+ echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel"
+ exit 4
+ fi
+}
+check_conf_disabled() {
+ if grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then
+ echo "selftests: [SKIP] This test requires $1 disabled"
+ echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel"
+ exit 4
+ fi
+}
+check_conf() {
+ check_conf_enabled CONFIG_NET_SCH_NETEM
+ check_conf_enabled CONFIG_VETH
+ check_conf_enabled CONFIG_NET_NS
+ check_conf_enabled CONFIG_RDS_TCP
+ check_conf_enabled CONFIG_RDS
+ check_conf_disabled CONFIG_MODULES
+}
+
+check_env()
+{
+ if ! test -d "$obj_dir"; then
+ echo "selftests: [SKIP] This test requires a kernel source tree"
+ exit 4
+ fi
+ if ! test -e "$kconfig"; then
+ echo "selftests: [SKIP] This test requires a configured kernel source tree"
+ exit 4
+ fi
+ if ! which strace > /dev/null 2>&1; then
+ echo "selftests: [SKIP] Could not run test without strace"
+ exit 4
+ fi
+ if ! which tcpdump > /dev/null 2>&1; then
+ echo "selftests: [SKIP] Could not run test without tcpdump"
+ exit 4
+ fi
+
+ if ! which python3 > /dev/null 2>&1; then
+ echo "selftests: [SKIP] Could not run test without python3"
+ exit 4
+ fi
+
+ python_major=$(python3 -c "import sys; print(sys.version_info[0])")
+ python_minor=$(python3 -c "import sys; print(sys.version_info[1])")
+ if [[ python_major -lt 3 || ( python_major -eq 3 && python_minor -lt 9 ) ]] ; then
+ echo "selftests: [SKIP] Could not run test without at least python3.9"
+ python3 -V
+ exit 4
+ fi
+}
+
+LOG_DIR="$current_dir"/rds_logs
+PLOSS=0
+PCORRUPT=0
+PDUP=0
+GENERATE_GCOV_REPORT=1
+while getopts "d:l:c:u:" opt; do
+ case ${opt} in
+ d)
+ LOG_DIR=${OPTARG}
+ ;;
+ l)
+ PLOSS=${OPTARG}
+ ;;
+ c)
+ PCORRUPT=${OPTARG}
+ ;;
+ u)
+ PDUP=${OPTARG}
+ ;;
+ :)
+ echo "USAGE: run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]" \
+ "[-u packet_duplcate] [-g]"
+ exit 1
+ ;;
+ ?)
+ echo "Invalid option: -${OPTARG}."
+ exit 1
+ ;;
+ esac
+done
+
+
+check_env
+check_conf
+check_gcov_conf
+
+
+rm -fr "$LOG_DIR"
+TRACE_FILE="${LOG_DIR}/rds-strace.txt"
+COVR_DIR="${LOG_DIR}/coverage/"
+mkdir -p "$LOG_DIR"
+mkdir -p "$COVR_DIR"
+
+set +e
+echo running RDS tests...
+echo Traces will be logged to "$TRACE_FILE"
+rm -f "$TRACE_FILE"
+strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" --timeout 400 -d "$LOG_DIR" \
+ -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP"
+
+test_rc=$?
+dmesg > "${LOG_DIR}/dmesg.out"
+
+if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then
+ echo saving coverage data...
+ (set +x; cd /sys/kernel/debug/gcov; find ./* -name '*.gcda' | \
+ while read -r f
+ do
+ cat < "/sys/kernel/debug/gcov/$f" > "/$f"
+ done)
+
+ echo running gcovr...
+ gcovr -s --html-details --gcov-executable "$GCOV_CMD" --gcov-ignore-parse-errors \
+ -o "${COVR_DIR}/gcovr" "${ksrc_dir}/net/rds/"
+else
+ echo "Coverage report will be skipped"
+fi
+
+if [ "$test_rc" -eq 0 ]; then
+ echo "PASS: Test completed successfully"
+else
+ echo "FAIL: Test failed"
+fi
+
+exit "$test_rc"
diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py
new file mode 100755
index 000000000000..4a7178d11193
--- /dev/null
+++ b/tools/testing/selftests/net/rds/test.py
@@ -0,0 +1,265 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import argparse
+import ctypes
+import errno
+import hashlib
+import os
+import select
+import signal
+import socket
+import subprocess
+import sys
+import atexit
+from pwd import getpwuid
+from os import stat
+
+# Allow utils module to be imported from different directory
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(this_dir, "../"))
+from lib.py.utils import ip
+
+libc = ctypes.cdll.LoadLibrary('libc.so.6')
+setns = libc.setns
+
+net0 = 'net0'
+net1 = 'net1'
+
+veth0 = 'veth0'
+veth1 = 'veth1'
+
+# Helper function for creating a socket inside a network namespace.
+# We need this because otherwise RDS will detect that the two TCP
+# sockets are on the same interface and use the loop transport instead
+# of the TCP transport.
+def netns_socket(netns, *args):
+ u0, u1 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET)
+
+ child = os.fork()
+ if child == 0:
+ # change network namespace
+ with open(f'/var/run/netns/{netns}') as f:
+ try:
+ ret = setns(f.fileno(), 0)
+ except IOError as e:
+ print(e.errno)
+ print(e)
+
+ # create socket in target namespace
+ s = socket.socket(*args)
+
+ # send resulting socket to parent
+ socket.send_fds(u0, [], [s.fileno()])
+
+ sys.exit(0)
+
+ # receive socket from child
+ _, s, _, _ = socket.recv_fds(u1, 0, 1)
+ os.waitpid(child, 0)
+ u0.close()
+ u1.close()
+ return socket.fromfd(s[0], *args)
+
+def signal_handler(sig, frame):
+ print('Test timed out')
+ sys.exit(1)
+
+#Parse out command line arguments. We take an optional
+# timeout parameter and an optional log output folder
+parser = argparse.ArgumentParser(description="init script args",
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("-d", "--logdir", action="store",
+ help="directory to store logs", default="/tmp")
+parser.add_argument('--timeout', help="timeout to terminate hung test",
+ type=int, default=0)
+parser.add_argument('-l', '--loss', help="Simulate tcp packet loss",
+ type=int, default=0)
+parser.add_argument('-c', '--corruption', help="Simulate tcp packet corruption",
+ type=int, default=0)
+parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication",
+ type=int, default=0)
+args = parser.parse_args()
+logdir=args.logdir
+packet_loss=str(args.loss)+'%'
+packet_corruption=str(args.corruption)+'%'
+packet_duplicate=str(args.duplicate)+'%'
+
+ip(f"netns add {net0}")
+ip(f"netns add {net1}")
+ip(f"link add type veth")
+
+addrs = [
+ # we technically don't need different port numbers, but this will
+ # help identify traffic in the network analyzer
+ ('10.0.0.1', 10000),
+ ('10.0.0.2', 20000),
+]
+
+# move interfaces to separate namespaces so they can no longer be
+# bound directly; this prevents rds from switching over from the tcp
+# transport to the loop transport.
+ip(f"link set {veth0} netns {net0} up")
+ip(f"link set {veth1} netns {net1} up")
+
+
+
+# add addresses
+ip(f"-n {net0} addr add {addrs[0][0]}/32 dev {veth0}")
+ip(f"-n {net1} addr add {addrs[1][0]}/32 dev {veth1}")
+
+# add routes
+ip(f"-n {net0} route add {addrs[1][0]}/32 dev {veth0}")
+ip(f"-n {net1} route add {addrs[0][0]}/32 dev {veth1}")
+
+# sanity check that our two interfaces/addresses are correctly set up
+# and communicating by doing a single ping
+ip(f"netns exec {net0} ping -c 1 {addrs[1][0]}")
+
+# Start a packet capture on each network
+for net in [net0, net1]:
+ tcpdump_pid = os.fork()
+ if tcpdump_pid == 0:
+ pcap = logdir+'/'+net+'.pcap'
+ subprocess.check_call(['touch', pcap])
+ user = getpwuid(stat(pcap).st_uid).pw_name
+ ip(f"netns exec {net} /usr/sbin/tcpdump -Z {user} -i any -w {pcap}")
+ sys.exit(0)
+
+# simulate packet loss, duplication and corruption
+for net, iface in [(net0, veth0), (net1, veth1)]:
+ ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem \
+ corrupt {packet_corruption} loss {packet_loss} duplicate \
+ {packet_duplicate}")
+
+# add a timeout
+if args.timeout > 0:
+ signal.alarm(args.timeout)
+ signal.signal(signal.SIGALRM, signal_handler)
+
+sockets = [
+ netns_socket(net0, socket.AF_RDS, socket.SOCK_SEQPACKET),
+ netns_socket(net1, socket.AF_RDS, socket.SOCK_SEQPACKET),
+]
+
+for s, addr in zip(sockets, addrs):
+ s.bind(addr)
+ s.setblocking(0)
+
+fileno_to_socket = {
+ s.fileno(): s for s in sockets
+}
+
+addr_to_socket = {
+ addr: s for addr, s in zip(addrs, sockets)
+}
+
+socket_to_addr = {
+ s: addr for addr, s in zip(addrs, sockets)
+}
+
+send_hashes = {}
+recv_hashes = {}
+
+ep = select.epoll()
+
+for s in sockets:
+ ep.register(s, select.EPOLLRDNORM)
+
+n = 50000
+nr_send = 0
+nr_recv = 0
+
+while nr_send < n:
+ # Send as much as we can without blocking
+ print("sending...", nr_send, nr_recv)
+ while nr_send < n:
+ send_data = hashlib.sha256(
+ f'packet {nr_send}'.encode('utf-8')).hexdigest().encode('utf-8')
+
+ # pseudo-random send/receive pattern
+ sender = sockets[nr_send % 2]
+ receiver = sockets[1 - (nr_send % 3) % 2]
+
+ try:
+ sender.sendto(send_data, socket_to_addr[receiver])
+ send_hashes.setdefault((sender.fileno(), receiver.fileno()),
+ hashlib.sha256()).update(f'<{send_data}>'.encode('utf-8'))
+ nr_send = nr_send + 1
+ except BlockingIOError as e:
+ break
+ except OSError as e:
+ if e.errno in [errno.ENOBUFS, errno.ECONNRESET, errno.EPIPE]:
+ break
+ raise
+
+ # Receive as much as we can without blocking
+ print("receiving...", nr_send, nr_recv)
+ while nr_recv < nr_send:
+ for fileno, eventmask in ep.poll():
+ receiver = fileno_to_socket[fileno]
+
+ if eventmask & select.EPOLLRDNORM:
+ while True:
+ try:
+ recv_data, address = receiver.recvfrom(1024)
+ sender = addr_to_socket[address]
+ recv_hashes.setdefault((sender.fileno(),
+ receiver.fileno()), hashlib.sha256()).update(
+ f'<{recv_data}>'.encode('utf-8'))
+ nr_recv = nr_recv + 1
+ except BlockingIOError as e:
+ break
+
+ # exercise net/rds/tcp.c:rds_tcp_sysctl_reset()
+ for net in [net0, net1]:
+ ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000")
+ ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000")
+
+print("done", nr_send, nr_recv)
+
+# the Python socket module doesn't know these
+RDS_INFO_FIRST = 10000
+RDS_INFO_LAST = 10017
+
+nr_success = 0
+nr_error = 0
+
+for s in sockets:
+ for optname in range(RDS_INFO_FIRST, RDS_INFO_LAST + 1):
+ # Sigh, the Python socket module doesn't allow us to pass
+ # buffer lengths greater than 1024 for some reason. RDS
+ # wants multiple pages.
+ try:
+ s.getsockopt(socket.SOL_RDS, optname, 1024)
+ nr_success = nr_success + 1
+ except OSError as e:
+ nr_error = nr_error + 1
+ if e.errno == errno.ENOSPC:
+ # ignore
+ pass
+
+print(f"getsockopt(): {nr_success}/{nr_error}")
+
+print("Stopping network packet captures")
+subprocess.check_call(['killall', '-q', 'tcpdump'])
+
+# We're done sending and receiving stuff, now let's check if what
+# we received is what we sent.
+for (sender, receiver), send_hash in send_hashes.items():
+ recv_hash = recv_hashes.get((sender, receiver))
+
+ if recv_hash is None:
+ print("FAIL: No data received")
+ sys.exit(1)
+
+ if send_hash.hexdigest() != recv_hash.hexdigest():
+ print("FAIL: Send/recv mismatch")
+ print("hash expected:", send_hash.hexdigest())
+ print("hash received:", recv_hash.hexdigest())
+ sys.exit(1)
+
+ print(f"{sender}/{receiver}: ok")
+
+print("Success")
+sys.exit(0)
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
index 066efd30e294..5aad27a0d13a 100644
--- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -22,7 +22,7 @@
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
struct reuse_opts {
int reuseaddr[2];
@@ -112,7 +112,7 @@ TEST(reuseaddr_ports_exhausted_reusable_same_euid)
ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
if (opts->reuseport[0] && opts->reuseport[1]) {
- EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened.");
+ EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets successfully listened.");
} else {
EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations.");
}
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
index b8475cb29be7..1c43401a1c80 100644
--- a/tools/testing/selftests/net/reuseport_addr_any.c
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -9,7 +9,6 @@
#include <arpa/inet.h>
#include <errno.h>
#include <error.h>
-#include <linux/dccp.h>
#include <linux/in.h>
#include <linux/unistd.h>
#include <stdbool.h>
@@ -21,10 +20,6 @@
#include <sys/socket.h>
#include <unistd.h>
-#ifndef SOL_DCCP
-#define SOL_DCCP 269
-#endif
-
static const char *IP4_ADDR = "127.0.0.1";
static const char *IP6_ADDR = "::1";
static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
@@ -86,15 +81,6 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count,
if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
error(1, errno, "tcp: failed to listen on receive port");
- else if (proto == SOCK_DCCP) {
- if (setsockopt(rcv_fds[i], SOL_DCCP,
- DCCP_SOCKOPT_SERVICE,
- &(int) {htonl(42)}, sizeof(int)))
- error(1, errno, "failed to setsockopt");
-
- if (listen(rcv_fds[i], 10))
- error(1, errno, "dccp: failed to listen on receive port");
- }
}
}
@@ -148,11 +134,6 @@ static int connect_and_send(int family, int proto)
if (fd < 0)
error(1, errno, "failed to create send socket");
- if (proto == SOCK_DCCP &&
- setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
- &(int){htonl(42)}, sizeof(int)))
- error(1, errno, "failed to setsockopt");
-
if (bind(fd, saddr, sz))
error(1, errno, "failed to bind send socket");
@@ -175,7 +156,7 @@ static int receive_once(int epfd, int proto)
if (i < 0)
error(1, errno, "epoll_wait failed");
- if (proto == SOCK_STREAM || proto == SOCK_DCCP) {
+ if (proto == SOCK_STREAM) {
fd = accept(ev.data.fd, NULL, NULL);
if (fd < 0)
error(1, errno, "failed to accept");
@@ -243,20 +224,6 @@ static void run_one_test(int fam_send, int fam_rcv, int proto,
static void test_proto(int proto, const char *proto_str)
{
- if (proto == SOCK_DCCP) {
- int test_fd;
-
- test_fd = socket(AF_INET, proto, 0);
- if (test_fd < 0) {
- if (errno == ESOCKTNOSUPPORT) {
- fprintf(stderr, "DCCP not supported: skipping DCCP tests\n");
- return;
- } else
- error(1, errno, "failed to create a DCCP socket");
- }
- close(test_fd);
- }
-
fprintf(stderr, "%s IPv4 ... ", proto_str);
run_one_test(AF_INET, AF_INET, proto, IP4_ADDR);
@@ -271,7 +238,6 @@ int main(void)
{
test_proto(SOCK_DGRAM, "UDP");
test_proto(SOCK_STREAM, "TCP");
- test_proto(SOCK_DCCP, "DCCP");
fprintf(stderr, "SUCCESS\n");
return 0;
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index 65aea27d761c..b6634d6da3d6 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -24,7 +24,7 @@
#include <sys/resource.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
struct test_params {
int recv_family;
diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
index c9ba36aa688e..2ffd957ffb15 100644
--- a/tools/testing/selftests/net/reuseport_bpf_numa.c
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -23,7 +23,7 @@
#include <unistd.h>
#include <numa.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static const int PORT = 8888;
diff --git a/tools/testing/selftests/net/route_hint.sh b/tools/testing/selftests/net/route_hint.sh
new file mode 100755
index 000000000000..2db01ece0cc1
--- /dev/null
+++ b/tools/testing/selftests/net/route_hint.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test ensures directed broadcast routes use dst hint mechanism
+
+source lib.sh
+
+CLIENT_IP4="192.168.0.1"
+SERVER_IP4="192.168.0.2"
+BROADCAST_ADDRESS="192.168.0.255"
+
+setup() {
+ setup_ns CLIENT_NS SERVER_NS
+
+ ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}"
+
+ ip -net "${CLIENT_NS}" link set link0 up
+ ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}/24" dev link0
+
+ ip -net "${SERVER_NS}" link set link1 up
+ ip -net "${SERVER_NS}" addr add "${SERVER_IP4}/24" dev link1
+
+ ip netns exec "${CLIENT_NS}" ethtool -K link0 tcp-segmentation-offload off
+ ip netns exec "${SERVER_NS}" sh -c "echo 500000000 > /sys/class/net/link1/gro_flush_timeout"
+ ip netns exec "${SERVER_NS}" sh -c "echo 1 > /sys/class/net/link1/napi_defer_hard_irqs"
+ ip netns exec "${SERVER_NS}" ethtool -K link1 generic-receive-offload on
+}
+
+cleanup() {
+ ip -net "${SERVER_NS}" link del link1
+ cleanup_ns "${CLIENT_NS}" "${SERVER_NS}"
+}
+
+directed_bcast_hint_test()
+{
+ local rc=0
+
+ echo "Testing for directed broadcast route hint"
+
+ orig_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd')
+ ip netns exec "${CLIENT_NS}" mausezahn link0 -a own -b bcast -A "${CLIENT_IP4}" \
+ -B "${BROADCAST_ADDRESS}" -c1 -t tcp "sp=1-100,dp=1234,s=1,a=0" -p 5 -q
+ sleep 1
+ new_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd')
+
+ res=$(echo "${new_in_brd} - ${orig_in_brd}" | bc)
+
+ if [ "${res}" -lt 100 ]; then
+ echo "[ OK ]"
+ rc="${ksft_pass}"
+ else
+ echo "[FAIL] expected in_brd to be under 100, got ${res}"
+ rc="${ksft_fail}"
+ fi
+
+ return "${rc}"
+}
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit "${ksft_skip}"
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test without jq tool"
+ exit "${ksft_skip}"
+fi
+
+if [ ! -x "$(command -v bc)" ]; then
+ echo "SKIP: Could not run test without bc tool"
+ exit "${ksft_skip}"
+fi
+
+trap cleanup EXIT
+
+setup
+
+directed_bcast_hint_test
+exit $?
diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
index 4287a8529890..b200019b3c80 100755
--- a/tools/testing/selftests/net/rps_default_mask.sh
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -54,16 +54,16 @@ cleanup
echo 1 > /proc/sys/net/core/rps_default_mask
setup
-chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask doesn't affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
echo 3 > /proc/sys/net/core/rps_default_mask
-chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0
+chk_rps "changing rps_default_mask doesn't affect existing netns" $NETNS lo 0
ip link add name $VETH type veth peer netns $NETNS name $VETH
ip link set dev $VETH up
ip -n $NETNS link set dev $VETH up
-chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3
-chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0
+chk_rps "changing rps_default_mask affects newly created devices" "" $VETH 3
+chk_rps "changing rps_default_mask doesn't affect newly child netns[II]" $NETNS $VETH 0
ip link del dev $VETH
ip netns del $NETNS
@@ -72,8 +72,8 @@ chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0
ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1
ip link add name $VETH type veth peer netns $NETNS name $VETH
-chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask in child ns doesn't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1
-chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0
+chk_rps "changing rps_default_mask in child ns doesn't affect existing devices" $NETNS lo 0
exit $ret
diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py
new file mode 100755
index 000000000000..e9ad5e88da97
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_exit, ksft_run, ksft_ge, RtnlAddrFamily
+import socket
+
+IPV4_ALL_HOSTS_MULTICAST = b'\xe0\x00\x00\x01'
+
+def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None:
+ """
+ Verify that at least one interface has the IPv4 all-hosts multicast address.
+ At least the loopback interface should have this address.
+ """
+
+ addresses = rtnl.getmulticast({"ifa-family": socket.AF_INET}, dump=True)
+
+ all_host_multicasts = [
+ addr for addr in addresses if addr['multicast'] == IPV4_ALL_HOSTS_MULTICAST
+ ]
+
+ ksft_ge(len(all_host_multicasts), 1,
+ "No interface found with the IPv4 all-hosts multicast address")
+
+def main() -> None:
+ rtnl = RtnlAddrFamily()
+ ksft_run([dump_mcaddr_check], args=(rtnl, ))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index bdf6f10d0558..248c2b91fe42 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -8,6 +8,7 @@ ALL_TESTS="
kci_test_polrouting
kci_test_route_get
kci_test_addrlft
+ kci_test_addrlft_route_cleanup
kci_test_promote_secondaries
kci_test_tc
kci_test_gre
@@ -21,14 +22,17 @@ ALL_TESTS="
kci_test_vrf
kci_test_encap
kci_test_macsec
- kci_test_macsec_offload
+ kci_test_macsec_vlan
kci_test_ipsec
kci_test_ipsec_offload
kci_test_fdb_get
+ kci_test_fdb_del
kci_test_neigh_get
kci_test_bridge_parent_id
kci_test_address_proto
kci_test_enslave_bonding
+ kci_test_mngtmpaddr
+ kci_test_operstate
"
devdummy="test-dummy0"
@@ -44,6 +48,7 @@ check_err()
if [ $ret -eq 0 ]; then
ret=$1
fi
+ [ -n "$2" ] && echo "$2"
}
# same but inverted -- used when command must fail for test to pass
@@ -289,6 +294,17 @@ kci_test_route_get()
end_test "PASS: route get"
}
+check_addr_not_exist()
+{
+ dev=$1
+ addr=$2
+ if ip addr show dev $dev | grep -q $addr; then
+ return 1
+ else
+ return 0
+ fi
+}
+
kci_test_addrlft()
{
for i in $(seq 10 100) ;do
@@ -296,9 +312,10 @@ kci_test_addrlft()
run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
done
- sleep 5
- run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy"
- if [ $? -eq 0 ]; then
+ slowwait 5 check_addr_not_exist "$devdummy" "10.23.11."
+ if [ $? -eq 1 ]; then
+ # troubleshoot the reason for our failure
+ run_cmd ip addr show dev "$devdummy"
check_err 1
end_test "FAIL: preferred_lft addresses remaining"
return
@@ -307,8 +324,32 @@ kci_test_addrlft()
end_test "PASS: preferred_lft addresses have expired"
}
+kci_test_addrlft_route_cleanup()
+{
+ local ret=0
+ local test_addr="2001:db8:99::1/64"
+ local test_prefix="2001:db8:99::/64"
+
+ run_cmd ip -6 addr add $test_addr dev "$devdummy" valid_lft 300 preferred_lft 300
+ run_cmd_grep "$test_prefix proto kernel" ip -6 route show dev "$devdummy"
+ run_cmd ip -6 addr del $test_addr dev "$devdummy"
+ run_cmd_grep_fail "$test_prefix" ip -6 route show dev "$devdummy"
+
+ if [ $ret -ne 0 ]; then
+ end_test "FAIL: route not cleaned up when address with valid_lft deleted"
+ return 1
+ fi
+
+ end_test "PASS: route cleaned up when address with valid_lft deleted"
+}
+
kci_test_promote_secondaries()
{
+ run_cmd ifconfig "$devdummy"
+ if [ $ret -ne 0 ]; then
+ end_test "SKIP: ifconfig not installed"
+ return $ksft_skip
+ fi
promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries)
sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1
@@ -505,7 +546,7 @@ kci_test_encap_fou()
run_cmd_fail ip -netns "$testns" fou del port 9999
run_cmd ip -netns "$testns" fou del port 7777
if [ $ret -ne 0 ]; then
- end_test "FAIL: fou"s
+ end_test "FAIL: fou"
return 1
fi
@@ -559,71 +600,39 @@ kci_test_macsec()
end_test "PASS: macsec"
}
-kci_test_macsec_offload()
+# Test __dev_set_rx_mode call from dev_uc_add under addr_list_lock spinlock.
+# Make sure __dev_set_promiscuity is not grabbing (sleeping) netdev instance
+# lock.
+# https://lore.kernel.org/netdev/2aff4342b0f5b1539c02ffd8df4c7e58dd9746e7.camel@nvidia.com/
+kci_test_macsec_vlan()
{
- sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/
- sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
- probed=false
+ msname="test_macsec1"
+ vlanname="test_vlan1"
local ret=0
run_cmd_grep "^Usage: ip macsec" ip macsec help
if [ $? -ne 0 ]; then
end_test "SKIP: macsec: iproute2 too old"
return $ksft_skip
fi
-
- if ! mount | grep -q debugfs; then
- mount -t debugfs none /sys/kernel/debug/ &> /dev/null
- fi
-
- # setup netdevsim since dummydev doesn't have offload support
- if [ ! -w /sys/bus/netdevsim/new_device ] ; then
- run_cmd modprobe -q netdevsim
-
- if [ $ret -ne 0 ]; then
- end_test "SKIP: macsec_offload can't load netdevsim"
- return $ksft_skip
- fi
- probed=true
- fi
-
- echo "0" > /sys/bus/netdevsim/new_device
- while [ ! -d $sysfsnet ] ; do :; done
- udevadm settle
- dev=`ls $sysfsnet`
-
- ip link set $dev up
- if [ ! -d $sysfsd ] ; then
- end_test "FAIL: macsec_offload can't create device $dev"
- return 1
- fi
- run_cmd_grep 'macsec-hw-offload: on' ethtool -k $dev
- if [ $? -eq 1 ] ; then
- end_test "FAIL: macsec_offload netdevsim doesn't support MACsec offload"
+ run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: can't add macsec interface, skipping test"
return 1
fi
- run_cmd ip link add link $dev kci_macsec1 type macsec port 4 offload mac
- run_cmd ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac
- run_cmd ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac
- run_cmd_fail ip link add link $dev kci_macsec4 type macsec port 8 offload mac
- msname=kci_macsec1
- run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
- run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
- run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \
- key 00 0123456789abcdef0123456789abcdef
- run_cmd_fail ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef"
- # clean up any leftovers
- for msdev in kci_macsec{1,2,3,4} ; do
- ip link del $msdev 2> /dev/null
- done
- echo 0 > /sys/bus/netdevsim/del_device
- $probed && rmmod netdevsim
+ run_cmd ip link set dev "$msname" up
+ ip link add link "$msname" name "$vlanname" type vlan id 1
+ ip link set dev "$vlanname" address 00:11:22:33:44:88
+ ip link set dev "$vlanname" up
+ run_cmd ip link del dev "$vlanname"
+ run_cmd ip link del dev "$msname"
- if [ $ret -ne 0 ]; then
- end_test "FAIL: macsec_offload"
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: macsec_vlan"
return 1
fi
- end_test "PASS: macsec_offload"
+
+ end_test "PASS: macsec_vlan"
}
#-------------------------------------------------------------------
@@ -738,6 +747,11 @@ kci_test_ipsec_offload()
sysfsf=$sysfsd/ipsec
sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
probed=false
+ esp4_offload_probed_default=false
+
+ if lsmod | grep -q esp4_offload; then
+ esp4_offload_probed_default=true
+ fi
if ! mount | grep -q debugfs; then
mount -t debugfs none /sys/kernel/debug/ &> /dev/null
@@ -809,10 +823,10 @@ kci_test_ipsec_offload()
# does driver have correct offload info
run_cmd diff $sysfsf - << EOF
SA count=2 tx=3
-sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000
+sa[0] tx ipaddr=$dstip
sa[0] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
sa[0] key=0x34333231 38373635 32313039 36353433
-sa[1] rx ipaddr=0x00000000 00000000 00000000 037ba8c0
+sa[1] rx ipaddr=$srcip
sa[1] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
sa[1] key=0x34333231 38373635 32313039 36353433
EOF
@@ -831,6 +845,7 @@ EOF
fi
# clean up any leftovers
+ ! "$esp4_offload_probed_default" && lsmod | grep -q esp4_offload && rmmod esp4_offload
echo 0 > /sys/bus/netdevsim/del_device
$probed && rmmod netdevsim
@@ -1065,6 +1080,45 @@ kci_test_fdb_get()
end_test "PASS: bridge fdb get"
}
+kci_test_fdb_del()
+{
+ local test_mac=de:ad:be:ef:13:37
+ local dummydev="dummy1"
+ local brdev="test-br0"
+ local ret=0
+
+ run_cmd_grep 'bridge fdb get' bridge fdb help
+ if [ $? -ne 0 ]; then
+ end_test "SKIP: fdb del tests: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ setup_ns testns
+ if [ $? -ne 0 ]; then
+ end_test "SKIP fdb del tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+ IP="ip -netns $testns"
+ BRIDGE="bridge -netns $testns"
+ run_cmd $IP link add $dummydev type dummy
+ run_cmd $IP link add name $brdev type bridge vlan_filtering 1
+ run_cmd $IP link set dev $dummydev master $brdev
+ run_cmd $BRIDGE fdb add $test_mac dev $dummydev master static vlan 1
+ run_cmd $BRIDGE vlan del vid 1 dev $dummydev
+ run_cmd $BRIDGE fdb get $test_mac br $brdev vlan 1
+ run_cmd $BRIDGE fdb del $test_mac dev $dummydev master vlan 1
+ run_cmd_fail $BRIDGE fdb get $test_mac br $brdev vlan 1
+
+ ip netns del $testns &>/dev/null
+
+ if [ $ret -ne 0 ]; then
+ end_test "FAIL: bridge fdb del"
+ return 1
+ fi
+
+ end_test "PASS: bridge fdb del"
+}
+
kci_test_neigh_get()
{
dstmac=de:ad:be:ef:13:37
@@ -1174,6 +1228,12 @@ do_test_address_proto()
local ret=0
local err
+ run_cmd_grep 'proto' ip address help
+ if [ $? -ne 0 ];then
+ end_test "SKIP: addr proto ${what}: iproute2 too old"
+ return $ksft_skip
+ fi
+
ip address add dev "$devdummy" "$addr3"
check_err $?
proto=$(address_get_proto "$addr3")
@@ -1267,6 +1327,132 @@ kci_test_enslave_bonding()
ip netns del "$testns"
}
+# Called to validate the addresses on $IFNAME:
+#
+# 1. Every `temporary` address must have a matching `mngtmpaddr`
+# 2. Every `mngtmpaddr` address must have some un`deprecated` `temporary`
+#
+# If the mngtmpaddr or tempaddr checking failed, return 0 and stop slowwait
+validate_mngtmpaddr()
+{
+ local dev=$1
+ local prefix=""
+ local addr_list=$(ip -j -n $testns addr show dev ${dev})
+ local temp_addrs=$(echo ${addr_list} | \
+ jq -r '.[].addr_info[] | select(.temporary == true) | .local')
+ local mng_prefixes=$(echo ${addr_list} | \
+ jq -r '.[].addr_info[] | select(.mngtmpaddr == true) | .local' | \
+ cut -d: -f1-4 | tr '\n' ' ')
+ local undep_prefixes=$(echo ${addr_list} | \
+ jq -r '.[].addr_info[] | select(.temporary == true and .deprecated != true) | .local' | \
+ cut -d: -f1-4 | tr '\n' ' ')
+
+ # 1. All temporary addresses (temp and dep) must have a matching mngtmpaddr
+ for address in ${temp_addrs}; do
+ prefix=$(echo ${address} | cut -d: -f1-4)
+ if [[ ! " ${mng_prefixes} " =~ " $prefix " ]]; then
+ check_err 1 "FAIL: Temporary $address with no matching mngtmpaddr!";
+ return 0
+ fi
+ done
+
+ # 2. All mngtmpaddr addresses must have a temporary address (not dep)
+ for prefix in ${mng_prefixes}; do
+ if [[ ! " ${undep_prefixes} " =~ " $prefix " ]]; then
+ check_err 1 "FAIL: No undeprecated temporary in $prefix!";
+ return 0
+ fi
+ done
+
+ return 1
+}
+
+kci_test_mngtmpaddr()
+{
+ local ret=0
+
+ setup_ns testns
+ if [ $? -ne 0 ]; then
+ end_test "SKIP mngtmpaddr tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ # 1. Create a dummy Ethernet interface
+ run_cmd ip -n $testns link add ${devdummy} type dummy
+ run_cmd ip -n $testns link set ${devdummy} up
+ run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.use_tempaddr=1
+ run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_prefered_lft=10
+ run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_valid_lft=25
+ run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.max_desync_factor=1
+
+ # 2. Create several mngtmpaddr addresses on that interface.
+ # with temp_*_lft configured to be pretty short (10 and 35 seconds
+ # for prefer/valid respectively)
+ for i in $(seq 1 9); do
+ run_cmd ip -n $testns addr add 2001:db8:7e57:${i}::1/64 mngtmpaddr dev ${devdummy}
+ done
+
+ # 3. Confirm that a preferred temporary address exists for each mngtmpaddr
+ # address at all times, polling once per second for 30 seconds.
+ slowwait 30 validate_mngtmpaddr ${devdummy}
+
+ # 4. Delete each mngtmpaddr address, one at a time (alternating between
+ # deleting and merely un-mngtmpaddr-ing), and confirm that the other
+ # mngtmpaddr addresses still have preferred temporaries.
+ for i in $(seq 1 9); do
+ (( $i % 4 == 0 )) && mng_flag="mngtmpaddr" || mng_flag=""
+ if (( $i % 2 == 0 )); then
+ run_cmd ip -n $testns addr del 2001:db8:7e57:${i}::1/64 $mng_flag dev ${devdummy}
+ else
+ run_cmd ip -n $testns addr change 2001:db8:7e57:${i}::1/64 dev ${devdummy}
+ fi
+ # the temp addr should be deleted
+ validate_mngtmpaddr ${devdummy}
+ done
+
+ if [ $ret -ne 0 ]; then
+ end_test "FAIL: mngtmpaddr add/remove incorrect"
+ else
+ end_test "PASS: mngtmpaddr add/remove correctly"
+ fi
+
+ ip netns del "$testns"
+ return $ret
+}
+
+kci_test_operstate()
+{
+ local ret=0
+
+ # Check that it is possible to set operational state during device
+ # creation and that it is preserved when the administrative state of
+ # the device is toggled.
+ run_cmd ip link add name vx0 up state up type vxlan id 10010 dstport 4789
+ run_cmd_grep "state UP" ip link show dev vx0
+ run_cmd ip link set dev vx0 down
+ run_cmd_grep "state DOWN" ip link show dev vx0
+ run_cmd ip link set dev vx0 up
+ run_cmd_grep "state UP" ip link show dev vx0
+
+ run_cmd ip link del dev vx0
+
+ # Check that it is possible to set the operational state of the device
+ # after creation.
+ run_cmd ip link add name vx0 up type vxlan id 10010 dstport 4789
+ run_cmd_grep "state UNKNOWN" ip link show dev vx0
+ run_cmd ip link set dev vx0 state up
+ run_cmd_grep "state UP" ip link show dev vx0
+
+ run_cmd ip link del dev vx0
+
+ if [ "$ret" -ne 0 ]; then
+ end_test "FAIL: operstate"
+ return 1
+ fi
+
+ end_test "PASS: operstate"
+}
+
kci_test_rtnl()
{
local current_test
@@ -1300,6 +1486,8 @@ usage: ${0##*/} OPTS
EOF
}
+require_command jq
+
#check for needed privileges
if [ "$(id -u)" -ne 0 ];then
end_test "SKIP: Need root privileges"
diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh
new file mode 100755
index 000000000000..3f9780232bd6
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink_notification.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking rtnetlink notification callpaths, and get as much
+# coverage as possible.
+#
+# set -e
+
+ALL_TESTS="
+ kci_test_mcast_addr_notification
+ kci_test_anycast_addr_notification
+"
+
+source lib.sh
+test_dev="test-dummy1"
+
+kci_test_mcast_addr_notification()
+{
+ RET=0
+ local tmpfile
+ local monitor_pid
+ local match_result
+
+ tmpfile=$(mktemp)
+ defer rm "$tmpfile"
+
+ ip monitor maddr > $tmpfile &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+
+ sleep 1
+
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "mcast addr notification: iproute2 too old"
+ return $RET
+ fi
+
+ ip link add name "$test_dev" type dummy
+ check_err $? "failed to add dummy interface"
+ ip link set "$test_dev" up
+ check_err $? "failed to set dummy interface up"
+ ip link del dev "$test_dev"
+ check_err $? "Failed to delete dummy interface"
+ sleep 1
+
+ # There should be 4 line matches as follows.
+ # 13: test-dummy1    inet6 mcast ff02::1 scope global 
+ # 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+ # Deleted 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+ # Deleted 13: test-dummy1    inet6 mcast ff02::1 scope global 
+ match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile")
+ if [ "$match_result" -ne 4 ]; then
+ RET=$ksft_fail
+ fi
+ log_test "mcast addr notification: Expected 4 matches, got $match_result"
+ return $RET
+}
+
+kci_test_anycast_addr_notification()
+{
+ RET=0
+ local tmpfile
+ local monitor_pid
+ local match_result
+
+ tmpfile=$(mktemp)
+ defer rm "$tmpfile"
+
+ ip monitor acaddress > "$tmpfile" &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+ sleep 1
+
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "anycast addr notification: iproute2 too old"
+ return "$RET"
+ fi
+
+ ip link add name "$test_dev" type dummy
+ check_err $? "failed to add dummy interface"
+ ip link set "$test_dev" up
+ check_err $? "failed to set dummy interface up"
+ sysctl -qw net.ipv6.conf."$test_dev".forwarding=1
+ ip link del dev "$test_dev"
+ check_err $? "Failed to delete dummy interface"
+ sleep 1
+
+ # There should be 2 line matches as follows.
+ # 9: dummy2 inet6 any fe80:: scope global
+ # Deleted 9: dummy2 inet6 any fe80:: scope global
+ match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile")
+ if [ "$match_result" -ne 2 ]; then
+ RET=$ksft_fail
+ fi
+ log_test "anycast addr notification: Expected 2 matches, got $match_result"
+ return "$RET"
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+ RET=$ksft_skip
+ log_test "need root privileges"
+ exit $RET
+fi
+
+require_command ip
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 9eb42570294d..b81ed0352d6c 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -18,7 +18,7 @@
#include <linux/net_tstamp.h>
#include <linux/errqueue.h>
-#include "../kselftest.h"
+#include "kselftest.h"
struct options {
int so_timestamp;
@@ -57,6 +57,8 @@ static struct sof_flag sof_flags[] = {
SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE),
SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE),
SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE),
+ SOF_FLAG(SOF_TIMESTAMPING_OPT_RX_FILTER),
+ SOF_FLAG(SOF_TIMESTAMPING_RAW_HARDWARE),
};
static struct socket_type socket_types[] = {
@@ -98,6 +100,22 @@ static struct test_case test_cases[] = {
{}
},
{
+ { .so_timestamping = SOF_TIMESTAMPING_RAW_HARDWARE
+ | SOF_TIMESTAMPING_OPT_RX_FILTER },
+ {}
+ },
+ {
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
+ | SOF_TIMESTAMPING_OPT_RX_FILTER },
+ {}
+ },
+ {
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
+ | SOF_TIMESTAMPING_RX_SOFTWARE
+ | SOF_TIMESTAMPING_OPT_RX_FILTER },
+ { .swtstamp = true }
+ },
+ {
{ .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
| SOF_TIMESTAMPING_RX_SOFTWARE },
{ .swtstamp = true }
diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c
index f02f1f95d227..a04dac0b8027 100644
--- a/tools/testing/selftests/net/sctp_hello.c
+++ b/tools/testing/selftests/net/sctp_hello.c
@@ -29,7 +29,6 @@ static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len
static int do_client(int argc, char *argv[])
{
struct sockaddr_storage ss;
- char buf[] = "hello";
int csk, ret, len;
if (argc < 5) {
@@ -56,16 +55,10 @@ static int do_client(int argc, char *argv[])
set_addr(&ss, argv[3], argv[4], &len);
ret = connect(csk, (struct sockaddr *)&ss, len);
- if (ret < 0) {
- printf("failed to connect to peer\n");
+ if (ret < 0)
return -1;
- }
- ret = send(csk, buf, strlen(buf) + 1, 0);
- if (ret < 0) {
- printf("failed to send msg %d\n", ret);
- return -1;
- }
+ recv(csk, NULL, 0, 0);
close(csk);
return 0;
@@ -75,7 +68,6 @@ int main(int argc, char *argv[])
{
struct sockaddr_storage ss;
int lsk, csk, ret, len;
- char buf[20];
if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
printf("%s server|client ...\n", argv[0]);
@@ -125,11 +117,6 @@ int main(int argc, char *argv[])
return -1;
}
- ret = recv(csk, buf, sizeof(buf), 0);
- if (ret <= 0) {
- printf("failed to recv msg %d\n", ret);
- return -1;
- }
close(csk);
close(lsk);
diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh
index c854034b6aa1..667b211aa8a1 100755
--- a/tools/testing/selftests/net/sctp_vrf.sh
+++ b/tools/testing/selftests/net/sctp_vrf.sh
@@ -20,9 +20,9 @@ setup() {
modprobe sctp_diag
setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS
- ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
- ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
- ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
+ ip net exec $CLIENT_NS1 sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip net exec $CLIENT_NS2 sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip net exec $SERVER_NS sysctl -wq net.ipv6.conf.default.accept_dad=0
ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1
ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2
@@ -62,17 +62,40 @@ setup() {
}
cleanup() {
- ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
+ wait_client $CLIENT_NS1
+ wait_client $CLIENT_NS2
+ stop_server
cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS
}
-wait_server() {
+start_server() {
local IFACE=$1
local CNT=0
- until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \
- grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do
- [ $((CNT++)) = "20" ] && { RET=3; return $RET; }
+ ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP $SERVER_PORT $IFACE &
+ disown
+ until ip netns exec $SERVER_NS ss -SlH | grep -q "$IFACE"; do
+ [ $((CNT++)) -eq 30 ] && { RET=3; return $RET; }
+ sleep 0.1
+ done
+}
+
+stop_server() {
+ local CNT=0
+
+ ip netns exec $SERVER_NS pkill sctp_hello
+ while ip netns exec $SERVER_NS ss -SaH | grep -q .; do
+ [ $((CNT++)) -eq 30 ] && break
+ sleep 0.1
+ done
+}
+
+wait_client() {
+ local CLIENT_NS=$1
+ local CNT=0
+
+ while ip netns exec $CLIENT_NS ss -SaH | grep -q .; do
+ [ $((CNT++)) -eq 30 ] && break
sleep 0.1
done
}
@@ -81,14 +104,12 @@ do_test() {
local CLIENT_NS=$1
local IFACE=$2
- ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
- ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
- $SERVER_PORT $IFACE 2>&1 >/dev/null &
- disown
- wait_server $IFACE || return $RET
+ start_server $IFACE || return $RET
timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \
- $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT
RET=$?
+ wait_client $CLIENT_NS
+ stop_server
return $RET
}
@@ -96,25 +117,21 @@ do_testx() {
local IFACE1=$1
local IFACE2=$2
- ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
- ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
- $SERVER_PORT $IFACE1 2>&1 >/dev/null &
- disown
- wait_server $IFACE1 || return $RET
- ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
- $SERVER_PORT $IFACE2 2>&1 >/dev/null &
- disown
- wait_server $IFACE2 || return $RET
+ start_server $IFACE1 || return $RET
+ start_server $IFACE2 || return $RET
timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \
- $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT && \
timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \
- $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT
RET=$?
+ wait_client $CLIENT_NS1
+ wait_client $CLIENT_NS2
+ stop_server
return $RET
}
testup() {
- ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null
+ ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=1
echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y "
do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; }
echo "[PASS]"
@@ -123,7 +140,7 @@ testup() {
do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; }
echo "[PASS]"
- ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null
+ ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=0
echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N "
do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; }
echo "[PASS]"
@@ -160,7 +177,7 @@ testup() {
do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; }
echo "[PASS]"
- echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N "
+ echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, Y "
do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; }
echo "[PASS]"
}
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
deleted file mode 100644
index 2070b57849de..000000000000
--- a/tools/testing/selftests/net/setup_loopback.sh
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
-readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
-readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
-readonly HARD_IRQS="$(< ${IRQ_PATH})"
-readonly server_ns=$(mktemp -u server-XXXXXXXX)
-readonly client_ns=$(mktemp -u client-XXXXXXXX)
-
-netdev_check_for_carrier() {
- local -r dev="$1"
-
- for i in {1..5}; do
- carrier="$(cat /sys/class/net/${dev}/carrier)"
- if [[ "${carrier}" -ne 1 ]] ; then
- echo "carrier not ready yet..." >&2
- sleep 1
- else
- echo "carrier ready" >&2
- break
- fi
- done
- echo "${carrier}"
-}
-
-# Assumes that there is no existing ipvlan device on the physical device
-setup_loopback_environment() {
- local dev="$1"
-
- # Fail hard if cannot turn on loopback mode for current NIC
- ethtool -K "${dev}" loopback on || exit 1
- sleep 1
-
- # Check for the carrier
- carrier=$(netdev_check_for_carrier ${dev})
- if [[ "${carrier}" -ne 1 ]] ; then
- echo "setup_loopback_environment failed"
- exit 1
- fi
-}
-
-setup_macvlan_ns(){
- local -r link_dev="$1"
- local -r ns_name="$2"
- local -r ns_dev="$3"
- local -r ns_mac="$4"
- local -r addr="$5"
-
- ip link add link "${link_dev}" dev "${ns_dev}" \
- address "${ns_mac}" type macvlan
- exit_code=$?
- if [[ "${exit_code}" -ne 0 ]]; then
- echo "setup_macvlan_ns failed"
- exit $exit_code
- fi
-
- [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
- ip link set dev "${ns_dev}" netns "${ns_name}"
- ip -netns "${ns_name}" link set dev "${ns_dev}" up
- if [[ -n "${addr}" ]]; then
- ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
- fi
-
- sleep 1
-}
-
-cleanup_macvlan_ns(){
- while (( $# >= 2 )); do
- ns_name="$1"
- ns_dev="$2"
- ip -netns "${ns_name}" link del dev "${ns_dev}"
- ip netns del "${ns_name}"
- shift 2
- done
-}
-
-cleanup_loopback(){
- local -r dev="$1"
-
- ethtool -K "${dev}" loopback off
- sleep 1
-
- # Check for the carrier
- carrier=$(netdev_check_for_carrier ${dev})
- if [[ "${carrier}" -ne 1 ]] ; then
- echo "setup_loopback_environment failed"
- exit 1
- fi
-}
-
-setup_interrupt() {
- # Use timer on host to trigger the network stack
- # Also disable device interrupt to not depend on NIC interrupt
- # Reduce test flakiness caused by unexpected interrupts
- echo 100000 >"${FLUSH_PATH}"
- echo 50 >"${IRQ_PATH}"
-}
-
-setup_ns() {
- # Set up server_ns namespace and client_ns namespace
- setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
- setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
-}
-
-cleanup_ns() {
- cleanup_macvlan_ns ${server_ns} server ${client_ns} client
-}
-
-setup() {
- setup_loopback_environment "${dev}"
- setup_interrupt
-}
-
-cleanup() {
- cleanup_loopback "${dev}"
-
- echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
- echo "${HARD_IRQS}" >"${IRQ_PATH}"
-}
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
deleted file mode 100644
index 1f78a87f6f37..000000000000
--- a/tools/testing/selftests/net/setup_veth.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly server_ns=$(mktemp -u server-XXXXXXXX)
-readonly client_ns=$(mktemp -u client-XXXXXXXX)
-
-setup_veth_ns() {
- local -r link_dev="$1"
- local -r ns_name="$2"
- local -r ns_dev="$3"
- local -r ns_mac="$4"
-
- [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
- echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
- ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
- ip -netns "${ns_name}" link set dev "${ns_dev}" up
-
- ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off
-}
-
-setup_ns() {
- # Set up server_ns namespace and client_ns namespace
- ip link add name server type veth peer name client
-
- setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
- setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
-}
-
-cleanup_ns() {
- local ns_name
-
- for ns_name in ${client_ns} ${server_ns}; do
- [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
- done
-}
-
-setup() {
- # no global init setup step needed
- :
-}
-
-cleanup() {
- cleanup_ns
-}
diff --git a/tools/testing/selftests/net/sk_so_peek_off.c b/tools/testing/selftests/net/sk_so_peek_off.c
new file mode 100644
index 000000000000..2a3f5c604f52
--- /dev/null
+++ b/tools/testing/selftests/net/sk_so_peek_off.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include "kselftest.h"
+
+static char *afstr(int af, int proto)
+{
+ if (proto == IPPROTO_TCP)
+ return af == AF_INET ? "TCP/IPv4" : "TCP/IPv6";
+ else
+ return af == AF_INET ? "UDP/IPv4" : "UDP/IPv6";
+}
+
+int sk_peek_offset_probe(sa_family_t af, int proto)
+{
+ int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM);
+ int optv = 0;
+ int ret = 0;
+ int s;
+
+ s = socket(af, type, proto);
+ if (s < 0) {
+ ksft_perror("Temporary TCP socket creation failed");
+ } else {
+ if (!setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &optv, sizeof(int)))
+ ret = 1;
+ else
+ printf("%s does not support SO_PEEK_OFF\n", afstr(af, proto));
+ close(s);
+ }
+ return ret;
+}
+
+static void sk_peek_offset_set(int s, int offset)
+{
+ if (setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, sizeof(offset)))
+ ksft_perror("Failed to set SO_PEEK_OFF value\n");
+}
+
+static int sk_peek_offset_get(int s)
+{
+ int offset;
+ socklen_t len = sizeof(offset);
+
+ if (getsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, &len))
+ ksft_perror("Failed to get SO_PEEK_OFF value\n");
+ return offset;
+}
+
+static int sk_peek_offset_test(sa_family_t af, int proto)
+{
+ int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM);
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in a4;
+ struct sockaddr_in6 a6;
+ } a;
+ int res = 0;
+ int s[2] = {0, 0};
+ int recv_sock = 0;
+ int offset = 0;
+ ssize_t len;
+ char buf[2];
+
+ memset(&a, 0, sizeof(a));
+ a.sa.sa_family = af;
+
+ s[0] = recv_sock = socket(af, type, proto);
+ s[1] = socket(af, type, proto);
+
+ if (s[0] < 0 || s[1] < 0) {
+ ksft_perror("Temporary socket creation failed\n");
+ goto out;
+ }
+ if (bind(s[0], &a.sa, sizeof(a)) < 0) {
+ ksft_perror("Temporary socket bind() failed\n");
+ goto out;
+ }
+ if (getsockname(s[0], &a.sa, &((socklen_t) { sizeof(a) })) < 0) {
+ ksft_perror("Temporary socket getsockname() failed\n");
+ goto out;
+ }
+ if (proto == IPPROTO_TCP && listen(s[0], 0) < 0) {
+ ksft_perror("Temporary socket listen() failed\n");
+ goto out;
+ }
+ if (connect(s[1], &a.sa, sizeof(a)) < 0) {
+ ksft_perror("Temporary socket connect() failed\n");
+ goto out;
+ }
+ if (proto == IPPROTO_TCP) {
+ recv_sock = accept(s[0], NULL, NULL);
+ if (recv_sock <= 0) {
+ ksft_perror("Temporary socket accept() failed\n");
+ goto out;
+ }
+ }
+
+ /* Some basic tests of getting/setting offset */
+ offset = sk_peek_offset_get(recv_sock);
+ if (offset != -1) {
+ ksft_perror("Initial value of socket offset not -1\n");
+ goto out;
+ }
+ sk_peek_offset_set(recv_sock, 0);
+ offset = sk_peek_offset_get(recv_sock);
+ if (offset != 0) {
+ ksft_perror("Failed to set socket offset to 0\n");
+ goto out;
+ }
+
+ /* Transfer a message */
+ if (send(s[1], (char *)("ab"), 2, 0) != 2) {
+ ksft_perror("Temporary probe socket send() failed\n");
+ goto out;
+ }
+ /* Read first byte */
+ len = recv(recv_sock, buf, 1, MSG_PEEK);
+ if (len != 1 || buf[0] != 'a') {
+ ksft_perror("Failed to read first byte of message\n");
+ goto out;
+ }
+ offset = sk_peek_offset_get(recv_sock);
+ if (offset != 1) {
+ ksft_perror("Offset not forwarded correctly at first byte\n");
+ goto out;
+ }
+ /* Try to read beyond last byte */
+ len = recv(recv_sock, buf, 2, MSG_PEEK);
+ if (len != 1 || buf[0] != 'b') {
+ ksft_perror("Failed to read last byte of message\n");
+ goto out;
+ }
+ offset = sk_peek_offset_get(recv_sock);
+ if (offset != 2) {
+ ksft_perror("Offset not forwarded correctly at last byte\n");
+ goto out;
+ }
+ /* Flush message */
+ len = recv(recv_sock, buf, 2, MSG_TRUNC);
+ if (len != 2) {
+ ksft_perror("Failed to flush message\n");
+ goto out;
+ }
+ offset = sk_peek_offset_get(recv_sock);
+ if (offset != 0) {
+ ksft_perror("Offset not reverted correctly after flush\n");
+ goto out;
+ }
+
+ printf("%s with MSG_PEEK_OFF works correctly\n", afstr(af, proto));
+ res = 1;
+out:
+ if (proto == IPPROTO_TCP && recv_sock >= 0)
+ close(recv_sock);
+ if (s[1] >= 0)
+ close(s[1]);
+ if (s[0] >= 0)
+ close(s[0]);
+ return res;
+}
+
+static int do_test(int proto)
+{
+ int res4, res6;
+
+ res4 = sk_peek_offset_probe(AF_INET, proto);
+ res6 = sk_peek_offset_probe(AF_INET6, proto);
+
+ if (!res4 && !res6)
+ return KSFT_SKIP;
+
+ if (res4)
+ res4 = sk_peek_offset_test(AF_INET, proto);
+
+ if (res6)
+ res6 = sk_peek_offset_test(AF_INET6, proto);
+
+ if (!res4 || !res6)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
+
+int main(void)
+{
+ int restcp, resudp;
+
+ restcp = do_test(IPPROTO_TCP);
+ resudp = do_test(IPPROTO_UDP);
+ if (restcp == KSFT_FAIL || resudp == KSFT_FAIL)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/net/skf_net_off.c b/tools/testing/selftests/net/skf_net_off.c
new file mode 100644
index 000000000000..1fdf61d6cd7f
--- /dev/null
+++ b/tools/testing/selftests/net/skf_net_off.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Open a tun device.
+ *
+ * [modifications: use IFF_NAPI_FRAGS, add sk filter]
+ *
+ * Expects the device to have been configured previously, e.g.:
+ * sudo ip tuntap add name tap1 mode tap
+ * sudo ip link set tap1 up
+ * sudo ip link set dev tap1 addr 02:00:00:00:00:01
+ * sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
+ *
+ * And to avoid premature pskb_may_pull:
+ *
+ * sudo ethtool -K tap1 gro off
+ * sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux'
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if.h>
+#include <linux/if_packet.h>
+#include <linux/if_tun.h>
+#include <linux/ipv6.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+static bool cfg_do_filter;
+static bool cfg_do_frags;
+static int cfg_dst_port = 8000;
+static char *cfg_ifname;
+
+static int tun_open(const char *tun_name)
+{
+ struct ifreq ifr = {0};
+ int fd, ret;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd == -1)
+ error(1, errno, "open /dev/net/tun");
+
+ ifr.ifr_flags = IFF_TAP;
+ if (cfg_do_frags)
+ ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS;
+
+ strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1);
+
+ ret = ioctl(fd, TUNSETIFF, &ifr);
+ if (ret)
+ error(1, ret, "ioctl TUNSETIFF");
+
+ return fd;
+}
+
+static void sk_set_filter(int fd)
+{
+ const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt);
+ const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest);
+
+ /* Filter UDP packets with destination port cfg_dst_port */
+ struct sock_filter filter_code[] = {
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+ };
+
+ struct sock_fprog filter = {
+ sizeof(filter_code) / sizeof(filter_code[0]),
+ filter_code,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)))
+ error(1, errno, "setsockopt attach filter");
+}
+
+static int raw_open(void)
+{
+ int fd;
+
+ fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP);
+ if (fd == -1)
+ error(1, errno, "socket raw (udp)");
+
+ if (cfg_do_filter)
+ sk_set_filter(fd);
+
+ return fd;
+}
+
+static void tun_write(int fd)
+{
+ const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 };
+ const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
+ struct tun_pi pi = {0};
+ struct ipv6hdr ip6h = {0};
+ struct udphdr uh = {0};
+ struct ethhdr eth = {0};
+ uint32_t payload;
+ struct iovec iov[5];
+ int ret;
+
+ pi.proto = htons(ETH_P_IPV6);
+
+ memcpy(eth.h_source, eth_src, sizeof(eth_src));
+ memcpy(eth.h_dest, eth_dst, sizeof(eth_dst));
+ eth.h_proto = htons(ETH_P_IPV6);
+
+ ip6h.version = 6;
+ ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t));
+ ip6h.nexthdr = IPPROTO_UDP;
+ ip6h.hop_limit = 8;
+ if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1)
+ error(1, errno, "inet_pton src");
+ if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1)
+ error(1, errno, "inet_pton src");
+
+ uh.source = htons(8000);
+ uh.dest = htons(cfg_dst_port);
+ uh.len = ip6h.payload_len;
+ uh.check = 0;
+
+ payload = htonl(0xABABABAB); /* Covered in IPv6 length */
+
+ iov[0].iov_base = &pi;
+ iov[0].iov_len = sizeof(pi);
+ iov[1].iov_base = &eth;
+ iov[1].iov_len = sizeof(eth);
+ iov[2].iov_base = &ip6h;
+ iov[2].iov_len = sizeof(ip6h);
+ iov[3].iov_base = &uh;
+ iov[3].iov_len = sizeof(uh);
+ iov[4].iov_base = &payload;
+ iov[4].iov_len = sizeof(payload);
+
+ ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0]));
+ if (ret <= 0)
+ error(1, errno, "writev");
+}
+
+static void raw_read(int fd)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ struct msghdr msg = {0};
+ struct iovec iov[2];
+ struct udphdr uh;
+ uint32_t payload[2];
+ int ret;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcvtimeo udp");
+
+ iov[0].iov_base = &uh;
+ iov[0].iov_len = sizeof(uh);
+
+ iov[1].iov_base = payload;
+ iov[1].iov_len = sizeof(payload);
+
+ msg.msg_iov = iov;
+ msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]);
+
+ ret = recvmsg(fd, &msg, 0);
+ if (ret <= 0)
+ error(1, errno, "read raw");
+ if (ret != sizeof(uh) + sizeof(payload[0]))
+ error(1, errno, "read raw: len=%d\n", ret);
+
+ fprintf(stderr, "raw recv: 0x%x\n", payload[0]);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "fFi:")) != -1) {
+ switch (c) {
+ case 'f':
+ cfg_do_filter = true;
+ printf("bpf filter enabled\n");
+ break;
+ case 'F':
+ cfg_do_frags = true;
+ printf("napi frags mode enabled\n");
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ default:
+ error(1, 0, "unknown option %c", optopt);
+ break;
+ }
+ }
+
+ if (!cfg_ifname)
+ error(1, 0, "must specify tap interface name (-i)");
+}
+
+int main(int argc, char **argv)
+{
+ int fdt, fdr;
+
+ parse_opts(argc, argv);
+
+ fdr = raw_open();
+ fdt = tun_open(cfg_ifname);
+
+ tun_write(fdt);
+ raw_read(fdr);
+
+ if (close(fdt))
+ error(1, errno, "close tun");
+ if (close(fdr))
+ error(1, errno, "close udp");
+
+ fprintf(stderr, "OK\n");
+ return 0;
+}
+
diff --git a/tools/testing/selftests/net/skf_net_off.sh b/tools/testing/selftests/net/skf_net_off.sh
new file mode 100755
index 000000000000..5da5066fb465
--- /dev/null
+++ b/tools/testing/selftests/net/skf_net_off.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly NS="ns-$(mktemp -u XXXXXX)"
+
+cleanup() {
+ ip netns del $NS
+}
+
+ip netns add $NS
+trap cleanup EXIT
+
+ip -netns $NS link set lo up
+ip -netns $NS tuntap add name tap1 mode tap
+ip -netns $NS link set tap1 up
+ip -netns $NS link set dev tap1 addr 02:00:00:00:00:01
+ip -netns $NS -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
+ip netns exec $NS ethtool -K tap1 gro off
+
+# disable early demux, else udp_v6_early_demux pulls udp header into linear
+ip netns exec $NS sysctl -w net.ipv4.ip_early_demux=0
+
+echo "no filter"
+ip netns exec $NS ./skf_net_off -i tap1
+
+echo "filter, linear skb (-f)"
+ip netns exec $NS ./skf_net_off -i tap1 -f
+
+echo "filter, fragmented skb (-f) (-F)"
+ip netns exec $NS ./skf_net_off -i tap1 -f -F
diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c
index e9fa14e10732..4740701f1a9a 100644
--- a/tools/testing/selftests/net/so_incoming_cpu.c
+++ b/tools/testing/selftests/net/so_incoming_cpu.c
@@ -9,7 +9,7 @@
#include <sys/socket.h>
#include <sys/sysinfo.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(so_incoming_cpu)
{
diff --git a/tools/testing/selftests/net/so_rcv_listener.c b/tools/testing/selftests/net/so_rcv_listener.c
new file mode 100644
index 000000000000..bc5841192aa6
--- /dev/null
+++ b/tools/testing/selftests/net/so_rcv_listener.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <netdb.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#ifndef SO_RCVPRIORITY
+#define SO_RCVPRIORITY 82
+#endif
+
+struct options {
+ __u32 val;
+ int name;
+ int rcvname;
+ const char *host;
+ const char *service;
+} opt;
+
+static void __attribute__((noreturn)) usage(const char *bin)
+{
+ printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin);
+ printf("Options:\n"
+ "\t\t-M val Test SO_RCVMARK\n"
+ "\t\t-P val Test SO_RCVPRIORITY\n"
+ "");
+ exit(EXIT_FAILURE);
+}
+
+static void parse_args(int argc, char *argv[])
+{
+ int o;
+
+ while ((o = getopt(argc, argv, "M:P:")) != -1) {
+ switch (o) {
+ case 'M':
+ opt.val = atoi(optarg);
+ opt.name = SO_MARK;
+ opt.rcvname = SO_RCVMARK;
+ break;
+ case 'P':
+ opt.val = atoi(optarg);
+ opt.name = SO_PRIORITY;
+ opt.rcvname = SO_RCVPRIORITY;
+ break;
+ default:
+ usage(argv[0]);
+ break;
+ }
+ }
+
+ if (optind != argc - 2)
+ usage(argv[0]);
+
+ opt.host = argv[optind];
+ opt.service = argv[optind + 1];
+}
+
+int main(int argc, char *argv[])
+{
+ int err = 0;
+ int recv_fd = -1;
+ int ret_value = 0;
+ __u32 recv_val;
+ struct cmsghdr *cmsg;
+ char cbuf[CMSG_SPACE(sizeof(__u32))];
+ char recv_buf[CMSG_SPACE(sizeof(__u32))];
+ struct iovec iov[1];
+ struct msghdr msg;
+ struct sockaddr_in recv_addr4;
+ struct sockaddr_in6 recv_addr6;
+
+ parse_args(argc, argv);
+
+ int family = strchr(opt.host, ':') ? AF_INET6 : AF_INET;
+
+ recv_fd = socket(family, SOCK_DGRAM, IPPROTO_UDP);
+ if (recv_fd < 0) {
+ perror("Can't open recv socket");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ err = setsockopt(recv_fd, SOL_SOCKET, opt.rcvname, &opt.val, sizeof(opt.val));
+ if (err < 0) {
+ perror("Recv setsockopt error");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ if (family == AF_INET) {
+ memset(&recv_addr4, 0, sizeof(recv_addr4));
+ recv_addr4.sin_family = family;
+ recv_addr4.sin_port = htons(atoi(opt.service));
+
+ if (inet_pton(family, opt.host, &recv_addr4.sin_addr) <= 0) {
+ perror("Invalid IPV4 address");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ err = bind(recv_fd, (struct sockaddr *)&recv_addr4, sizeof(recv_addr4));
+ } else {
+ memset(&recv_addr6, 0, sizeof(recv_addr6));
+ recv_addr6.sin6_family = family;
+ recv_addr6.sin6_port = htons(atoi(opt.service));
+
+ if (inet_pton(family, opt.host, &recv_addr6.sin6_addr) <= 0) {
+ perror("Invalid IPV6 address");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ err = bind(recv_fd, (struct sockaddr *)&recv_addr6, sizeof(recv_addr6));
+ }
+
+ if (err < 0) {
+ perror("Recv bind error");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ iov[0].iov_base = recv_buf;
+ iov[0].iov_len = sizeof(recv_buf);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+
+ err = recvmsg(recv_fd, &msg, 0);
+ if (err < 0) {
+ perror("Message receive error");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == opt.name) {
+ recv_val = *(__u32 *)CMSG_DATA(cmsg);
+ printf("Received value: %u\n", recv_val);
+
+ if (recv_val != opt.val) {
+ fprintf(stderr, "Error: expected value: %u, got: %u\n",
+ opt.val, recv_val);
+ ret_value = -EINVAL;
+ }
+ goto cleanup;
+ }
+ }
+
+ fprintf(stderr, "Error: No matching cmsg received\n");
+ ret_value = -ENOMSG;
+
+cleanup:
+ if (recv_fd >= 0)
+ close(recv_fd);
+
+ return ret_value;
+}
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 8457b7ccbc09..b76df1efc2ef 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -174,7 +174,7 @@ static int do_recv_errqueue_timeout(int fdt)
msg.msg_controllen = sizeof(control);
while (1) {
- const char *reason;
+ const char *reason = NULL;
ret = recvmsg(fdt, &msg, MSG_ERRQUEUE);
if (ret == -1 && errno == EAGAIN)
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
index db1aeb8c5d1e..9e270548dad8 100644
--- a/tools/testing/selftests/net/socket.c
+++ b/tools/testing/selftests/net/socket.c
@@ -7,7 +7,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
-#include "../kselftest.h"
+#include "kselftest.h"
struct socket_testcase {
int domain;
@@ -39,6 +39,7 @@ static int run_tests(void)
{
char err_string1[ERR_STRING_SZ];
char err_string2[ERR_STRING_SZ];
+ const char *msg1, *msg2;
int i, err;
err = 0;
@@ -56,13 +57,13 @@ static int run_tests(void)
errno == -s->expect)
continue;
- strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
- strerror_r(errno, err_string2, ERR_STRING_SZ);
+ msg1 = strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
+ msg2 = strerror_r(errno, err_string2, ERR_STRING_SZ);
fprintf(stderr, "socket(%d, %d, %d) expected "
"err (%s) got (%s)\n",
s->domain, s->type, s->protocol,
- err_string1, err_string2);
+ msg1, msg2);
err = -1;
break;
@@ -70,12 +71,12 @@ static int run_tests(void)
close(fd);
if (s->expect < 0) {
- strerror_r(errno, err_string1, ERR_STRING_SZ);
+ msg1 = strerror_r(errno, err_string1, ERR_STRING_SZ);
fprintf(stderr, "socket(%d, %d, %d) expected "
"success got err (%s)\n",
s->domain, s->type, s->protocol,
- err_string1);
+ msg1);
err = -1;
break;
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
index 02d617040793..a5e959a080bb 100755
--- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -285,11 +285,6 @@ setup_hs()
ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
ip -netns ${hsname} link set ${rtveth} netns ${rtname}
ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
index 79fb81e63c59..a649dba3cb77 100755
--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -250,11 +250,6 @@ setup_hs()
eval local rtname=\${rt_${rid}}
local rtveth=veth-t${tid}
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
ip -netns ${hsname} link set ${rtveth} netns ${rtname}
ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0
diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh
index 50563443a4ad..318487eda671 100755
--- a/tools/testing/selftests/net/srv6_end_flavors_test.sh
+++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh
@@ -399,7 +399,7 @@ __get_srv6_rtcfg_id()
# Given the description of a router <id:op> as an input, the function returns
# the <op> token which represents the operation (e.g. End behavior with or
-# withouth flavors) configured for the node.
+# without flavors) configured for the node.
# Note that when the operation represents an End behavior with a list of
# flavors, the output is the ordered version of that list.
@@ -480,7 +480,7 @@ setup_rt_local_sids()
# all SIDs start with a common locator. Routes and SRv6 Endpoint
- # behavior instaces are grouped together in the 'localsid' table.
+ # behavior instances are grouped together in the 'localsid' table.
ip -netns "${nsname}" -6 rule \
add to "${LOCATOR_SERVICE}::/16" \
lookup "${LOCALSID_TABLE_ID}" prio 999
diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
index 87e414cc417c..4bc135e5c22c 100755
--- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
@@ -245,10 +245,8 @@
# that adopted in the use cases already examined (of course, it is necessary to
# consider the different SIDs/C-SIDs).
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly DUMMY_DEVNAME="dum0"
readonly VRF_TID=100
readonly VRF_DEVNAME="vrf-${VRF_TID}"
@@ -376,32 +374,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -410,8 +394,7 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
create_host()
@@ -420,28 +403,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
@@ -462,10 +429,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -497,7 +464,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -518,9 +485,6 @@ setup_rt_networking()
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -596,7 +560,7 @@ setup_rt_local_sids()
local lcnode_func_prefix
local lcblock_prefix
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -630,7 +594,7 @@ setup_rt_local_sids()
dev "${DUMMY_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -668,8 +632,8 @@ __setup_l3vpn()
local rtsrc_nsname
local rtdst_nsname
- rtsrc_nsname="$(get_rtname "${src}")"
- rtdst_nsname="$(get_rtname "${dst}")"
+ eval rtsrc_nsname=\${$(get_rtname "${src}")}
+ eval rtdst_nsname=\${$(get_rtname "${dst}")}
container="${LCBLOCK_ADDR}"
@@ -744,8 +708,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -791,11 +755,6 @@ setup_hs()
ip netns exec "${rtname}" \
sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
@@ -880,7 +839,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -903,7 +862,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -915,7 +874,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
@@ -1025,7 +984,7 @@ rt_x_nextcsid_end_behavior_test()
local nsname
local ret
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
__nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}"
ret="$?"
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
index c79cb8ede17f..34b781a2ae74 100755
--- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
@@ -72,6 +72,9 @@
# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
# the selftest network.
#
+# In addition, every router interface connecting rt-x to rt-y is assigned an
+# IPv6 link-local address fe80::x:y/64.
+#
# Local SID/C-SID table
# =====================
#
@@ -287,10 +290,8 @@
# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46)
# and sends it to the host hs-1.
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly DUMMY_DEVNAME="dum0"
readonly VRF_TID=100
readonly VRF_DEVNAME="vrf-${VRF_TID}"
@@ -418,32 +419,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -452,15 +439,12 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
+ setup_ns "${nsname}"
- __create_namespace "${nsname}"
-
+ eval nsname=\${$(get_rtname "${rtid}")}
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -470,29 +454,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
# the selftest is considered as "skipped".
@@ -512,10 +479,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -547,7 +514,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -557,6 +524,9 @@ setup_rt_networking()
ip -netns "${nsname}" addr \
add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+ ip -netns "${nsname}" addr \
+ add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad
+
ip -netns "${nsname}" link set "${devname}" up
done
@@ -631,7 +601,7 @@ set_end_x_nextcsid()
local rt="$1"
local adj="$2"
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
net_prefix="$(get_network_prefix "${rt}" "${adj}")"
lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
@@ -645,12 +615,33 @@ set_end_x_nextcsid()
nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
}
+set_end_x_ll_nextcsid()
+{
+ local rt="$1"
+ local adj="$2"
+
+ eval nsname=\${$(get_rtname "${rt}")}
+ lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
+ nh6_ll_addr="fe80::${adj}:${rt}"
+ oifname="veth-rt-${rt}-${adj}"
+
+ # enabled NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local nexthop
+ # address (note that "dev" is the dummy dum0 device chosen for the sake
+ # of simplicity).
+ ip -netns "${nsname}" -6 route \
+ replace "${lcnode_func_prefix}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End.X nh6 "${nh6_ll_addr}" \
+ oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \
+ nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
+}
+
set_underlay_sids_reachability()
{
local rt="$1"
local rt_neighs="$2"
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -685,12 +676,12 @@ setup_rt_local_sids()
local lcnode_func_prefix
local lcblock_prefix
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
set_underlay_sids_reachability "${rt}" "${rt_neighs}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -728,8 +719,8 @@ __setup_l3vpn()
local rtsrc_nsname
local rtdst_nsname
- rtsrc_nsname="$(get_rtname "${src}")"
- rtdst_nsname="$(get_rtname "${dst}")"
+ eval rtsrc_nsname=\${$(get_rtname "${src}")}
+ eval rtdst_nsname=\${$(get_rtname "${dst}")}
container="${LCBLOCK_ADDR}"
@@ -804,8 +795,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -851,11 +842,6 @@ setup_hs()
ip netns exec "${rtname}" \
sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
@@ -947,7 +933,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -970,7 +956,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -982,7 +968,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
@@ -1057,6 +1043,27 @@ host_vpn_tests()
check_and_log_hs_ipv4_connectivity 1 2
check_and_log_hs_ipv4_connectivity 2 1
+
+ # Setup the adjacencies in the SRv6 aware routers using IPv6 link-local
+ # addresses.
+ # - rt-3 SRv6 End.X adjacency with rt-4
+ # - rt-4 SRv6 End.X adjacency with rt-1
+ set_end_x_ll_nextcsid 3 4
+ set_end_x_ll_nextcsid 4 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local"
+
+ check_and_log_hs_ipv6_connectivity 1 2
+ check_and_log_hs_ipv6_connectivity 2 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local"
+
+ check_and_log_hs_ipv4_connectivity 1 2
+ check_and_log_hs_ipv4_connectivity 2 1
+
+ # Restore the previous adjacencies.
+ set_end_x_nextcsid 3 4
+ set_end_x_nextcsid 4 1
}
__nextcsid_end_x_behavior_test()
@@ -1093,7 +1100,7 @@ rt_x_nextcsid_end_x_behavior_test()
local nsname
local ret
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
__nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}"
ret="$?"
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
index 28a775654b92..6a68c7eff1dc 100755
--- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
@@ -166,10 +166,8 @@
# hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d)
#
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly VRF_TID=100
readonly VRF_DEVNAME="vrf-${VRF_TID}"
readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
@@ -248,32 +246,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -282,8 +266,7 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
create_host()
@@ -292,29 +275,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
# the selftest is considered as "skipped".
@@ -334,10 +300,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -369,7 +335,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -387,9 +353,6 @@ setup_rt_networking()
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -403,7 +366,7 @@ setup_rt_local_sids()
local nsname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -432,7 +395,7 @@ setup_rt_local_sids()
dev "${VRF_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -469,7 +432,7 @@ __setup_rt_policy()
local policy=''
local n
- nsname="$(get_rtname "${encap_rt}")"
+ eval nsname=\${$(get_rtname "${encap_rt}")}
for n in ${end_rts}; do
policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
@@ -516,8 +479,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -555,11 +518,6 @@ setup_hs()
ip netns exec "${rtname}" \
sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
@@ -656,7 +614,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -679,7 +637,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -691,7 +649,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
index cb4177d41b21..0979b5316fdf 100755
--- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -116,10 +116,8 @@
# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b)
#
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly DUMMY_DEVNAME="dum0"
readonly RT2HS_DEVNAME="veth-hs"
readonly HS_VETH_NAME="veth0"
@@ -199,32 +197,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -233,8 +217,7 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
create_host()
@@ -243,28 +226,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
@@ -285,10 +252,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -320,7 +287,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -341,9 +308,6 @@ setup_rt_networking()
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -357,7 +321,7 @@ setup_rt_local_sids()
local nsname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -379,7 +343,7 @@ setup_rt_local_sids()
encap seg6local action End dev "${DUMMY_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behaviors instaces are grouped together in the 'localsid'
+ # Endpoint behaviors instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule add \
to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -407,7 +371,7 @@ __setup_rt_policy()
local policy=''
local n
- nsname="$(get_rtname "${encap_rt}")"
+ eval nsname=\${$(get_rtname "${encap_rt}")}
for n in ${end_rts}; do
policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
@@ -446,7 +410,7 @@ setup_decap()
local rt="$1"
local nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
# Local End.DX2 behavior
ip -netns "${nsname}" -6 route \
@@ -463,8 +427,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -486,11 +450,6 @@ setup_hs()
add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
-
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
}
# set an auto-generated mac address
@@ -508,7 +467,7 @@ set_mac_address()
local ifname="$4"
local nsname
- nsname=$(get_nodename "${nodename}")
+ eval nsname=\${${nodename}}
ip -netns "${nsname}" link set dev "${ifname}" down
@@ -532,7 +491,7 @@ set_host_l2peer()
local hssrc_name
local ipaddr
- hssrc_name="$(get_hsname "${hssrc}")"
+ eval hssrc_name=\${$(get_hsname "${hssrc}")}
if [ "${proto}" -eq 6 ]; then
ipaddr="${ipprefix}::${hsdst}"
@@ -562,7 +521,7 @@ setup_l2vpn()
local rtdst="${hsdst}"
# set fixed mac for source node and the neigh MAC address
- set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
+ set_mac_address "hs_${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6
set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4
@@ -570,7 +529,7 @@ setup_l2vpn()
# to the mac address of the remote peer (L2 VPN destination host).
# Otherwise, traffic coming from the source host is dropped at the
# ingress router.
- set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
+ set_mac_address "rt_${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
# set the SRv6 Policies at the ingress router
setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \
@@ -647,7 +606,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -670,7 +629,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -682,7 +641,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
diff --git a/tools/testing/selftests/net/tap.c b/tools/testing/selftests/net/tap.c
index 247c3b3ac1c9..9ec1c9b50e77 100644
--- a/tools/testing/selftests/net/tap.c
+++ b/tools/testing/selftests/net/tap.c
@@ -17,7 +17,7 @@
#include <linux/virtio_net.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static const char param_dev_tap_name[] = "xmacvtap0";
static const char param_dev_dummy_name[] = "xdummy0";
diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile
index 522d991e310e..5b0205c70c39 100644
--- a/tools/testing/selftests/net/tcp_ao/Makefile
+++ b/tools/testing/selftests/net/tcp_ao/Makefile
@@ -26,12 +26,13 @@ LIB := $(LIBDIR)/libaotst.a
LDLIBS += $(LIB) -pthread
LIBDEPS := lib/aolib.h Makefile
-CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing
+CFLAGS += -Wall -O2 -g -fno-strict-aliasing
CFLAGS += $(KHDR_INCLUDES)
CFLAGS += -iquote ./lib/ -I ../../../../include/
# Library
-LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c
+LIBSRC := ftrace.c ftrace-tcp.c kconfig.c netlink.c
+LIBSRC += proc.c repair.c setup.c sock.c utils.c
LIBOBJ := $(LIBSRC:%.c=$(LIBDIR)/%.o)
EXTRA_CLEAN += $(LIBOBJ) $(LIB)
diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
index a1e6e007c291..6736484996a3 100644
--- a/tools/testing/selftests/net/tcp_ao/bench-lookups.c
+++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
@@ -355,6 +355,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(30, server_fn, client_fn);
+ test_init(31, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config
index d3277a9de987..971cb6fa2d63 100644
--- a/tools/testing/selftests/net/tcp_ao/config
+++ b/tools/testing/selftests/net/tcp_ao/config
@@ -1,10 +1,11 @@
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_RMD160=y
CONFIG_CRYPTO_SHA1=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NET_VRF=y
CONFIG_TCP_AO=y
CONFIG_TCP_MD5SIG=y
+CONFIG_TRACEPOINTS=y
CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c
index 185a2f6e5ff3..93b61e9a36f1 100644
--- a/tools/testing/selftests/net/tcp_ao/connect-deny.c
+++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c
@@ -4,6 +4,7 @@
#include "aolib.h"
#define fault(type) (inj == FAULT_ ## type)
+static volatile int sk_pair;
static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen,
union tcp_addr in_addr, uint8_t prefix,
@@ -34,10 +35,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
const char *cnt_name, test_cnt cnt_expected,
fault_t inj)
{
- struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ struct tcp_counters cnt1, cnt2;
uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
+ test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
int lsk, err, sk = 0;
- time_t timeout;
lsk = test_listen_socket(this_ip_addr, port, 1);
@@ -46,21 +47,24 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
if (cnt_name)
before_cnt = netstat_get_one(cnt_name, NULL);
- if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (pwd && test_get_tcp_counters(lsk, &cnt1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- err = test_wait_fd(lsk, timeout, 0);
+ err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair);
if (err == -ETIMEDOUT) {
+ sk_pair = err;
if (!fault(TIMEOUT))
- test_fail("timed out for accept()");
+ test_fail("%s: timed out for accept()", tst_name);
+ } else if (err == -EKEYREJECTED) {
+ if (!fault(KEYREJECT))
+ test_fail("%s: key was rejected", tst_name);
} else if (err < 0) {
- test_error("test_wait_fd()");
+ test_error("test_skpair_wait_poll()");
} else {
if (fault(TIMEOUT))
- test_fail("ready to accept");
+ test_fail("%s: ready to accept", tst_name);
sk = accept(lsk, NULL, NULL);
if (sk < 0) {
@@ -71,12 +75,14 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
}
}
- if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2))
- test_error("test_get_tcp_ao_counters()");
+ synchronize_threads(); /* before counter checks */
+ if (pwd && test_get_tcp_counters(lsk, &cnt2))
+ test_error("test_get_tcp_counters()");
close(lsk);
+
if (pwd)
- test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
if (!cnt_name)
goto out;
@@ -84,10 +90,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
after_cnt = netstat_get_one(cnt_name, NULL);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %zu <= %zu",
+ test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %zu => %zu",
+ test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
@@ -107,7 +113,7 @@ static void *server_fn(void *arg)
try_accept("Non-AO server + AO client", port++, NULL,
this_ip_dest, -1, 100, 100, 0,
- "TCPAOKeyNotFound", 0, FAULT_TIMEOUT);
+ "TCPAOKeyNotFound", TEST_CNT_NS_KEY_NOT_FOUND, FAULT_TIMEOUT);
try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0,
@@ -133,8 +139,9 @@ static void *server_fn(void *arg)
wrong_addr, -1, 100, 100, 0,
"TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT);
+ /* Key rejected by the other side, failing short through skpair */
try_accept("Client: Wrong addr", port++, NULL,
- this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT);
+ this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_KEYREJECT);
try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 200, 100, 0,
@@ -161,8 +168,7 @@ static void try_connect(const char *tst_name, unsigned int port,
uint8_t sndid, uint8_t rcvid,
test_cnt cnt_expected, fault_t inj)
{
- struct tcp_ao_counters ao_cnt1, ao_cnt2;
- time_t timeout;
+ struct tcp_counters cnt1, cnt2;
int sk, ret;
sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
@@ -172,15 +178,15 @@ static void try_connect(const char *tst_name, unsigned int port,
if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid))
test_error("setsockopt(TCP_AO_ADD_KEY)");
- if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (pwd && test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
-
+ ret = test_skpair_connect_poll(sk, this_ip_dest, port, cnt_expected, &sk_pair);
+ synchronize_threads(); /* before counter checks */
if (ret < 0) {
+ sk_pair = ret;
if (fault(KEYREJECT) && ret == -EKEYREJECTED) {
test_ok("%s: connect() was prevented", tst_name);
} else if (ret == -ETIMEDOUT && fault(TIMEOUT)) {
@@ -199,9 +205,11 @@ static void try_connect(const char *tst_name, unsigned int port,
else
test_ok("%s: connected", tst_name);
if (pwd && ret > 0) {
- if (test_get_tcp_ao_counters(sk, &ao_cnt2))
- test_error("test_get_tcp_ao_counters()");
- test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
+ } else if (pwd) {
+ test_tcp_counters_free(&cnt1);
}
out:
synchronize_threads(); /* close() */
@@ -212,30 +220,49 @@ out:
static void *client_fn(void *arg)
{
- union tcp_addr wrong_addr, network_addr;
+ union tcp_addr wrong_addr, network_addr, addr_any = {};
unsigned int port = test_server_port;
if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1)
test_error("Can't convert ip address %s", TEST_WRONG_IP);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO server + Non-AO client", port++, NULL,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ /*
+ * XXX: The test doesn't increase any counters, see tcp_make_synack().
+ * Potentially, it can be speed up by setting sk_pair = -ETIMEDOUT
+ * but the price would be increased complexity of the tracer thread.
+ */
+ trace_ao_event_sk_expect(TCP_AO_SYNACK_NO_KEY, this_ip_dest, addr_any,
+ port, 0, 100, 100);
try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_expect(TCP_AO_WRONG_MACLEN, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
@@ -259,6 +286,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(21, server_fn, client_fn);
+ test_init(22, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c
index 81653b47f303..340f00e979ea 100644
--- a/tools/testing/selftests/net/tcp_ao/connect.c
+++ b/tools/testing/selftests/net/tcp_ao/connect.c
@@ -35,7 +35,7 @@ static void *client_fn(void *arg)
uint64_t before_aogood, after_aogood;
const size_t nr_packets = 20;
struct netstat *ns_before, *ns_after;
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters ao1, ao2;
if (sk < 0)
test_error("socket()");
@@ -50,41 +50,41 @@ static void *client_fn(void *arg)
ns_before = netstat_read();
before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &ao1))
+ test_error("test_get_tcp_counters()");
- if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, 100, nr_packets)) {
test_fail("verify failed");
return NULL;
}
ns_after = netstat_read();
after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &ao2))
+ test_error("test_get_tcp_counters()");
netstat_print_diff(ns_before, ns_after);
netstat_free(ns_before);
netstat_free(ns_after);
if (nr_packets > (after_aogood - before_aogood)) {
- test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)",
+ test_fail("TCPAOGood counter mismatch: %zu > (%" PRIu64 " - %" PRIu64 ")",
nr_packets, after_aogood, before_aogood);
return NULL;
}
- if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD))
+ if (test_assert_counters("connect", &ao1, &ao2, TEST_CNT_GOOD))
return NULL;
- test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64,
- before_aogood, ao1.ao_info_pkt_good,
- ao1.key_cnts[0].pkt_good,
- after_aogood, ao2.ao_info_pkt_good,
- ao2.key_cnts[0].pkt_good,
+ test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %zu",
+ before_aogood, ao1.ao.ao_info_pkt_good,
+ ao1.ao.key_cnts[0].pkt_good,
+ after_aogood, ao2.ao.ao_info_pkt_good,
+ ao2.ao.key_cnts[0].pkt_good,
nr_packets);
return NULL;
}
int main(int argc, char *argv[])
{
- test_init(1, server_fn, client_fn);
+ test_init(2, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
index d69bcba3c929..85c1a1e958c6 100644
--- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c
+++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
@@ -53,7 +53,7 @@ static void serve_interfered(int sk)
ssize_t test_quota = packet_size * packets_nr * 10;
uint64_t dest_unreach_a, dest_unreach_b;
uint64_t icmp_ignored_a, icmp_ignored_b;
- struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ struct tcp_counters cnt1, cnt2;
bool counter_not_found;
struct netstat *ns_after, *ns_before;
ssize_t bytes;
@@ -61,16 +61,16 @@ static void serve_interfered(int sk)
ns_before = netstat_read();
dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL);
icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL);
- if (test_get_tcp_ao_counters(sk, &ao_cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
bytes = test_server_run(sk, test_quota, 0);
ns_after = netstat_read();
netstat_print_diff(ns_before, ns_after);
dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL);
icmp_ignored_b = netstat_get(ns_after, tcpao_icmps,
&counter_not_found);
- if (test_get_tcp_ao_counters(sk, &ao_cnt2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
netstat_free(ns_before);
netstat_free(ns_after);
@@ -91,9 +91,9 @@ static void serve_interfered(int sk)
return;
}
#ifdef TEST_ICMPS_ACCEPT
- test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD);
+ test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD);
#else
- test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP);
+ test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP);
#endif
if (icmp_ignored_a >= icmp_ignored_b) {
test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64,
@@ -395,7 +395,6 @@ static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *d
static void send_interfered(int sk)
{
- const unsigned int timeout = TEST_TIMEOUT_SEC;
struct sockaddr_in6 src, dst;
socklen_t addr_sz;
@@ -409,7 +408,7 @@ static void send_interfered(int sk)
while (1) {
uint32_t rcv_nxt;
- if (test_client_verify(sk, packet_size, packets_nr, timeout)) {
+ if (test_client_verify(sk, packet_size, packets_nr)) {
test_fail("client: connection is broken");
return;
}
@@ -444,6 +443,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(3, server_fn, client_fn);
+ test_init(4, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c
index 24e62120b792..69d9a7a05d5c 100644
--- a/tools/testing/selftests/net/tcp_ao/key-management.c
+++ b/tools/testing/selftests/net/tcp_ao/key-management.c
@@ -629,11 +629,11 @@ static int key_collection_socket(bool server, unsigned int port)
}
static void verify_counters(const char *tst_name, bool is_listen_sk, bool server,
- struct tcp_ao_counters *a, struct tcp_ao_counters *b)
+ struct tcp_counters *a, struct tcp_counters *b)
{
unsigned int i;
- __test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD);
+ test_assert_counters_sk(tst_name, a, b, TEST_CNT_GOOD);
for (i = 0; i < collection.nr_keys; i++) {
struct test_key *key = &collection.keys[i];
@@ -652,12 +652,12 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server
rx_cnt_expected = key->used_on_server_tx;
}
- test_tcp_ao_key_counters_cmp(tst_name, a, b,
- rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0,
- sndid, rcvid);
+ test_assert_counters_key(tst_name, &a->ao, &b->ao,
+ rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0,
+ sndid, rcvid);
}
- test_tcp_ao_counters_free(a);
- test_tcp_ao_counters_free(b);
+ test_tcp_counters_free(a);
+ test_tcp_counters_free(b);
test_ok("%s: passed counters checks", tst_name);
}
@@ -791,17 +791,17 @@ out:
}
static int start_server(const char *tst_name, unsigned int port, size_t quota,
- struct tcp_ao_counters *begin,
+ struct tcp_counters *begin,
unsigned int current_index, unsigned int rnext_index)
{
- struct tcp_ao_counters lsk_c1, lsk_c2;
+ struct tcp_counters lsk_c1, lsk_c2;
ssize_t bytes;
int sk, lsk;
synchronize_threads(); /* 1: key collection initialized */
lsk = key_collection_socket(true, port);
- if (test_get_tcp_ao_counters(lsk, &lsk_c1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(lsk, &lsk_c1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 2: MKTs added => connect() */
if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
test_error("test_wait_fd()");
@@ -809,12 +809,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota,
sk = accept(lsk, NULL, NULL);
if (sk < 0)
test_error("accept()");
- if (test_get_tcp_ao_counters(sk, begin))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, begin))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 3: accepted => send data */
- if (test_get_tcp_ao_counters(lsk, &lsk_c2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(lsk, &lsk_c2))
+ test_error("test_get_tcp_counters()");
verify_keys(tst_name, lsk, true, true);
close(lsk);
@@ -830,12 +830,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota,
}
static void end_server(const char *tst_name, int sk,
- struct tcp_ao_counters *begin)
+ struct tcp_counters *begin)
{
- struct tcp_ao_counters end;
+ struct tcp_counters end;
- if (test_get_tcp_ao_counters(sk, &end))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &end))
+ test_error("test_get_tcp_counters()");
verify_keys(tst_name, sk, false, true);
synchronize_threads(); /* 4: verified => closed */
@@ -848,7 +848,7 @@ static void end_server(const char *tst_name, int sk,
static void try_server_run(const char *tst_name, unsigned int port, size_t quota,
unsigned int current_index, unsigned int rnext_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
int sk;
sk = start_server(tst_name, port, quota, &tmp,
@@ -860,7 +860,7 @@ static void server_rotations(const char *tst_name, unsigned int port,
size_t quota, unsigned int rotations,
unsigned int current_index, unsigned int rnext_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
unsigned int i;
int sk;
@@ -886,7 +886,7 @@ static void server_rotations(const char *tst_name, unsigned int port,
static int run_client(const char *tst_name, unsigned int port,
unsigned int nr_keys, int current_index, int rnext_index,
- struct tcp_ao_counters *before,
+ struct tcp_counters *before,
const size_t msg_sz, const size_t msg_nr)
{
int sk;
@@ -904,8 +904,8 @@ static int run_client(const char *tst_name, unsigned int port,
if (test_set_key(sk, sndid, rcvid))
test_error("failed to set current/rnext keys");
}
- if (before && test_get_tcp_ao_counters(sk, before))
- test_error("test_get_tcp_ao_counters()");
+ if (before && test_get_tcp_counters(sk, before))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 2: MKTs added => connect() */
if (test_connect_socket(sk, this_ip_dest, port++) <= 0)
@@ -918,11 +918,11 @@ static int run_client(const char *tst_name, unsigned int port,
collection.keys[rnext_index].used_on_server_tx = 1;
synchronize_threads(); /* 3: accepted => send data */
- if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, msg_sz, msg_nr)) {
test_fail("verify failed");
close(sk);
if (before)
- test_tcp_ao_counters_free(before);
+ test_tcp_counters_free(before);
return -1;
}
@@ -931,7 +931,7 @@ static int run_client(const char *tst_name, unsigned int port,
static int start_client(const char *tst_name, unsigned int port,
unsigned int nr_keys, int current_index, int rnext_index,
- struct tcp_ao_counters *before,
+ struct tcp_counters *before,
const size_t msg_sz, const size_t msg_nr)
{
if (init_default_key_collection(nr_keys, true))
@@ -943,9 +943,9 @@ static int start_client(const char *tst_name, unsigned int port,
static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
int current_index, int rnext_index,
- struct tcp_ao_counters *start)
+ struct tcp_counters *start)
{
- struct tcp_ao_counters end;
+ struct tcp_counters end;
/* Some application may become dependent on this kernel choice */
if (current_index < 0)
@@ -955,8 +955,8 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
verify_current_rnext(tst_name, sk,
collection.keys[current_index].client_keyid,
collection.keys[rnext_index].server_keyid);
- if (start && test_get_tcp_ao_counters(sk, &end))
- test_error("test_get_tcp_ao_counters()");
+ if (start && test_get_tcp_counters(sk, &end))
+ test_error("test_get_tcp_counters()");
verify_keys(tst_name, sk, false, false);
synchronize_threads(); /* 4: verify => closed */
close(sk);
@@ -965,7 +965,7 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
synchronize_threads(); /* 5: counters */
}
-static void try_unmatched_keys(int sk, int *rnext_index)
+static void try_unmatched_keys(int sk, int *rnext_index, unsigned int port)
{
struct test_key *key;
unsigned int i = 0;
@@ -1013,7 +1013,10 @@ static void try_unmatched_keys(int sk, int *rnext_index)
test_error("all keys on server match the client");
if (test_set_key(sk, -1, key->server_keyid))
test_error("Can't change the current key");
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1,
+ -1, key->server_keyid, -1);
+ if (test_client_verify(sk, msg_len, nr_packets))
test_fail("verify failed");
*rnext_index = i;
}
@@ -1045,7 +1048,7 @@ static void check_current_back(const char *tst_name, unsigned int port,
unsigned int current_index, unsigned int rnext_index,
unsigned int rotate_to_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
int sk;
sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
@@ -1054,7 +1057,11 @@ static void check_current_back(const char *tst_name, unsigned int port,
return;
if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1))
test_error("Can't change the current key");
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_dest, this_ip_addr,
+ port, -1, 0, -1, -1, -1, -1, -1,
+ collection.keys[rotate_to_index].client_keyid,
+ collection.keys[current_index].client_keyid, -1);
+ if (test_client_verify(sk, msg_len, nr_packets))
test_fail("verify failed");
/* There is a race here: between setting the current_key with
* setsockopt(TCP_AO_INFO) and starting to send some data - there
@@ -1074,7 +1081,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
unsigned int nr_keys, unsigned int rotations,
unsigned int current_index, unsigned int rnext_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
unsigned int i;
int sk;
@@ -1085,12 +1092,17 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
for (i = rnext_index + 1; rotations > 0; i++, rotations--) {
if (i >= collection.nr_keys)
i = 0;
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST,
+ this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1,
+ i == 0 ? -1 : collection.keys[i - 1].server_keyid,
+ collection.keys[i].server_keyid, -1);
if (test_set_key(sk, -1, collection.keys[i].server_keyid))
test_error("Can't change the Rnext key");
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, msg_len, nr_packets)) {
test_fail("verify failed");
close(sk);
- test_tcp_ao_counters_free(&tmp);
+ test_tcp_counters_free(&tmp);
return;
}
verify_current_rnext(tst_name, sk, -1,
@@ -1104,7 +1116,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
static void try_client_run(const char *tst_name, unsigned int port,
unsigned int nr_keys, int current_index, int rnext_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
int sk;
sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
@@ -1124,7 +1136,7 @@ static void try_client_match(const char *tst_name, unsigned int port,
rnext_index, msg_len, nr_packets);
if (sk < 0)
return;
- try_unmatched_keys(sk, &rnext_index);
+ try_unmatched_keys(sk, &rnext_index, port);
end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL);
}
@@ -1181,6 +1193,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(120, server_fn, client_fn);
+ test_init(121, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
index fbc7f6111815..ebb2899c12fe 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h
+++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
@@ -37,17 +37,59 @@ extern void __test_xfail(const char *buf);
extern void __test_error(const char *buf);
extern void __test_skip(const char *buf);
-__attribute__((__format__(__printf__, 2, 3)))
-static inline void __test_print(void (*fn)(const char *), const char *fmt, ...)
+static inline char *test_snprintf(const char *fmt, va_list vargs)
{
-#define TEST_MSG_BUFFER_SIZE 4096
- char buf[TEST_MSG_BUFFER_SIZE];
- va_list arg;
-
- va_start(arg, fmt);
- vsnprintf(buf, sizeof(buf), fmt, arg);
- va_end(arg);
- fn(buf);
+ char *ret = NULL;
+ size_t size = 0;
+ va_list tmp;
+ int n = 0;
+
+ va_copy(tmp, vargs);
+ n = vsnprintf(ret, size, fmt, tmp);
+ va_end(tmp);
+ if (n < 0)
+ return NULL;
+
+ size = n + 1;
+ ret = malloc(size);
+ if (!ret)
+ return NULL;
+
+ n = vsnprintf(ret, size, fmt, vargs);
+ if (n < 0 || n > size - 1) {
+ free(ret);
+ return NULL;
+ }
+ return ret;
+}
+
+static __printf(1, 2) inline char *test_sprintf(const char *fmt, ...)
+{
+ va_list vargs;
+ char *ret;
+
+ va_start(vargs, fmt);
+ ret = test_snprintf(fmt, vargs);
+ va_end(vargs);
+
+ return ret;
+}
+
+static __printf(2, 3) inline void __test_print(void (*fn)(const char *),
+ const char *fmt, ...)
+{
+ va_list vargs;
+ char *msg;
+
+ va_start(vargs, fmt);
+ msg = test_snprintf(fmt, vargs);
+ va_end(vargs);
+
+ if (!msg)
+ return;
+
+ fn(msg);
+ free(msg);
}
#define test_print(fmt, ...) \
@@ -103,6 +145,7 @@ enum test_needs_kconfig {
KCONFIG_TCP_AO, /* required */
KCONFIG_TCP_MD5, /* optional, for TCP-MD5 features */
KCONFIG_NET_VRF, /* optional, for L3/VRF testing */
+ KCONFIG_FTRACE, /* optional, for tracepoints checks */
__KCONFIG_LAST__
};
extern bool kernel_config_has(enum test_needs_kconfig k);
@@ -142,6 +185,8 @@ static inline void test_init2(unsigned int ntests,
__test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2);
}
extern void test_add_destructor(void (*d)(void));
+extern void test_init_ftrace(int nsfd1, int nsfd2);
+extern int test_setup_tracing(void);
/* To adjust optmem socket limit, approximately estimate a number,
* that is bigger than sizeof(struct tcp_ao_key).
@@ -216,12 +261,17 @@ static inline void test_init(unsigned int ntests,
}
extern void synchronize_threads(void);
extern void switch_ns(int fd);
+extern int switch_save_ns(int fd);
+extern void switch_close_ns(int fd);
extern __thread union tcp_addr this_ip_addr;
extern __thread union tcp_addr this_ip_dest;
extern int test_family;
extern void randomize_buffer(void *buf, size_t buflen);
+extern __printf(3, 4) int test_echo(const char *fname, bool append,
+ const char *fmt, ...);
+
extern int open_netns(void);
extern int unshare_open_netns(void);
extern const char veth_name[];
@@ -239,7 +289,7 @@ extern int link_set_up(const char *intf);
extern const unsigned int test_server_port;
extern int test_wait_fd(int sk, time_t sec, bool write);
extern int __test_connect_socket(int sk, const char *device,
- void *addr, size_t addr_sz, time_t timeout);
+ void *addr, size_t addr_sz, bool async);
extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz);
static inline int test_listen_socket(const union tcp_addr taddr,
@@ -281,25 +331,26 @@ static inline int test_listen_socket(const union tcp_addr taddr,
* If set to 0 - kernel will try to retransmit SYN number of times, set in
* /proc/sys/net/ipv4/tcp_syn_retries
* By default set to 1 to make tests pass faster on non-busy machine.
+ * [in process of removal, don't use in new tests]
*/
#ifndef TEST_RETRANSMIT_SEC
#define TEST_RETRANSMIT_SEC 1
#endif
static inline int _test_connect_socket(int sk, const union tcp_addr taddr,
- unsigned int port, time_t timeout)
+ unsigned int port, bool async)
{
sockaddr_af addr;
tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
return __test_connect_socket(sk, veth_name,
- (void *)&addr, sizeof(addr), timeout);
+ (void *)&addr, sizeof(addr), async);
}
static inline int test_connect_socket(int sk, const union tcp_addr taddr,
unsigned int port)
{
- return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC);
+ return _test_connect_socket(sk, taddr, port, false);
}
extern int __test_set_md5(int sk, void *addr, size_t addr_sz,
@@ -433,10 +484,7 @@ static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps)
}
extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec);
-extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
- const size_t msg_len, time_t timeout_sec);
-extern int test_client_verify(int sk, const size_t msg_len, const size_t nr,
- time_t timeout_sec);
+extern int test_client_verify(int sk, const size_t msg_len, const size_t nr);
struct tcp_ao_key_counters {
uint8_t sndid;
@@ -462,7 +510,15 @@ struct tcp_ao_counters {
size_t nr_keys;
struct tcp_ao_key_counters *key_cnts;
};
-extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out);
+
+struct tcp_counters {
+ struct tcp_ao_counters ao;
+ uint64_t netns_md5_notfound;
+ uint64_t netns_md5_unexpected;
+ uint64_t netns_md5_failure;
+};
+
+extern int test_get_tcp_counters(int sk, struct tcp_counters *out);
#define TEST_CNT_KEY_GOOD BIT(0)
#define TEST_CNT_KEY_BAD BIT(1)
@@ -476,8 +532,31 @@ extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out);
#define TEST_CNT_NS_KEY_NOT_FOUND BIT(9)
#define TEST_CNT_NS_AO_REQUIRED BIT(10)
#define TEST_CNT_NS_DROPPED_ICMP BIT(11)
+#define TEST_CNT_NS_MD5_NOT_FOUND BIT(12)
+#define TEST_CNT_NS_MD5_UNEXPECTED BIT(13)
+#define TEST_CNT_NS_MD5_FAILURE BIT(14)
typedef uint16_t test_cnt;
+#define _for_each_counter(f) \
+do { \
+ /* per-netns */ \
+ f(ao.netns_ao_good, TEST_CNT_NS_GOOD); \
+ f(ao.netns_ao_bad, TEST_CNT_NS_BAD); \
+ f(ao.netns_ao_key_not_found, TEST_CNT_NS_KEY_NOT_FOUND); \
+ f(ao.netns_ao_required, TEST_CNT_NS_AO_REQUIRED); \
+ f(ao.netns_ao_dropped_icmp, TEST_CNT_NS_DROPPED_ICMP); \
+ /* per-socket */ \
+ f(ao.ao_info_pkt_good, TEST_CNT_SOCK_GOOD); \
+ f(ao.ao_info_pkt_bad, TEST_CNT_SOCK_BAD); \
+ f(ao.ao_info_pkt_key_not_found, TEST_CNT_SOCK_KEY_NOT_FOUND); \
+ f(ao.ao_info_pkt_ao_required, TEST_CNT_SOCK_AO_REQUIRED); \
+ f(ao.ao_info_pkt_dropped_icmp, TEST_CNT_SOCK_DROPPED_ICMP); \
+ /* non-AO */ \
+ f(netns_md5_notfound, TEST_CNT_NS_MD5_NOT_FOUND); \
+ f(netns_md5_unexpected, TEST_CNT_NS_MD5_UNEXPECTED); \
+ f(netns_md5_failure, TEST_CNT_NS_MD5_FAILURE); \
+} while (0)
+
#define TEST_CNT_AO_GOOD (TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD)
#define TEST_CNT_AO_BAD (TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD)
#define TEST_CNT_AO_KEY_NOT_FOUND (TEST_CNT_SOCK_KEY_NOT_FOUND | \
@@ -489,34 +568,71 @@ typedef uint16_t test_cnt;
#define TEST_CNT_GOOD (TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD)
#define TEST_CNT_BAD (TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD)
-extern int __test_tcp_ao_counters_cmp(const char *tst_name,
- struct tcp_ao_counters *before, struct tcp_ao_counters *after,
+extern test_cnt test_cmp_counters(struct tcp_counters *before,
+ struct tcp_counters *after);
+extern int test_assert_counters_sk(const char *tst_name,
+ struct tcp_counters *before, struct tcp_counters *after,
test_cnt expected);
-extern int test_tcp_ao_key_counters_cmp(const char *tst_name,
+extern int test_assert_counters_key(const char *tst_name,
struct tcp_ao_counters *before, struct tcp_ao_counters *after,
test_cnt expected, int sndid, int rcvid);
-extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts);
+extern void test_tcp_counters_free(struct tcp_counters *cnts);
+
+/*
+ * Polling for netns and socket counters during select()/connect() and also
+ * client/server messaging. Instead of constant timeout on underlying select(),
+ * check the counters and return early. This allows to pass the tests where
+ * timeout is expected without waiting for that fixing timeout (tests speed-up).
+ * Previously shorter timeouts were used for tests expecting to time out,
+ * but that leaded to sporadic false positives on counter checks failures,
+ * as one second timeouts aren't enough for TCP retransmit.
+ *
+ * Two sides of the socketpair (client/server) should synchronize failures
+ * using a shared variable *err, so that they can detect the other side's
+ * failure.
+ */
+extern int test_skpair_wait_poll(int sk, bool write, test_cnt cond,
+ volatile int *err);
+extern int _test_skpair_connect_poll(int sk, const char *device,
+ void *addr, size_t addr_sz,
+ test_cnt cond, volatile int *err);
+static inline int test_skpair_connect_poll(int sk, const union tcp_addr taddr,
+ unsigned int port,
+ test_cnt cond, volatile int *err)
+{
+ sockaddr_af addr;
+
+ tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
+ return _test_skpair_connect_poll(sk, veth_name,
+ (void *)&addr, sizeof(addr), cond, err);
+}
+
+extern int test_skpair_client(int sk, const size_t msg_len, const size_t nr,
+ test_cnt cond, volatile int *err);
+extern int test_skpair_server(int sk, ssize_t quota,
+ test_cnt cond, volatile int *err);
+
/*
- * Frees buffers allocated in test_get_tcp_ao_counters().
+ * Frees buffers allocated in test_get_tcp_counters().
* The function doesn't expect new keys or keys removed between calls
- * to test_get_tcp_ao_counters(). Check key counters manually if they
+ * to test_get_tcp_counters(). Check key counters manually if they
* may change.
*/
-static inline int test_tcp_ao_counters_cmp(const char *tst_name,
- struct tcp_ao_counters *before,
- struct tcp_ao_counters *after,
- test_cnt expected)
+static inline int test_assert_counters(const char *tst_name,
+ struct tcp_counters *before,
+ struct tcp_counters *after,
+ test_cnt expected)
{
int ret;
- ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected);
+ ret = test_assert_counters_sk(tst_name, before, after, expected);
if (ret)
goto out;
- ret = test_tcp_ao_key_counters_cmp(tst_name, before, after,
- expected, -1, -1);
+ ret = test_assert_counters_key(tst_name, &before->ao, &after->ao,
+ expected, -1, -1);
out:
- test_tcp_ao_counters_free(before);
- test_tcp_ao_counters_free(after);
+ test_tcp_counters_free(before);
+ test_tcp_counters_free(after);
return ret;
}
@@ -602,4 +718,115 @@ static inline int test_add_repaired_key(int sk,
return test_verify_socket_key(sk, &tmp);
}
+#define DEFAULT_FTRACE_BUFFER_KB 10000
+#define DEFAULT_TRACER_LINES_ARR 200
+struct test_ftracer;
+extern uint64_t ns_cookie1, ns_cookie2;
+
+enum ftracer_op {
+ FTRACER_LINE_DISCARD = 0,
+ FTRACER_LINE_PRESERVE,
+ FTRACER_EXIT,
+};
+
+extern struct test_ftracer *create_ftracer(const char *name,
+ enum ftracer_op (*process_line)(const char *line),
+ void (*destructor)(struct test_ftracer *tracer),
+ bool (*expecting_more)(void),
+ size_t lines_buf_sz, size_t buffer_size_kb);
+extern int setup_trace_event(struct test_ftracer *tracer,
+ const char *event, const char *filter);
+extern void destroy_ftracer(struct test_ftracer *tracer);
+extern const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer);
+extern const char **tracer_get_savedlines(struct test_ftracer *tracer);
+
+enum trace_events {
+ /* TCP_HASH_EVENT */
+ TCP_HASH_BAD_HEADER = 0,
+ TCP_HASH_MD5_REQUIRED,
+ TCP_HASH_MD5_UNEXPECTED,
+ TCP_HASH_MD5_MISMATCH,
+ TCP_HASH_AO_REQUIRED,
+ /* TCP_AO_EVENT */
+ TCP_AO_HANDSHAKE_FAILURE,
+ TCP_AO_WRONG_MACLEN,
+ TCP_AO_MISMATCH,
+ TCP_AO_KEY_NOT_FOUND,
+ TCP_AO_RNEXT_REQUEST,
+ /* TCP_AO_EVENT_SK */
+ TCP_AO_SYNACK_NO_KEY,
+ /* TCP_AO_EVENT_SNE */
+ TCP_AO_SND_SNE_UPDATE,
+ TCP_AO_RCV_SNE_UPDATE,
+ __MAX_TRACE_EVENTS
+};
+
+extern int __trace_event_expect(enum trace_events type, int family,
+ union tcp_addr src, union tcp_addr dst,
+ int src_port, int dst_port, int L3index,
+ int fin, int syn, int rst, int psh, int ack,
+ int keyid, int rnext, int maclen, int sne);
+
+static inline void trace_hash_event_expect(enum trace_events type,
+ union tcp_addr src, union tcp_addr dst,
+ int src_port, int dst_port, int L3index,
+ int fin, int syn, int rst, int psh, int ack)
+{
+ int err;
+
+ err = __trace_event_expect(type, TEST_FAMILY, src, dst,
+ src_port, dst_port, L3index,
+ fin, syn, rst, psh, ack,
+ -1, -1, -1, -1);
+ if (err)
+ test_error("Couldn't add a trace event: %d", err);
+}
+
+static inline void trace_ao_event_expect(enum trace_events type,
+ union tcp_addr src, union tcp_addr dst,
+ int src_port, int dst_port, int L3index,
+ int fin, int syn, int rst, int psh, int ack,
+ int keyid, int rnext, int maclen)
+{
+ int err;
+
+ err = __trace_event_expect(type, TEST_FAMILY, src, dst,
+ src_port, dst_port, L3index,
+ fin, syn, rst, psh, ack,
+ keyid, rnext, maclen, -1);
+ if (err)
+ test_error("Couldn't add a trace event: %d", err);
+}
+
+static inline void trace_ao_event_sk_expect(enum trace_events type,
+ union tcp_addr src, union tcp_addr dst,
+ int src_port, int dst_port,
+ int keyid, int rnext)
+{
+ int err;
+
+ err = __trace_event_expect(type, TEST_FAMILY, src, dst,
+ src_port, dst_port, -1,
+ -1, -1, -1, -1, -1,
+ keyid, rnext, -1, -1);
+ if (err)
+ test_error("Couldn't add a trace event: %d", err);
+}
+
+static inline void trace_ao_event_sne_expect(enum trace_events type,
+ union tcp_addr src, union tcp_addr dst,
+ int src_port, int dst_port, int sne)
+{
+ int err;
+
+ err = __trace_event_expect(type, TEST_FAMILY, src, dst,
+ src_port, dst_port, -1,
+ -1, -1, -1, -1, -1,
+ -1, -1, -1, sne);
+ if (err)
+ test_error("Couldn't add a trace event: %d", err);
+}
+
+extern int setup_aolib_ftracer(void);
+
#endif /* _AOLIB_H_ */
diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
new file mode 100644
index 000000000000..27403f875054
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
@@ -0,0 +1,556 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <pthread.h>
+#include "aolib.h"
+
+static const char *trace_event_names[__MAX_TRACE_EVENTS] = {
+ /* TCP_HASH_EVENT */
+ "tcp_hash_bad_header",
+ "tcp_hash_md5_required",
+ "tcp_hash_md5_unexpected",
+ "tcp_hash_md5_mismatch",
+ "tcp_hash_ao_required",
+ /* TCP_AO_EVENT */
+ "tcp_ao_handshake_failure",
+ "tcp_ao_wrong_maclen",
+ "tcp_ao_mismatch",
+ "tcp_ao_key_not_found",
+ "tcp_ao_rnext_request",
+ /* TCP_AO_EVENT_SK */
+ "tcp_ao_synack_no_key",
+ /* TCP_AO_EVENT_SNE */
+ "tcp_ao_snd_sne_update",
+ "tcp_ao_rcv_sne_update"
+};
+
+struct expected_trace_point {
+ /* required */
+ enum trace_events type;
+ int family;
+ union tcp_addr src;
+ union tcp_addr dst;
+
+ /* optional */
+ int src_port;
+ int dst_port;
+ int L3index;
+
+ int fin;
+ int syn;
+ int rst;
+ int psh;
+ int ack;
+
+ int keyid;
+ int rnext;
+ int maclen;
+ int sne;
+
+ size_t matched;
+};
+
+static struct expected_trace_point *exp_tps;
+static size_t exp_tps_nr;
+static size_t exp_tps_size;
+static pthread_mutex_t exp_tps_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+int __trace_event_expect(enum trace_events type, int family,
+ union tcp_addr src, union tcp_addr dst,
+ int src_port, int dst_port, int L3index,
+ int fin, int syn, int rst, int psh, int ack,
+ int keyid, int rnext, int maclen, int sne)
+{
+ struct expected_trace_point new_tp = {
+ .type = type,
+ .family = family,
+ .src = src,
+ .dst = dst,
+ .src_port = src_port,
+ .dst_port = dst_port,
+ .L3index = L3index,
+ .fin = fin,
+ .syn = syn,
+ .rst = rst,
+ .psh = psh,
+ .ack = ack,
+ .keyid = keyid,
+ .rnext = rnext,
+ .maclen = maclen,
+ .sne = sne,
+ .matched = 0,
+ };
+ int ret = 0;
+
+ if (!kernel_config_has(KCONFIG_FTRACE))
+ return 0;
+
+ pthread_mutex_lock(&exp_tps_mutex);
+ if (exp_tps_nr == exp_tps_size) {
+ struct expected_trace_point *tmp;
+
+ if (exp_tps_size == 0)
+ exp_tps_size = 10;
+ else
+ exp_tps_size = exp_tps_size * 1.6;
+
+ tmp = reallocarray(exp_tps, exp_tps_size, sizeof(exp_tps[0]));
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ exp_tps = tmp;
+ }
+ exp_tps[exp_tps_nr] = new_tp;
+ exp_tps_nr++;
+out:
+ pthread_mutex_unlock(&exp_tps_mutex);
+ return ret;
+}
+
+static void free_expected_events(void)
+{
+ /* We're from the process destructor - not taking the mutex */
+ exp_tps_size = 0;
+ exp_tps = NULL;
+ free(exp_tps);
+}
+
+struct trace_point {
+ int family;
+ union tcp_addr src;
+ union tcp_addr dst;
+ unsigned int src_port;
+ unsigned int dst_port;
+ int L3index;
+ unsigned int fin:1,
+ syn:1,
+ rst:1,
+ psh:1,
+ ack:1;
+
+ unsigned int keyid;
+ unsigned int rnext;
+ unsigned int maclen;
+
+ unsigned int sne;
+};
+
+static bool lookup_expected_event(int event_type, struct trace_point *e)
+{
+ size_t i;
+
+ pthread_mutex_lock(&exp_tps_mutex);
+ for (i = 0; i < exp_tps_nr; i++) {
+ struct expected_trace_point *p = &exp_tps[i];
+ size_t sk_size;
+
+ if (p->type != event_type)
+ continue;
+ if (p->family != e->family)
+ continue;
+ if (p->family == AF_INET)
+ sk_size = sizeof(p->src.a4);
+ else
+ sk_size = sizeof(p->src.a6);
+ if (memcmp(&p->src, &e->src, sk_size))
+ continue;
+ if (memcmp(&p->dst, &e->dst, sk_size))
+ continue;
+ if (p->src_port >= 0 && p->src_port != e->src_port)
+ continue;
+ if (p->dst_port >= 0 && p->dst_port != e->dst_port)
+ continue;
+ if (p->L3index >= 0 && p->L3index != e->L3index)
+ continue;
+
+ if (p->fin >= 0 && p->fin != e->fin)
+ continue;
+ if (p->syn >= 0 && p->syn != e->syn)
+ continue;
+ if (p->rst >= 0 && p->rst != e->rst)
+ continue;
+ if (p->psh >= 0 && p->psh != e->psh)
+ continue;
+ if (p->ack >= 0 && p->ack != e->ack)
+ continue;
+
+ if (p->keyid >= 0 && p->keyid != e->keyid)
+ continue;
+ if (p->rnext >= 0 && p->rnext != e->rnext)
+ continue;
+ if (p->maclen >= 0 && p->maclen != e->maclen)
+ continue;
+ if (p->sne >= 0 && p->sne != e->sne)
+ continue;
+ p->matched++;
+ pthread_mutex_unlock(&exp_tps_mutex);
+ return true;
+ }
+ pthread_mutex_unlock(&exp_tps_mutex);
+ return false;
+}
+
+static int check_event_type(const char *line)
+{
+ size_t i;
+
+ /*
+ * This should have been a set or hashmap, but it's a selftest,
+ * so... KISS.
+ */
+ for (i = 0; i < __MAX_TRACE_EVENTS; i++) {
+ if (!strncmp(trace_event_names[i], line, strlen(trace_event_names[i])))
+ return i;
+ }
+ return -1;
+}
+
+static bool event_has_flags(enum trace_events event)
+{
+ switch (event) {
+ case TCP_HASH_BAD_HEADER:
+ case TCP_HASH_MD5_REQUIRED:
+ case TCP_HASH_MD5_UNEXPECTED:
+ case TCP_HASH_MD5_MISMATCH:
+ case TCP_HASH_AO_REQUIRED:
+ case TCP_AO_HANDSHAKE_FAILURE:
+ case TCP_AO_WRONG_MACLEN:
+ case TCP_AO_MISMATCH:
+ case TCP_AO_KEY_NOT_FOUND:
+ case TCP_AO_RNEXT_REQUEST:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int tracer_ip_split(int family, char *src, char **addr, char **port)
+{
+ char *p;
+
+ if (family == AF_INET) {
+ /* fomat is <addr>:port, i.e.: 10.0.254.1:7015 */
+ *addr = src;
+ p = strchr(src, ':');
+ if (!p) {
+ test_print("Couldn't parse trace event addr:port %s", src);
+ return -EINVAL;
+ }
+ *p++ = '\0';
+ *port = p;
+ return 0;
+ }
+ if (family != AF_INET6)
+ return -EAFNOSUPPORT;
+
+ /* format is [<addr>]:port, i.e.: [2001:db8:254::1]:7013 */
+ *addr = strchr(src, '[');
+ p = strchr(src, ']');
+
+ if (!p || !*addr) {
+ test_print("Couldn't parse trace event [addr]:port %s", src);
+ return -EINVAL;
+ }
+
+ *addr = *addr + 1; /* '[' */
+ *p++ = '\0'; /* ']' */
+ if (*p != ':') {
+ test_print("Couldn't parse trace event :port %s", p);
+ return -EINVAL;
+ }
+ *p++ = '\0'; /* ':' */
+ *port = p;
+ return 0;
+}
+
+static int tracer_scan_address(int family, char *src,
+ union tcp_addr *dst, unsigned int *port)
+{
+ char *addr, *port_str;
+ int ret;
+
+ ret = tracer_ip_split(family, src, &addr, &port_str);
+ if (ret)
+ return ret;
+
+ if (inet_pton(family, addr, dst) != 1) {
+ test_print("Couldn't parse trace event addr %s", addr);
+ return -EINVAL;
+ }
+ errno = 0;
+ *port = (unsigned int)strtoul(port_str, NULL, 10);
+ if (errno != 0) {
+ test_print("Couldn't parse trace event port %s", port_str);
+ return -errno;
+ }
+ return 0;
+}
+
+static int tracer_scan_event(const char *line, enum trace_events event,
+ struct trace_point *out)
+{
+ char *src = NULL, *dst = NULL, *family = NULL;
+ char fin, syn, rst, psh, ack;
+ int nr_matched, ret = 0;
+ uint64_t netns_cookie;
+
+ switch (event) {
+ case TCP_HASH_BAD_HEADER:
+ case TCP_HASH_MD5_REQUIRED:
+ case TCP_HASH_MD5_UNEXPECTED:
+ case TCP_HASH_MD5_MISMATCH:
+ case TCP_HASH_AO_REQUIRED: {
+ nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c]",
+ &netns_cookie, &family,
+ &src, &dst, &out->L3index,
+ &fin, &syn, &rst, &psh, &ack);
+ if (nr_matched != 10)
+ test_print("Couldn't parse trace event, matched = %d/10",
+ nr_matched);
+ break;
+ }
+ case TCP_AO_HANDSHAKE_FAILURE:
+ case TCP_AO_WRONG_MACLEN:
+ case TCP_AO_MISMATCH:
+ case TCP_AO_KEY_NOT_FOUND:
+ case TCP_AO_RNEXT_REQUEST: {
+ nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u",
+ &netns_cookie, &family,
+ &src, &dst, &out->L3index,
+ &fin, &syn, &rst, &psh, &ack,
+ &out->keyid, &out->rnext, &out->maclen);
+ if (nr_matched != 13)
+ test_print("Couldn't parse trace event, matched = %d/13",
+ nr_matched);
+ break;
+ }
+ case TCP_AO_SYNACK_NO_KEY: {
+ nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms keyid=%u rnext=%u",
+ &netns_cookie, &family,
+ &src, &dst, &out->keyid, &out->rnext);
+ if (nr_matched != 6)
+ test_print("Couldn't parse trace event, matched = %d/6",
+ nr_matched);
+ break;
+ }
+ case TCP_AO_SND_SNE_UPDATE:
+ case TCP_AO_RCV_SNE_UPDATE: {
+ nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms sne=%u",
+ &netns_cookie, &family,
+ &src, &dst, &out->sne);
+ if (nr_matched != 5)
+ test_print("Couldn't parse trace event, matched = %d/5",
+ nr_matched);
+ break;
+ }
+ default:
+ return -1;
+ }
+
+ if (family) {
+ if (!strcmp(family, "AF_INET")) {
+ out->family = AF_INET;
+ } else if (!strcmp(family, "AF_INET6")) {
+ out->family = AF_INET6;
+ } else {
+ test_print("Couldn't parse trace event family %s", family);
+ ret = -EINVAL;
+ goto out_free;
+ }
+ }
+
+ if (event_has_flags(event)) {
+ out->fin = (fin == 'F');
+ out->syn = (syn == 'S');
+ out->rst = (rst == 'R');
+ out->psh = (psh == 'P');
+ out->ack = (ack == '.');
+
+ if ((fin != 'F' && fin != ' ') ||
+ (syn != 'S' && syn != ' ') ||
+ (rst != 'R' && rst != ' ') ||
+ (psh != 'P' && psh != ' ') ||
+ (ack != '.' && ack != ' ')) {
+ test_print("Couldn't parse trace event flags %c%c%c%c%c",
+ fin, syn, rst, psh, ack);
+ ret = -EINVAL;
+ goto out_free;
+ }
+ }
+
+ if (src && tracer_scan_address(out->family, src, &out->src, &out->src_port)) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ if (dst && tracer_scan_address(out->family, dst, &out->dst, &out->dst_port)) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ if (netns_cookie != ns_cookie1 && netns_cookie != ns_cookie2) {
+ test_print("Net namespace filter for trace event didn't work: %" PRIu64 " != %" PRIu64 " OR %" PRIu64,
+ netns_cookie, ns_cookie1, ns_cookie2);
+ ret = -EINVAL;
+ }
+
+out_free:
+ free(src);
+ free(dst);
+ free(family);
+ return ret;
+}
+
+static enum ftracer_op aolib_tracer_process_event(const char *line)
+{
+ int event_type = check_event_type(line);
+ struct trace_point tmp = {};
+
+ if (event_type < 0)
+ return FTRACER_LINE_PRESERVE;
+
+ if (tracer_scan_event(line, event_type, &tmp))
+ return FTRACER_LINE_PRESERVE;
+
+ return lookup_expected_event(event_type, &tmp) ?
+ FTRACER_LINE_DISCARD : FTRACER_LINE_PRESERVE;
+}
+
+static void dump_trace_event(struct expected_trace_point *e)
+{
+ char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
+
+ if (!inet_ntop(e->family, &e->src, src, INET6_ADDRSTRLEN))
+ test_error("inet_ntop()");
+ if (!inet_ntop(e->family, &e->dst, dst, INET6_ADDRSTRLEN))
+ test_error("inet_ntop()");
+ test_print("trace event filter %s [%s:%d => %s:%d, L3index %d, flags: %s%s%s%s%s, keyid: %d, rnext: %d, maclen: %d, sne: %d] = %zu",
+ trace_event_names[e->type],
+ src, e->src_port, dst, e->dst_port, e->L3index,
+ e->fin ? "F" : "", e->syn ? "S" : "", e->rst ? "R" : "",
+ e->psh ? "P" : "", e->ack ? "." : "",
+ e->keyid, e->rnext, e->maclen, e->sne, e->matched);
+}
+
+static void print_match_stats(bool unexpected_events)
+{
+ size_t matches_per_type[__MAX_TRACE_EVENTS] = {};
+ bool expected_but_none = false;
+ size_t i, total_matched = 0;
+ char *stat_line = NULL;
+
+ for (i = 0; i < exp_tps_nr; i++) {
+ struct expected_trace_point *e = &exp_tps[i];
+
+ total_matched += e->matched;
+ matches_per_type[e->type] += e->matched;
+ if (!e->matched)
+ expected_but_none = true;
+ }
+ for (i = 0; i < __MAX_TRACE_EVENTS; i++) {
+ if (!matches_per_type[i])
+ continue;
+ stat_line = test_sprintf("%s%s[%zu] ", stat_line ?: "",
+ trace_event_names[i],
+ matches_per_type[i]);
+ if (!stat_line)
+ test_error("test_sprintf()");
+ }
+
+ if (unexpected_events || expected_but_none) {
+ for (i = 0; i < exp_tps_nr; i++)
+ dump_trace_event(&exp_tps[i]);
+ }
+
+ if (unexpected_events)
+ return;
+
+ if (expected_but_none)
+ test_fail("Some trace events were expected, but didn't occur");
+ else if (total_matched)
+ test_ok("Trace events matched expectations: %zu %s",
+ total_matched, stat_line);
+ else
+ test_ok("No unexpected trace events during the test run");
+}
+
+#define dump_events(fmt, ...) \
+ __test_print(__test_msg, fmt, ##__VA_ARGS__)
+static void check_free_events(struct test_ftracer *tracer)
+{
+ const char **lines;
+ size_t nr;
+
+ if (!kernel_config_has(KCONFIG_FTRACE)) {
+ test_skip("kernel config doesn't have ftrace - no checks");
+ return;
+ }
+
+ nr = tracer_get_savedlines_nr(tracer);
+ lines = tracer_get_savedlines(tracer);
+ print_match_stats(!!nr);
+ if (!nr)
+ return;
+
+ errno = 0;
+ test_xfail("Trace events [%zu] were not expected:", nr);
+ while (nr)
+ dump_events("\t%s", lines[--nr]);
+}
+
+static int setup_tcp_trace_events(struct test_ftracer *tracer)
+{
+ char *filter;
+ size_t i;
+ int ret;
+
+ filter = test_sprintf("net_cookie == %zu || net_cookie == %zu",
+ ns_cookie1, ns_cookie2);
+ if (!filter)
+ return -ENOMEM;
+
+ for (i = 0; i < __MAX_TRACE_EVENTS; i++) {
+ char *event_name = test_sprintf("tcp/%s", trace_event_names[i]);
+
+ if (!event_name) {
+ ret = -ENOMEM;
+ break;
+ }
+ ret = setup_trace_event(tracer, event_name, filter);
+ free(event_name);
+ if (ret)
+ break;
+ }
+
+ free(filter);
+ return ret;
+}
+
+static void aolib_tracer_destroy(struct test_ftracer *tracer)
+{
+ check_free_events(tracer);
+ free_expected_events();
+}
+
+static bool aolib_tracer_expecting_more(void)
+{
+ size_t i;
+
+ for (i = 0; i < exp_tps_nr; i++)
+ if (!exp_tps[i].matched)
+ return true;
+ return false;
+}
+
+int setup_aolib_ftracer(void)
+{
+ struct test_ftracer *f;
+
+ f = create_ftracer("aolib", aolib_tracer_process_event,
+ aolib_tracer_destroy, aolib_tracer_expecting_more,
+ DEFAULT_FTRACE_BUFFER_KB, DEFAULT_TRACER_LINES_ARR);
+ if (!f)
+ return -1;
+
+ return setup_tcp_trace_events(f);
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c
new file mode 100644
index 000000000000..e4d0b173bc94
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c
@@ -0,0 +1,543 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include "../../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+static char ftrace_path[] = "ksft-ftrace-XXXXXX";
+static bool ftrace_mounted;
+uint64_t ns_cookie1, ns_cookie2;
+
+struct test_ftracer {
+ pthread_t tracer_thread;
+ int error;
+ char *instance_path;
+ FILE *trace_pipe;
+
+ enum ftracer_op (*process_line)(const char *line);
+ void (*destructor)(struct test_ftracer *tracer);
+ bool (*expecting_more)(void);
+
+ char **saved_lines;
+ size_t saved_lines_size;
+ size_t next_line_ind;
+
+ pthread_cond_t met_all_expected;
+ pthread_mutex_t met_all_expected_lock;
+
+ struct test_ftracer *next;
+};
+
+static struct test_ftracer *ftracers;
+static pthread_mutex_t ftracers_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static int mount_ftrace(void)
+{
+ if (!mkdtemp(ftrace_path))
+ test_error("Can't create temp dir");
+
+ if (mount("tracefs", ftrace_path, "tracefs", 0, "rw"))
+ return -errno;
+
+ ftrace_mounted = true;
+
+ return 0;
+}
+
+static void unmount_ftrace(void)
+{
+ if (ftrace_mounted && umount(ftrace_path))
+ test_print("Failed on cleanup: can't unmount tracefs: %m");
+
+ if (rmdir(ftrace_path))
+ test_error("Failed on cleanup: can't remove ftrace dir %s",
+ ftrace_path);
+}
+
+struct opts_list_t {
+ char *opt_name;
+ struct opts_list_t *next;
+};
+
+static int disable_trace_options(const char *ftrace_path)
+{
+ struct opts_list_t *opts_list = NULL;
+ char *fopts, *line = NULL;
+ size_t buf_len = 0;
+ ssize_t line_len;
+ int ret = 0;
+ FILE *opts;
+
+ fopts = test_sprintf("%s/%s", ftrace_path, "trace_options");
+ if (!fopts)
+ return -ENOMEM;
+
+ opts = fopen(fopts, "r+");
+ if (!opts) {
+ ret = -errno;
+ goto out_free;
+ }
+
+ while ((line_len = getline(&line, &buf_len, opts)) != -1) {
+ struct opts_list_t *tmp;
+
+ if (!strncmp(line, "no", 2))
+ continue;
+
+ tmp = malloc(sizeof(*tmp));
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto out_free_opts_list;
+ }
+ tmp->next = opts_list;
+ tmp->opt_name = test_sprintf("no%s", line);
+ if (!tmp->opt_name) {
+ ret = -ENOMEM;
+ free(tmp);
+ goto out_free_opts_list;
+ }
+ opts_list = tmp;
+ }
+
+ while (opts_list) {
+ struct opts_list_t *tmp = opts_list;
+
+ fseek(opts, 0, SEEK_SET);
+ fwrite(tmp->opt_name, 1, strlen(tmp->opt_name), opts);
+
+ opts_list = opts_list->next;
+ free(tmp->opt_name);
+ free(tmp);
+ }
+
+out_free_opts_list:
+ while (opts_list) {
+ struct opts_list_t *tmp = opts_list;
+
+ opts_list = opts_list->next;
+ free(tmp->opt_name);
+ free(tmp);
+ }
+ free(line);
+ fclose(opts);
+out_free:
+ free(fopts);
+ return ret;
+}
+
+static int setup_buffer_size(const char *ftrace_path, size_t sz)
+{
+ char *fbuf_size = test_sprintf("%s/buffer_size_kb", ftrace_path);
+ int ret;
+
+ if (!fbuf_size)
+ return -1;
+
+ ret = test_echo(fbuf_size, 0, "%zu", sz);
+ free(fbuf_size);
+ return ret;
+}
+
+static int setup_ftrace_instance(struct test_ftracer *tracer, const char *name)
+{
+ char *tmp;
+
+ tmp = test_sprintf("%s/instances/ksft-%s-XXXXXX", ftrace_path, name);
+ if (!tmp)
+ return -ENOMEM;
+
+ tracer->instance_path = mkdtemp(tmp);
+ if (!tracer->instance_path) {
+ free(tmp);
+ return -errno;
+ }
+
+ return 0;
+}
+
+static void remove_ftrace_instance(struct test_ftracer *tracer)
+{
+ if (rmdir(tracer->instance_path))
+ test_print("Failed on cleanup: can't remove ftrace instance %s",
+ tracer->instance_path);
+ free(tracer->instance_path);
+}
+
+static void tracer_cleanup(void *arg)
+{
+ struct test_ftracer *tracer = arg;
+
+ fclose(tracer->trace_pipe);
+}
+
+static void tracer_set_error(struct test_ftracer *tracer, int error)
+{
+ if (!tracer->error)
+ tracer->error = error;
+}
+
+const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer)
+{
+ return tracer->next_line_ind;
+}
+
+const char **tracer_get_savedlines(struct test_ftracer *tracer)
+{
+ return (const char **)tracer->saved_lines;
+}
+
+static void *tracer_thread_func(void *arg)
+{
+ struct test_ftracer *tracer = arg;
+
+ pthread_cleanup_push(tracer_cleanup, arg);
+
+ while (tracer->next_line_ind < tracer->saved_lines_size) {
+ char **lp = &tracer->saved_lines[tracer->next_line_ind];
+ enum ftracer_op op;
+ size_t buf_len = 0;
+ ssize_t line_len;
+
+ line_len = getline(lp, &buf_len, tracer->trace_pipe);
+ if (line_len == -1)
+ break;
+
+ pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+ op = tracer->process_line(*lp);
+ pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+
+ if (tracer->expecting_more) {
+ pthread_mutex_lock(&tracer->met_all_expected_lock);
+ if (!tracer->expecting_more())
+ pthread_cond_signal(&tracer->met_all_expected);
+ pthread_mutex_unlock(&tracer->met_all_expected_lock);
+ }
+
+ if (op == FTRACER_LINE_DISCARD)
+ continue;
+ if (op == FTRACER_EXIT)
+ break;
+ if (op != FTRACER_LINE_PRESERVE)
+ test_error("unexpected tracer command %d", op);
+
+ tracer->next_line_ind++;
+ buf_len = 0;
+ }
+ test_print("too many lines in ftracer buffer %zu, exiting tracer",
+ tracer->next_line_ind);
+
+ pthread_cleanup_pop(1);
+ return NULL;
+}
+
+static int setup_trace_thread(struct test_ftracer *tracer)
+{
+ int ret = 0;
+ char *path;
+
+ path = test_sprintf("%s/trace_pipe", tracer->instance_path);
+ if (!path)
+ return -ENOMEM;
+
+ tracer->trace_pipe = fopen(path, "r");
+ if (!tracer->trace_pipe) {
+ ret = -errno;
+ goto out_free;
+ }
+
+ if (pthread_create(&tracer->tracer_thread, NULL,
+ tracer_thread_func, (void *)tracer)) {
+ ret = -errno;
+ fclose(tracer->trace_pipe);
+ }
+
+out_free:
+ free(path);
+ return ret;
+}
+
+static void stop_trace_thread(struct test_ftracer *tracer)
+{
+ void *res;
+
+ if (pthread_cancel(tracer->tracer_thread)) {
+ test_print("Can't stop tracer pthread: %m");
+ tracer_set_error(tracer, -errno);
+ }
+ if (pthread_join(tracer->tracer_thread, &res)) {
+ test_print("Can't join tracer pthread: %m");
+ tracer_set_error(tracer, -errno);
+ }
+ if (res != PTHREAD_CANCELED) {
+ test_print("Tracer thread wasn't canceled");
+ tracer_set_error(tracer, -errno);
+ }
+ if (tracer->error)
+ test_fail("tracer errored by %s", strerror(tracer->error));
+}
+
+static void final_wait_for_events(struct test_ftracer *tracer,
+ unsigned timeout_sec)
+{
+ struct timespec timeout;
+ struct timeval now;
+ int ret = 0;
+
+ if (!tracer->expecting_more)
+ return;
+
+ pthread_mutex_lock(&tracer->met_all_expected_lock);
+ gettimeofday(&now, NULL);
+ timeout.tv_sec = now.tv_sec + timeout_sec;
+ timeout.tv_nsec = now.tv_usec * 1000;
+
+ while (tracer->expecting_more() && ret != ETIMEDOUT)
+ ret = pthread_cond_timedwait(&tracer->met_all_expected,
+ &tracer->met_all_expected_lock, &timeout);
+ pthread_mutex_unlock(&tracer->met_all_expected_lock);
+}
+
+int setup_trace_event(struct test_ftracer *tracer,
+ const char *event, const char *filter)
+{
+ char *enable_path, *filter_path, *instance = tracer->instance_path;
+ int ret;
+
+ enable_path = test_sprintf("%s/events/%s/enable", instance, event);
+ if (!enable_path)
+ return -ENOMEM;
+
+ filter_path = test_sprintf("%s/events/%s/filter", instance, event);
+ if (!filter_path) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ ret = test_echo(filter_path, 0, "%s", filter);
+ if (!ret)
+ ret = test_echo(enable_path, 0, "1");
+
+out_free:
+ free(filter_path);
+ free(enable_path);
+ return ret;
+}
+
+struct test_ftracer *create_ftracer(const char *name,
+ enum ftracer_op (*process_line)(const char *line),
+ void (*destructor)(struct test_ftracer *tracer),
+ bool (*expecting_more)(void),
+ size_t lines_buf_sz, size_t buffer_size_kb)
+{
+ struct test_ftracer *tracer;
+ int err;
+
+ /* XXX: separate __create_ftracer() helper and do here
+ * if (!kernel_config_has(KCONFIG_FTRACE))
+ * return NULL;
+ */
+
+ tracer = malloc(sizeof(*tracer));
+ if (!tracer) {
+ test_print("malloc()");
+ return NULL;
+ }
+
+ memset(tracer, 0, sizeof(*tracer));
+
+ err = setup_ftrace_instance(tracer, name);
+ if (err) {
+ test_print("setup_ftrace_instance(): %d", err);
+ goto err_free;
+ }
+
+ err = disable_trace_options(tracer->instance_path);
+ if (err) {
+ test_print("disable_trace_options(): %d", err);
+ goto err_remove;
+ }
+
+ err = setup_buffer_size(tracer->instance_path, buffer_size_kb);
+ if (err) {
+ test_print("disable_trace_options(): %d", err);
+ goto err_remove;
+ }
+
+ tracer->saved_lines = calloc(lines_buf_sz, sizeof(tracer->saved_lines[0]));
+ if (!tracer->saved_lines) {
+ test_print("calloc()");
+ goto err_remove;
+ }
+ tracer->saved_lines_size = lines_buf_sz;
+
+ tracer->process_line = process_line;
+ tracer->destructor = destructor;
+ tracer->expecting_more = expecting_more;
+
+ err = pthread_cond_init(&tracer->met_all_expected, NULL);
+ if (err) {
+ test_print("pthread_cond_init(): %d", err);
+ goto err_free_lines;
+ }
+
+ err = pthread_mutex_init(&tracer->met_all_expected_lock, NULL);
+ if (err) {
+ test_print("pthread_mutex_init(): %d", err);
+ goto err_cond_destroy;
+ }
+
+ err = setup_trace_thread(tracer);
+ if (err) {
+ test_print("setup_trace_thread(): %d", err);
+ goto err_mutex_destroy;
+ }
+
+ pthread_mutex_lock(&ftracers_lock);
+ tracer->next = ftracers;
+ ftracers = tracer;
+ pthread_mutex_unlock(&ftracers_lock);
+
+ return tracer;
+
+err_mutex_destroy:
+ pthread_mutex_destroy(&tracer->met_all_expected_lock);
+err_cond_destroy:
+ pthread_cond_destroy(&tracer->met_all_expected);
+err_free_lines:
+ free(tracer->saved_lines);
+err_remove:
+ remove_ftrace_instance(tracer);
+err_free:
+ free(tracer);
+ return NULL;
+}
+
+static void __destroy_ftracer(struct test_ftracer *tracer)
+{
+ size_t i;
+
+ final_wait_for_events(tracer, TEST_TIMEOUT_SEC);
+ stop_trace_thread(tracer);
+ remove_ftrace_instance(tracer);
+ if (tracer->destructor)
+ tracer->destructor(tracer);
+ for (i = 0; i < tracer->saved_lines_size; i++)
+ free(tracer->saved_lines[i]);
+ pthread_cond_destroy(&tracer->met_all_expected);
+ pthread_mutex_destroy(&tracer->met_all_expected_lock);
+ free(tracer);
+}
+
+void destroy_ftracer(struct test_ftracer *tracer)
+{
+ pthread_mutex_lock(&ftracers_lock);
+ if (tracer == ftracers) {
+ ftracers = tracer->next;
+ } else {
+ struct test_ftracer *f = ftracers;
+
+ while (f->next != tracer) {
+ if (!f->next)
+ test_error("tracers list corruption or double free %p", tracer);
+ f = f->next;
+ }
+ f->next = tracer->next;
+ }
+ tracer->next = NULL;
+ pthread_mutex_unlock(&ftracers_lock);
+ __destroy_ftracer(tracer);
+}
+
+static void destroy_all_ftracers(void)
+{
+ struct test_ftracer *f;
+
+ pthread_mutex_lock(&ftracers_lock);
+ f = ftracers;
+ ftracers = NULL;
+ pthread_mutex_unlock(&ftracers_lock);
+
+ while (f) {
+ struct test_ftracer *n = f->next;
+
+ f->next = NULL;
+ __destroy_ftracer(f);
+ f = n;
+ }
+}
+
+static void test_unset_tracing(void)
+{
+ destroy_all_ftracers();
+ unmount_ftrace();
+}
+
+int test_setup_tracing(void)
+{
+ /*
+ * Just a basic protection - this should be called only once from
+ * lib/kconfig. Not thread safe, which is fine as it's early, before
+ * threads are created.
+ */
+ static int already_set;
+ int err;
+
+ if (already_set)
+ return -1;
+
+ /* Needs net-namespace cookies for filters */
+ if (ns_cookie1 == ns_cookie2) {
+ test_print("net-namespace cookies: %" PRIu64 " == %" PRIu64 ", can't set up tracing",
+ ns_cookie1, ns_cookie2);
+ return -1;
+ }
+
+ already_set = 1;
+
+ test_add_destructor(test_unset_tracing);
+
+ err = mount_ftrace();
+ if (err) {
+ test_print("failed to mount_ftrace(): %d", err);
+ return err;
+ }
+
+ return setup_aolib_ftracer();
+}
+
+static int get_ns_cookie(int nsfd, uint64_t *out)
+{
+ int old_ns = switch_save_ns(nsfd);
+ socklen_t size = sizeof(*out);
+ int sk;
+
+ sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0) {
+ test_print("socket(): %m");
+ return -errno;
+ }
+
+ if (getsockopt(sk, SOL_SOCKET, SO_NETNS_COOKIE, out, &size)) {
+ test_print("getsockopt(SO_NETNS_COOKIE): %m");
+ close(sk);
+ return -errno;
+ }
+
+ close(sk);
+ switch_close_ns(old_ns);
+ return 0;
+}
+
+void test_init_ftrace(int nsfd1, int nsfd2)
+{
+ get_ns_cookie(nsfd1, &ns_cookie1);
+ get_ns_cookie(nsfd2, &ns_cookie2);
+ /* Populate kernel config state */
+ kernel_config_has(KCONFIG_FTRACE);
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
index f279ffc3843b..9f1c175846f8 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
@@ -6,7 +6,7 @@
#include "aolib.h"
struct kconfig_t {
- int _errno; /* the returned error if not supported */
+ int _error; /* negative errno if not supported */
int (*check_kconfig)(int *error);
};
@@ -62,7 +62,7 @@ static int has_tcp_ao(int *err)
memcpy(&tmp.addr, &addr, sizeof(addr));
*err = 0;
if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) {
- *err = errno;
+ *err = -errno;
if (errno != ENOPROTOOPT)
ret = -errno;
}
@@ -87,7 +87,7 @@ static int has_tcp_md5(int *err)
*/
*err = 0;
if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) {
- *err = errno;
+ *err = -errno;
if (errno != ENOPROTOOPT && errno == ENOMEM) {
test_print("setsockopt(TCP_MD5SIG_EXT): %m");
ret = -errno;
@@ -116,13 +116,21 @@ static int has_vrfs(int *err)
return ret;
}
+static int has_ftrace(int *err)
+{
+ *err = test_setup_tracing();
+ return 0;
+}
+
+#define KCONFIG_UNKNOWN 1
static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER;
static struct kconfig_t kconfig[__KCONFIG_LAST__] = {
- { -1, has_net_ns },
- { -1, has_veth },
- { -1, has_tcp_ao },
- { -1, has_tcp_md5 },
- { -1, has_vrfs },
+ { KCONFIG_UNKNOWN, has_net_ns },
+ { KCONFIG_UNKNOWN, has_veth },
+ { KCONFIG_UNKNOWN, has_tcp_ao },
+ { KCONFIG_UNKNOWN, has_tcp_md5 },
+ { KCONFIG_UNKNOWN, has_vrfs },
+ { KCONFIG_UNKNOWN, has_ftrace },
};
const char *tests_skip_reason[__KCONFIG_LAST__] = {
@@ -131,6 +139,7 @@ const char *tests_skip_reason[__KCONFIG_LAST__] = {
"Tests require TCP-AO support (CONFIG_TCP_AO)",
"setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)",
"VRFs are not supported (CONFIG_NET_VRF)",
+ "Ftrace points are not supported (CONFIG_TRACEPOINTS)",
};
bool kernel_config_has(enum test_needs_kconfig k)
@@ -138,11 +147,11 @@ bool kernel_config_has(enum test_needs_kconfig k)
bool ret;
pthread_mutex_lock(&kconfig_lock);
- if (kconfig[k]._errno == -1) {
- if (kconfig[k].check_kconfig(&kconfig[k]._errno))
+ if (kconfig[k]._error == KCONFIG_UNKNOWN) {
+ if (kconfig[k].check_kconfig(&kconfig[k]._error))
test_error("Failed to initialize kconfig %u", k);
}
- ret = kconfig[k]._errno == 0;
+ ret = kconfig[k]._error == 0;
pthread_mutex_unlock(&kconfig_lock);
return ret;
}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c
index e408b9243b2c..49aec2922a31 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/setup.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c
@@ -9,7 +9,7 @@
* Can't be included in the header: it defines static variables which
* will be unique to every object. Let's include it only once here.
*/
-#include "../../../kselftest.h"
+#include "kselftest.h"
/* Prevent overriding of one thread's output by another */
static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -111,7 +111,7 @@ static void sig_int(int signo)
int open_netns(void)
{
- const char *netns_path = "/proc/self/ns/net";
+ const char *netns_path = "/proc/thread-self/ns/net";
int fd;
fd = open(netns_path, O_RDONLY);
@@ -142,6 +142,13 @@ int switch_save_ns(int new_ns)
return ret;
}
+void switch_close_ns(int fd)
+{
+ if (setns(fd, CLONE_NEWNET))
+ test_error("setns()");
+ close(fd);
+}
+
static int nsfd_outside = -1;
static int nsfd_parent = -1;
static int nsfd_child = -1;
@@ -243,9 +250,9 @@ void __test_init(unsigned int ntests, int family, unsigned int prefix,
test_print("rand seed %u", (unsigned int)seed);
srand(seed);
-
ksft_print_header();
init_namespaces();
+ test_init_ftrace(nsfd_parent, nsfd_child);
if (add_veth(veth_name, nsfd_parent, nsfd_child))
test_error("Failed to add veth");
@@ -296,7 +303,7 @@ static bool is_optmem_namespaced(void)
int old_ns = switch_save_ns(nsfd_child);
optmem_ns = !access(optmem_file, F_OK);
- switch_ns(old_ns);
+ switch_close_ns(old_ns);
}
return !!optmem_ns;
}
@@ -317,7 +324,7 @@ size_t test_get_optmem(void)
test_error("can't read from %s", optmem_file);
fclose(foptmem);
if (!is_optmem_namespaced())
- switch_ns(old_ns);
+ switch_close_ns(old_ns);
return ret;
}
@@ -339,7 +346,7 @@ static void __test_set_optmem(size_t new, size_t *old)
test_error("can't write %zu to %s", new, optmem_file);
fclose(foptmem);
if (!is_optmem_namespaced())
- switch_ns(old_ns);
+ switch_close_ns(old_ns);
}
static void test_revert_optmem(void)
diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c
index 15aeb0963058..ef8e9031d47a 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/sock.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c
@@ -34,10 +34,8 @@ int __test_listen_socket(int backlog, void *addr, size_t addr_sz)
return sk;
}
-int test_wait_fd(int sk, time_t sec, bool write)
+static int __test_wait_fd(int sk, struct timeval *tv, bool write)
{
- struct timeval tv = { .tv_sec = sec };
- struct timeval *ptv = NULL;
fd_set fds, efds;
int ret;
socklen_t slen = sizeof(ret);
@@ -47,14 +45,11 @@ int test_wait_fd(int sk, time_t sec, bool write)
FD_ZERO(&efds);
FD_SET(sk, &efds);
- if (sec)
- ptv = &tv;
-
errno = 0;
if (write)
- ret = select(sk + 1, NULL, &fds, &efds, ptv);
+ ret = select(sk + 1, NULL, &fds, &efds, tv);
else
- ret = select(sk + 1, &fds, NULL, &efds, ptv);
+ ret = select(sk + 1, &fds, NULL, &efds, tv);
if (ret < 0)
return -errno;
if (ret == 0) {
@@ -69,8 +64,54 @@ int test_wait_fd(int sk, time_t sec, bool write)
return 0;
}
+int test_wait_fd(int sk, time_t sec, bool write)
+{
+ struct timeval tv = { .tv_sec = sec, };
+
+ return __test_wait_fd(sk, sec ? &tv : NULL, write);
+}
+
+static bool __skpair_poll_should_stop(int sk, struct tcp_counters *c,
+ test_cnt condition)
+{
+ struct tcp_counters c2;
+ test_cnt diff;
+
+ if (test_get_tcp_counters(sk, &c2))
+ test_error("test_get_tcp_counters()");
+
+ diff = test_cmp_counters(c, &c2);
+ test_tcp_counters_free(&c2);
+ return (diff & condition) == condition;
+}
+
+/* How often wake up and check netns counters & paired (*err) */
+#define POLL_USEC 150
+static int __test_skpair_poll(int sk, bool write, uint64_t timeout,
+ struct tcp_counters *c, test_cnt cond,
+ volatile int *err)
+{
+ uint64_t t;
+
+ for (t = 0; t <= timeout * 1000000; t += POLL_USEC) {
+ struct timeval tv = { .tv_usec = POLL_USEC, };
+ int ret;
+
+ ret = __test_wait_fd(sk, &tv, write);
+ if (ret != -ETIMEDOUT)
+ return ret;
+ if (c && cond && __skpair_poll_should_stop(sk, c, cond))
+ break;
+ if (err && *err)
+ return *err;
+ }
+ if (err)
+ *err = -ETIMEDOUT;
+ return -ETIMEDOUT;
+}
+
int __test_connect_socket(int sk, const char *device,
- void *addr, size_t addr_sz, time_t timeout)
+ void *addr, size_t addr_sz, bool async)
{
long flags;
int err;
@@ -82,15 +123,6 @@ int __test_connect_socket(int sk, const char *device,
test_error("setsockopt(SO_BINDTODEVICE, %s)", device);
}
- if (!timeout) {
- err = connect(sk, addr, addr_sz);
- if (err) {
- err = -errno;
- goto out;
- }
- return 0;
- }
-
flags = fcntl(sk, F_GETFL);
if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0))
test_error("fcntl()");
@@ -100,9 +132,9 @@ int __test_connect_socket(int sk, const char *device,
err = -errno;
goto out;
}
- if (timeout < 0)
+ if (async)
return sk;
- err = test_wait_fd(sk, timeout, 1);
+ err = test_wait_fd(sk, TEST_TIMEOUT_SEC, 1);
if (err)
goto out;
}
@@ -113,6 +145,45 @@ out:
return err;
}
+int test_skpair_wait_poll(int sk, bool write,
+ test_cnt cond, volatile int *err)
+{
+ struct tcp_counters c;
+ int ret;
+
+ *err = 0;
+ if (test_get_tcp_counters(sk, &c))
+ test_error("test_get_tcp_counters()");
+ synchronize_threads(); /* 1: init skpair & read nscounters */
+
+ ret = __test_skpair_poll(sk, write, TEST_TIMEOUT_SEC, &c, cond, err);
+ test_tcp_counters_free(&c);
+ return ret;
+}
+
+int _test_skpair_connect_poll(int sk, const char *device,
+ void *addr, size_t addr_sz,
+ test_cnt condition, volatile int *err)
+{
+ struct tcp_counters c;
+ int ret;
+
+ *err = 0;
+ if (test_get_tcp_counters(sk, &c))
+ test_error("test_get_tcp_counters()");
+ synchronize_threads(); /* 1: init skpair & read nscounters */
+ ret = __test_connect_socket(sk, device, addr, addr_sz, true);
+ if (ret < 0) {
+ test_tcp_counters_free(&c);
+ return (*err = ret);
+ }
+ ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, &c, condition, err);
+ if (ret < 0)
+ close(sk);
+ test_tcp_counters_free(&c);
+ return ret;
+}
+
int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix,
int vrf, const char *password)
{
@@ -333,12 +404,12 @@ do { \
return 0;
}
-int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
+int test_get_tcp_counters(int sk, struct tcp_counters *out)
{
struct tcp_ao_getsockopt *key_dump;
socklen_t key_dump_sz = sizeof(*key_dump);
struct tcp_ao_info_opt info = {};
- bool c1, c2, c3, c4, c5;
+ bool c1, c2, c3, c4, c5, c6, c7, c8;
struct netstat *ns;
int err, nr_keys;
@@ -346,25 +417,30 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
/* per-netns */
ns = netstat_read();
- out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1);
- out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2);
- out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3);
- out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4);
- out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5);
+ out->ao.netns_ao_good = netstat_get(ns, "TCPAOGood", &c1);
+ out->ao.netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2);
+ out->ao.netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3);
+ out->ao.netns_ao_required = netstat_get(ns, "TCPAORequired", &c4);
+ out->ao.netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5);
+ out->netns_md5_notfound = netstat_get(ns, "TCPMD5NotFound", &c6);
+ out->netns_md5_unexpected = netstat_get(ns, "TCPMD5Unexpected", &c7);
+ out->netns_md5_failure = netstat_get(ns, "TCPMD5Failure", &c8);
netstat_free(ns);
- if (c1 || c2 || c3 || c4 || c5)
+ if (c1 || c2 || c3 || c4 || c5 || c6 || c7 || c8)
return -EOPNOTSUPP;
err = test_get_ao_info(sk, &info);
+ if (err == -ENOENT)
+ return 0;
if (err)
return err;
/* per-socket */
- out->ao_info_pkt_good = info.pkt_good;
- out->ao_info_pkt_bad = info.pkt_bad;
- out->ao_info_pkt_key_not_found = info.pkt_key_not_found;
- out->ao_info_pkt_ao_required = info.pkt_ao_required;
- out->ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp;
+ out->ao.ao_info_pkt_good = info.pkt_good;
+ out->ao.ao_info_pkt_bad = info.pkt_bad;
+ out->ao.ao_info_pkt_key_not_found = info.pkt_key_not_found;
+ out->ao.ao_info_pkt_ao_required = info.pkt_ao_required;
+ out->ao.ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp;
/* per-key */
nr_keys = test_get_ao_keys_nr(sk);
@@ -372,14 +448,13 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
return nr_keys;
if (nr_keys == 0)
test_error("test_get_ao_keys_nr() == 0");
- out->nr_keys = (size_t)nr_keys;
+ out->ao.nr_keys = (size_t)nr_keys;
key_dump = calloc(nr_keys, key_dump_sz);
if (!key_dump)
return -errno;
key_dump[0].nkeys = nr_keys;
key_dump[0].get_all = 1;
- key_dump[0].get_all = 1;
err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS,
key_dump, &key_dump_sz);
if (err) {
@@ -387,72 +462,84 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
return -errno;
}
- out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0]));
- if (!out->key_cnts) {
+ out->ao.key_cnts = calloc(nr_keys, sizeof(out->ao.key_cnts[0]));
+ if (!out->ao.key_cnts) {
free(key_dump);
return -errno;
}
while (nr_keys--) {
- out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid;
- out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid;
- out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good;
- out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad;
+ out->ao.key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid;
+ out->ao.key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid;
+ out->ao.key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good;
+ out->ao.key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad;
}
free(key_dump);
return 0;
}
-int __test_tcp_ao_counters_cmp(const char *tst_name,
- struct tcp_ao_counters *before,
- struct tcp_ao_counters *after,
- test_cnt expected)
+test_cnt test_cmp_counters(struct tcp_counters *before,
+ struct tcp_counters *after)
{
-#define __cmp_ao(cnt, expecting_inc) \
+#define __cmp(cnt, e_cnt) \
+do { \
+ if (before->cnt > after->cnt) \
+ test_error("counter " __stringify(cnt) " decreased"); \
+ if (before->cnt != after->cnt) \
+ ret |= e_cnt; \
+} while (0)
+
+ test_cnt ret = 0;
+ size_t i;
+
+ if (before->ao.nr_keys != after->ao.nr_keys)
+ test_error("the number of keys has changed");
+
+ _for_each_counter(__cmp);
+
+ i = before->ao.nr_keys;
+ while (i--) {
+ __cmp(ao.key_cnts[i].pkt_good, TEST_CNT_KEY_GOOD);
+ __cmp(ao.key_cnts[i].pkt_bad, TEST_CNT_KEY_BAD);
+ }
+#undef __cmp
+ return ret;
+}
+
+int test_assert_counters_sk(const char *tst_name,
+ struct tcp_counters *before,
+ struct tcp_counters *after,
+ test_cnt expected)
+{
+#define __cmp_ao(cnt, e_cnt) \
do { \
if (before->cnt > after->cnt) { \
test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \
- tst_name ?: "", before->cnt, after->cnt); \
+ tst_name ?: "", before->cnt, after->cnt); \
return -1; \
} \
- if ((before->cnt != after->cnt) != (expecting_inc)) { \
+ if ((before->cnt != after->cnt) != !!(expected & e_cnt)) { \
test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \
- tst_name ?: "", (expecting_inc) ? "" : "not ", \
+ tst_name ?: "", (expected & e_cnt) ? "" : "not ", \
before->cnt, after->cnt); \
return -1; \
} \
-} while(0)
+} while (0)
errno = 0;
- /* per-netns */
- __cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD));
- __cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD));
- __cmp_ao(netns_ao_key_not_found,
- !!(expected & TEST_CNT_NS_KEY_NOT_FOUND));
- __cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED));
- __cmp_ao(netns_ao_dropped_icmp,
- !!(expected & TEST_CNT_NS_DROPPED_ICMP));
- /* per-socket */
- __cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD));
- __cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD));
- __cmp_ao(ao_info_pkt_key_not_found,
- !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND));
- __cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED));
- __cmp_ao(ao_info_pkt_dropped_icmp,
- !!(expected & TEST_CNT_SOCK_DROPPED_ICMP));
+ _for_each_counter(__cmp_ao);
return 0;
#undef __cmp_ao
}
-int test_tcp_ao_key_counters_cmp(const char *tst_name,
- struct tcp_ao_counters *before,
- struct tcp_ao_counters *after,
- test_cnt expected,
- int sndid, int rcvid)
+int test_assert_counters_key(const char *tst_name,
+ struct tcp_ao_counters *before,
+ struct tcp_ao_counters *after,
+ test_cnt expected, int sndid, int rcvid)
{
size_t i;
-#define __cmp_ao(i, cnt, expecting_inc) \
+#define __cmp_ao(i, cnt, e_cnt) \
do { \
if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) { \
test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \
@@ -462,16 +549,16 @@ do { \
before->key_cnts[i].rcvid); \
return -1; \
} \
- if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) { \
+ if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != !!(expected & e_cnt)) { \
test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \
- tst_name ?: "", (expecting_inc) ? "" : "not ",\
+ tst_name ?: "", (expected & e_cnt) ? "" : "not ",\
before->key_cnts[i].cnt, \
after->key_cnts[i].cnt, \
before->key_cnts[i].sndid, \
before->key_cnts[i].rcvid); \
return -1; \
} \
-} while(0)
+} while (0)
if (before->nr_keys != after->nr_keys) {
test_fail("%s: Keys changed on the socket %zu != %zu",
@@ -486,20 +573,22 @@ do { \
continue;
if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid)
continue;
- __cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD));
- __cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD));
+ __cmp_ao(i, pkt_good, TEST_CNT_KEY_GOOD);
+ __cmp_ao(i, pkt_bad, TEST_CNT_KEY_BAD);
}
return 0;
#undef __cmp_ao
}
-void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts)
+void test_tcp_counters_free(struct tcp_counters *cnts)
{
- free(cnts->key_cnts);
+ free(cnts->ao.key_cnts);
}
#define TEST_BUF_SIZE 4096
-ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
+static ssize_t _test_server_run(int sk, ssize_t quota, struct tcp_counters *c,
+ test_cnt cond, volatile int *err,
+ time_t timeout_sec)
{
ssize_t total = 0;
@@ -508,7 +597,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
ssize_t bytes, sent;
int ret;
- ret = test_wait_fd(sk, timeout_sec, 0);
+ ret = __test_skpair_poll(sk, 0, timeout_sec, c, cond, err);
if (ret)
return ret;
@@ -519,7 +608,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
if (bytes == 0)
break;
- ret = test_wait_fd(sk, timeout_sec, 1);
+ ret = __test_skpair_poll(sk, 1, timeout_sec, c, cond, err);
if (ret)
return ret;
@@ -534,13 +623,41 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
return total;
}
-ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
- const size_t msg_len, time_t timeout_sec)
+ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
+{
+ return _test_server_run(sk, quota, NULL, 0, NULL,
+ timeout_sec ?: TEST_TIMEOUT_SEC);
+}
+
+int test_skpair_server(int sk, ssize_t quota, test_cnt cond, volatile int *err)
+{
+ struct tcp_counters c;
+ ssize_t ret;
+
+ *err = 0;
+ if (test_get_tcp_counters(sk, &c))
+ test_error("test_get_tcp_counters()");
+ synchronize_threads(); /* 1: init skpair & read nscounters */
+
+ ret = _test_server_run(sk, quota, &c, cond, err, TEST_TIMEOUT_SEC);
+ test_tcp_counters_free(&c);
+ return ret;
+}
+
+static ssize_t test_client_loop(int sk, size_t buf_sz, const size_t msg_len,
+ struct tcp_counters *c, test_cnt cond,
+ volatile int *err)
{
char msg[msg_len];
int nodelay = 1;
+ char *buf;
size_t i;
+ buf = alloca(buf_sz);
+ if (!buf)
+ return -ENOMEM;
+ randomize_buffer(buf, buf_sz);
+
if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay)))
test_error("setsockopt(TCP_NODELAY)");
@@ -548,7 +665,7 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
size_t sent, bytes = min(msg_len, buf_sz - i);
int ret;
- ret = test_wait_fd(sk, timeout_sec, 1);
+ ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, c, cond, err);
if (ret)
return ret;
@@ -562,7 +679,8 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
do {
ssize_t got;
- ret = test_wait_fd(sk, timeout_sec, 0);
+ ret = __test_skpair_poll(sk, 0, TEST_TIMEOUT_SEC,
+ c, cond, err);
if (ret)
return ret;
@@ -581,15 +699,31 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
return i;
}
-int test_client_verify(int sk, const size_t msg_len, const size_t nr,
- time_t timeout_sec)
+int test_client_verify(int sk, const size_t msg_len, const size_t nr)
{
size_t buf_sz = msg_len * nr;
- char *buf = alloca(buf_sz);
ssize_t ret;
- randomize_buffer(buf, buf_sz);
- ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec);
+ ret = test_client_loop(sk, buf_sz, msg_len, NULL, 0, NULL);
+ if (ret < 0)
+ return (int)ret;
+ return ret != buf_sz ? -1 : 0;
+}
+
+int test_skpair_client(int sk, const size_t msg_len, const size_t nr,
+ test_cnt cond, volatile int *err)
+{
+ struct tcp_counters c;
+ size_t buf_sz = msg_len * nr;
+ ssize_t ret;
+
+ *err = 0;
+ if (test_get_tcp_counters(sk, &c))
+ test_error("test_get_tcp_counters()");
+ synchronize_threads(); /* 1: init skpair & read nscounters */
+
+ ret = test_client_loop(sk, buf_sz, msg_len, &c, cond, err);
+ test_tcp_counters_free(&c);
if (ret < 0)
return (int)ret;
return ret != buf_sz ? -1 : 0;
diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c
index 372daca525f5..bdf5522c9213 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/utils.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c
@@ -21,6 +21,32 @@ void randomize_buffer(void *buf, size_t buflen)
}
}
+__printf(3, 4) int test_echo(const char *fname, bool append,
+ const char *fmt, ...)
+{
+ size_t len, written;
+ va_list vargs;
+ char *msg;
+ FILE *f;
+
+ f = fopen(fname, append ? "a" : "w");
+ if (!f)
+ return -errno;
+
+ va_start(vargs, fmt);
+ msg = test_snprintf(fmt, vargs);
+ va_end(vargs);
+ if (!msg) {
+ fclose(f);
+ return -1;
+ }
+ len = strlen(msg);
+ written = fwrite(msg, 1, len, f);
+ fclose(f);
+ free(msg);
+ return written == len ? 0 : -1;
+}
+
const struct sockaddr_in6 addr_any6 = {
.sin6_family = AF_INET6,
};
diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c
index 8fdc808df325..9a059b6c4523 100644
--- a/tools/testing/selftests/net/tcp_ao/restore.c
+++ b/tools/testing/selftests/net/tcp_ao/restore.c
@@ -16,11 +16,11 @@ const size_t quota = nr_packets * msg_len;
static void try_server_run(const char *tst_name, unsigned int port,
fault_t inj, test_cnt cnt_expected)
{
+ test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
const char *cnt_name = "TCPAOGood";
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
uint64_t before_cnt, after_cnt;
- int sk, lsk;
- time_t timeout;
+ int sk, lsk, dummy;
ssize_t bytes;
if (fault(TIMEOUT))
@@ -48,11 +48,10 @@ static void try_server_run(const char *tst_name, unsigned int port,
}
before_cnt = netstat_get_one(cnt_name, NULL);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- bytes = test_server_run(sk, quota, timeout);
+ bytes = test_skpair_server(sk, quota, poll_cnt, &dummy);
if (fault(TIMEOUT)) {
if (bytes > 0)
test_fail("%s: server served: %zd", tst_name, bytes);
@@ -64,17 +63,18 @@ static void try_server_run(const char *tst_name, unsigned int port,
else
test_ok("%s: server alive", tst_name);
}
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ synchronize_threads(); /* 3: counters checks */
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
after_cnt = netstat_get_one(cnt_name, NULL);
- test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %zu <= %zu",
- tst_name, cnt_name, after_cnt, before_cnt);
+ test_fail("%s(server): %s counter did not increase: %" PRIu64 " <= %" PRIu64,
+ tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %zu => %zu",
+ test_ok("%s(server): counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
@@ -82,7 +82,7 @@ static void try_server_run(const char *tst_name, unsigned int port,
* Before close() as that will send FIN and move the peer in TCP_CLOSE
* and that will prevent reading AO counters from the peer's socket.
*/
- synchronize_threads(); /* 3: verified => closed */
+ synchronize_threads(); /* 4: verified => closed */
out:
close(sk);
}
@@ -91,16 +91,16 @@ static void *server_fn(void *arg)
{
unsigned int port = test_server_port;
- try_server_run("TCP-AO migrate to another socket", port++,
+ try_server_run("TCP-AO migrate to another socket (server)", port++,
0, TEST_CNT_GOOD);
- try_server_run("TCP-AO with wrong send ISN", port++,
+ try_server_run("TCP-AO with wrong send ISN (server)", port++,
FAULT_TIMEOUT, TEST_CNT_BAD);
- try_server_run("TCP-AO with wrong receive ISN", port++,
+ try_server_run("TCP-AO with wrong receive ISN (server)", port++,
FAULT_TIMEOUT, TEST_CNT_BAD);
- try_server_run("TCP-AO with wrong send SEQ ext number", port++,
+ try_server_run("TCP-AO with wrong send SEQ ext number (server)", port++,
FAULT_TIMEOUT, TEST_CNT_BAD);
- try_server_run("TCP-AO with wrong receive SEQ ext number", port++,
- FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD);
+ try_server_run("TCP-AO with wrong receive SEQ ext number (server)",
+ port++, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD);
synchronize_threads(); /* don't race to exit: client exits */
return NULL;
@@ -124,7 +124,7 @@ static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr,
test_error("failed to connect()");
synchronize_threads(); /* 2: accepted => send data */
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk, msg_len, nr_packets))
test_fail("pre-migrate verify failed");
test_enable_repair(sk);
@@ -138,11 +138,11 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
struct tcp_ao_repair *ao_img,
fault_t inj, test_cnt cnt_expected)
{
+ test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
const char *cnt_name = "TCPAOGood";
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
uint64_t before_cnt, after_cnt;
- time_t timeout;
- int sk;
+ int sk, dummy;
if (fault(TIMEOUT))
cnt_name = "TCPAOBad";
@@ -158,38 +158,39 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
test_error("setsockopt(TCP_AO_ADD_KEY)");
test_ao_restore(sk, ao_img);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
test_disable_repair(sk);
test_sock_state_free(img);
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- if (test_client_verify(sk, msg_len, nr_packets, timeout)) {
+ if (test_skpair_client(sk, msg_len, nr_packets, poll_cnt, &dummy)) {
if (fault(TIMEOUT))
test_ok("%s: post-migrate connection is broken", tst_name);
else
test_fail("%s: post-migrate connection is working", tst_name);
} else {
if (fault(TIMEOUT))
- test_fail("%s: post-migrate connection still working", tst_name);
+ test_fail("%s: post-migrate connection is working", tst_name);
else
test_ok("%s: post-migrate connection is alive", tst_name);
}
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* 3: counters checks */
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
after_cnt = netstat_get_one(cnt_name, NULL);
- test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %zu <= %zu",
+ test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %zu => %zu",
+ test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
- synchronize_threads(); /* 3: verified => closed */
+ synchronize_threads(); /* 4: verified => closed */
close(sk);
}
@@ -201,29 +202,43 @@ static void *client_fn(void *arg)
sockaddr_af saddr;
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
- test_sk_restore("TCP-AO migrate to another socket", port++,
+ test_sk_restore("TCP-AO migrate to another socket (client)", port++,
&saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
ao_img.snt_isn += 1;
- test_sk_restore("TCP-AO with wrong send ISN", port++,
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
+ port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+ test_sk_restore("TCP-AO with wrong send ISN (client)", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
ao_img.rcv_isn += 1;
- test_sk_restore("TCP-AO with wrong receive ISN", port++,
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
+ port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+ test_sk_restore("TCP-AO with wrong receive ISN (client)", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
ao_img.snd_sne += 1;
- test_sk_restore("TCP-AO with wrong send SEQ ext number", port++,
- &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+ /* not expecting server => client mismatches as only snd sne is broken */
+ test_sk_restore("TCP-AO with wrong send SEQ ext number (client)",
+ port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
TEST_CNT_NS_BAD | TEST_CNT_GOOD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
ao_img.rcv_sne += 1;
- test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++,
- &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+ /* not expecting client => server mismatches as only rcv sne is broken */
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
+ port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+ test_sk_restore("TCP-AO with wrong receive SEQ ext number (client)",
+ port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
TEST_CNT_NS_GOOD | TEST_CNT_BAD);
return NULL;
@@ -231,6 +246,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(20, server_fn, client_fn);
+ test_init(21, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c
index a2fe88d35ac0..883cddf377cf 100644
--- a/tools/testing/selftests/net/tcp_ao/rst.c
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -84,15 +84,15 @@ static void close_forced(int sk)
static void test_server_active_rst(unsigned int port)
{
- struct tcp_ao_counters cnt1, cnt2;
+ struct tcp_counters cnt1, cnt2;
ssize_t bytes;
int sk, lsk;
lsk = test_listen_socket(this_ip_addr, port, backlog);
if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
test_error("setsockopt(TCP_AO_ADD_KEY)");
- if (test_get_tcp_ao_counters(lsk, &cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(lsk, &cnt1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 1: MKT added */
if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
@@ -103,8 +103,8 @@ static void test_server_active_rst(unsigned int port)
test_error("accept()");
synchronize_threads(); /* 2: connection accept()ed, another queued */
- if (test_get_tcp_ao_counters(lsk, &cnt2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(lsk, &cnt2))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 3: close listen socket */
close(lsk);
@@ -120,7 +120,7 @@ static void test_server_active_rst(unsigned int port)
synchronize_threads(); /* 5: closed active sk */
synchronize_threads(); /* 6: counters checks */
- if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD))
+ if (test_assert_counters("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD))
test_fail("MKT counters (server) have not only good packets");
else
test_ok("MKT counters are good on server");
@@ -128,7 +128,7 @@ static void test_server_active_rst(unsigned int port)
static void test_server_passive_rst(unsigned int port)
{
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
int sk, lsk;
ssize_t bytes;
@@ -147,8 +147,8 @@ static void test_server_passive_rst(unsigned int port)
synchronize_threads(); /* 2: accepted => send data */
close(lsk);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
if (bytes != quota) {
@@ -160,12 +160,12 @@ static void test_server_passive_rst(unsigned int port)
synchronize_threads(); /* 3: checkpoint the client */
synchronize_threads(); /* 4: close the server, creating twsk */
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
close(sk);
synchronize_threads(); /* 5: restore the socket, send more data */
- test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD);
+ test_assert_counters("passive RST server", &cnt1, &cnt2, TEST_CNT_GOOD);
synchronize_threads(); /* 6: server exits */
}
@@ -271,8 +271,7 @@ static void test_client_active_rst(unsigned int port)
synchronize_threads(); /* 1: MKT added */
for (i = 0; i < last; i++) {
- err = _test_connect_socket(sk[i], this_ip_dest, port,
- (i == 0) ? TEST_TIMEOUT_SEC : -1);
+ err = _test_connect_socket(sk[i], this_ip_dest, port, i != 0);
if (err < 0)
test_error("failed to connect()");
}
@@ -283,12 +282,12 @@ static void test_client_active_rst(unsigned int port)
test_error("test_wait_fds(): %d", err);
/* async connect() with third sk to get into request_sock_queue */
- err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
+ err = _test_connect_socket(sk[last], this_ip_dest, port, 1);
if (err < 0)
test_error("failed to connect()");
synchronize_threads(); /* 3: close listen socket */
- if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk[0], packet_sz, quota / packet_sz))
test_fail("Failed to send data on connected socket");
else
test_ok("Verified established tcp connection");
@@ -323,7 +322,7 @@ static void test_client_active_rst(unsigned int port)
static void test_client_passive_rst(unsigned int port)
{
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
struct tcp_ao_repair ao_img;
struct tcp_sock_state img;
sockaddr_af saddr;
@@ -341,7 +340,7 @@ static void test_client_passive_rst(unsigned int port)
test_error("failed to connect()");
synchronize_threads(); /* 2: accepted => send data */
- if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk, packet_sz, quota / packet_sz))
test_fail("Failed to send data on connected socket");
else
test_ok("Verified established tcp connection");
@@ -397,8 +396,8 @@ static void test_client_passive_rst(unsigned int port)
test_error("setsockopt(TCP_AO_ADD_KEY)");
test_ao_restore(sk, &ao_img);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
test_disable_repair(sk);
test_sock_state_free(&img);
@@ -417,7 +416,7 @@ static void test_client_passive_rst(unsigned int port)
* IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0,
* options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0
*/
- err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC);
+ err = test_client_verify(sk, packet_sz, quota / packet_sz);
/* Make sure that the connection was reset, not timeouted */
if (err && err == -ECONNRESET)
test_ok("client sock was passively reset post-seq-adjust");
@@ -426,12 +425,12 @@ static void test_client_passive_rst(unsigned int port)
else
test_fail("client sock is yet connected post-seq-adjust");
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 6: server exits */
close(sk);
- test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD);
+ test_assert_counters("client passive RST", &cnt1, &cnt2, TEST_CNT_GOOD);
}
static void *client_fn(void *arg)
@@ -455,6 +454,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(14, server_fn, client_fn);
+ test_init(15, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c
index a5698b0a3718..2c73bea698a6 100644
--- a/tools/testing/selftests/net/tcp_ao/self-connect.c
+++ b/tools/testing/selftests/net/tcp_ao/self-connect.c
@@ -16,6 +16,9 @@ static void __setup_lo_intf(const char *lo_intf,
if (link_set_up(lo_intf))
test_error("Failed to bring %s up", lo_intf);
+
+ if (ip_route_add(lo_intf, TEST_FAMILY, local_addr, local_addr))
+ test_error("Failed to add a local route %s", lo_intf);
}
static void setup_lo_intf(const char *lo_intf)
@@ -30,7 +33,7 @@ static void setup_lo_intf(const char *lo_intf)
static void tcp_self_connect(const char *tst, unsigned int port,
bool different_keyids, bool check_restore)
{
- struct tcp_ao_counters before_ao, after_ao;
+ struct tcp_counters before, after;
uint64_t before_aogood, after_aogood;
struct netstat *ns_before, *ns_after;
const size_t nr_packets = 20;
@@ -60,17 +63,17 @@ static void tcp_self_connect(const char *tst, unsigned int port,
ns_before = netstat_read();
before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
- if (test_get_tcp_ao_counters(sk, &before_ao))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &before))
+ test_error("test_get_tcp_counters()");
if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr,
- sizeof(addr), TEST_TIMEOUT_SEC) < 0) {
+ sizeof(addr), 0) < 0) {
ns_after = netstat_read();
netstat_print_diff(ns_before, ns_after);
test_error("failed to connect()");
}
- if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, 100, nr_packets)) {
test_fail("%s: tcp connection verify failed", tst);
close(sk);
return;
@@ -78,8 +81,8 @@ static void tcp_self_connect(const char *tst, unsigned int port,
ns_after = netstat_read();
after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
- if (test_get_tcp_ao_counters(sk, &after_ao))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &after))
+ test_error("test_get_tcp_counters()");
if (!check_restore) {
/* to debug: netstat_print_diff(ns_before, ns_after); */
netstat_free(ns_before);
@@ -87,13 +90,13 @@ static void tcp_self_connect(const char *tst, unsigned int port,
netstat_free(ns_after);
if (after_aogood <= before_aogood) {
- test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+ test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64,
tst, after_aogood, before_aogood);
close(sk);
return;
}
- if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) {
+ if (test_assert_counters(tst, &before, &after, TEST_CNT_GOOD)) {
close(sk);
return;
}
@@ -136,7 +139,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
test_ao_restore(sk, &ao_img);
test_disable_repair(sk);
test_sock_state_free(&img);
- if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, 100, nr_packets)) {
test_fail("%s: tcp connection verify failed", tst);
close(sk);
return;
@@ -148,7 +151,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
netstat_free(ns_after);
close(sk);
if (after_aogood <= before_aogood) {
- test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+ test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64,
tst, after_aogood, before_aogood);
return;
}
@@ -163,17 +166,26 @@ static void *client_fn(void *arg)
setup_lo_intf("lo");
tcp_self_connect("self-connect(same keyids)", port++, false, false);
+
+ /* expecting rnext to change based on the first segment RNext != Current */
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr,
+ port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1);
tcp_self_connect("self-connect(different keyids)", port++, true, false);
tcp_self_connect("self-connect(restore)", port, false, true);
- port += 2;
+ port += 2; /* restore test restores over different port */
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr,
+ port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1);
+ /* intentionally on restore they are added to the socket in different order */
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr,
+ port + 1, port + 1, 0, -1, -1, -1, -1, -1, 5, 7, -1);
tcp_self_connect("self-connect(restore, different keyids)", port, true, true);
- port += 2;
+ port += 2; /* restore test restores over different port */
return NULL;
}
int main(int argc, char *argv[])
{
- test_init(4, client_fn, NULL);
+ test_init(5, client_fn, NULL);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
index ad4e77d6823e..6478da6a71c3 100644
--- a/tools/testing/selftests/net/tcp_ao/seq-ext.c
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Check that after SEQ number wrap-around:
* 1. SEQ-extension has upper bytes set
- * 2. TCP conneciton is alive and no TCPAOBad segments
+ * 2. TCP connection is alive and no TCPAOBad segments
* In order to test (2), the test doesn't just adjust seq number for a queue
* on a connected socket, but migrates it to another sk+port number, so
* that there won't be any delayed packets that will fail to verify
@@ -40,7 +40,7 @@ static void test_adjust_seqs(struct tcp_sock_state *img,
static int test_sk_restore(struct tcp_sock_state *img,
struct tcp_ao_repair *ao_img, sockaddr_af *saddr,
const union tcp_addr daddr, unsigned int dport,
- struct tcp_ao_counters *cnt)
+ struct tcp_counters *cnt)
{
int sk;
@@ -54,8 +54,8 @@ static int test_sk_restore(struct tcp_sock_state *img,
test_error("setsockopt(TCP_AO_ADD_KEY)");
test_ao_restore(sk, ao_img);
- if (test_get_tcp_ao_counters(sk, cnt))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, cnt))
+ test_error("test_get_tcp_counters()");
test_disable_repair(sk);
test_sock_state_free(img);
@@ -65,7 +65,7 @@ static int test_sk_restore(struct tcp_sock_state *img,
static void *server_fn(void *arg)
{
uint64_t before_good, after_good, after_bad;
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
struct tcp_sock_state img;
struct tcp_ao_repair ao_img;
sockaddr_af saddr;
@@ -114,9 +114,17 @@ static void *server_fn(void *arg)
test_adjust_seqs(&img, &ao_img, true);
synchronize_threads(); /* 4: dump finished */
sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
- client_new_port, &ao1);
-
- synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
+ client_new_port, &cnt1);
+
+ trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_addr,
+ this_ip_dest, test_server_port + 1, client_new_port, 1);
+ trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_dest,
+ this_ip_addr, client_new_port, test_server_port + 1, 1);
+ trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_addr,
+ this_ip_dest, test_server_port + 1, client_new_port, 1);
+ trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_dest,
+ this_ip_addr, client_new_port, test_server_port + 1, 1);
+ synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */
bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
if (bytes != quota) {
if (bytes > 0)
@@ -127,22 +135,23 @@ static void *server_fn(void *arg)
test_ok("server alive");
}
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ synchronize_threads(); /* 6: verify counters after SEQ-number rollover */
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
after_good = netstat_get_one("TCPAOGood", NULL);
- test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+ test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD);
if (after_good <= before_good) {
- test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+ test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
after_good, before_good);
} else {
- test_ok("TCPAOGood counter increased %zu => %zu",
+ test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64,
before_good, after_good);
}
after_bad = netstat_get_one("TCPAOBad", NULL);
if (after_bad)
- test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+ test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad);
else
test_ok("TCPAOBad counter didn't increase");
test_enable_repair(sk);
@@ -164,7 +173,7 @@ out:
static void *client_fn(void *arg)
{
uint64_t before_good, after_good, after_bad;
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
struct tcp_sock_state img;
struct tcp_ao_repair ao_img;
sockaddr_af saddr;
@@ -182,7 +191,7 @@ static void *client_fn(void *arg)
test_error("failed to connect()");
synchronize_threads(); /* 2: accepted => send data */
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, msg_len, nr_packets)) {
test_fail("pre-migrate verify failed");
return NULL;
}
@@ -204,30 +213,31 @@ static void *client_fn(void *arg)
test_adjust_seqs(&img, &ao_img, false);
synchronize_threads(); /* 4: dump finished */
sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
- test_server_port + 1, &ao1);
+ test_server_port + 1, &cnt1);
- synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */
+ if (test_client_verify(sk, msg_len, nr_packets))
test_fail("post-migrate verify failed");
else
test_ok("post-migrate connection alive");
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ synchronize_threads(); /* 5: verify counters after SEQ-number rollover */
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
after_good = netstat_get_one("TCPAOGood", NULL);
- test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+ test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD);
if (after_good <= before_good) {
- test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+ test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
after_good, before_good);
} else {
- test_ok("TCPAOGood counter increased %zu => %zu",
+ test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64,
before_good, after_good);
}
after_bad = netstat_get_one("TCPAOBad", NULL);
if (after_bad)
- test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+ test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad);
else
test_ok("TCPAOBad counter didn't increase");
@@ -240,6 +250,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(7, server_fn, client_fn);
+ test_init(8, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
index 517930f9721b..0abb9807d742 100644
--- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
+++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
@@ -6,6 +6,8 @@
static union tcp_addr tcp_md5_client;
+#define FILTER_TEST_NKEYS 16
+
static int test_port = 7788;
static void make_listen(int sk)
{
@@ -30,8 +32,8 @@ static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info,
#define __cmp_ao(member) \
do { \
if (info->member != tmp.member) { \
- test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu", \
- tst, (size_t)info->member, (size_t)tmp.member); \
+ test_fail("%s: getsockopt(): " __stringify(member) " %" PRIu64 " != %" PRIu64, \
+ tst, (uint64_t)info->member, (uint64_t)tmp.member); \
return; \
} \
} while(0)
@@ -813,23 +815,197 @@ static void duplicate_tests(void)
setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: SendID differs");
}
+static void fetch_all_keys(int sk, struct tcp_ao_getsockopt *keys)
+{
+ socklen_t optlen = sizeof(struct tcp_ao_getsockopt);
+
+ memset(keys, 0, sizeof(struct tcp_ao_getsockopt) * FILTER_TEST_NKEYS);
+ keys[0].get_all = 1;
+ keys[0].nkeys = FILTER_TEST_NKEYS;
+ if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &keys[0], &optlen))
+ test_error("getsockopt");
+}
+
+static int prepare_test_keys(struct tcp_ao_getsockopt *keys)
+{
+ const char *test_password = "Test password number ";
+ struct tcp_ao_add test_ao[FILTER_TEST_NKEYS];
+ char test_password_scratch[64] = {};
+ u8 rcvid = 100, sndid = 100;
+ int sk;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ for (int i = 0; i < FILTER_TEST_NKEYS; i++) {
+ snprintf(test_password_scratch, 64, "%s %d", test_password, i);
+ test_prepare_key(&test_ao[i], DEFAULT_TEST_ALGO, this_ip_dest,
+ false, false, DEFAULT_TEST_PREFIX, 0, sndid++,
+ rcvid++, 0, 0, strlen(test_password_scratch),
+ test_password_scratch);
+ }
+ test_ao[0].set_current = 1;
+ test_ao[1].set_rnext = 1;
+ /* One key with a different addr and overlapping sndid, rcvid */
+ tcp_addr_to_sockaddr_in(&test_ao[2].addr, &this_ip_addr, 0);
+ test_ao[2].sndid = 100;
+ test_ao[2].rcvid = 100;
+
+ /* Add keys in a random order */
+ for (int i = 0; i < FILTER_TEST_NKEYS; i++) {
+ int randidx = rand() % (FILTER_TEST_NKEYS - i);
+
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY,
+ &test_ao[randidx], sizeof(struct tcp_ao_add)))
+ test_error("setsockopt()");
+ memcpy(&test_ao[randidx], &test_ao[FILTER_TEST_NKEYS - 1 - i],
+ sizeof(struct tcp_ao_add));
+ }
+
+ fetch_all_keys(sk, keys);
+
+ return sk;
+}
+
+/* Assumes passwords are unique */
+static int compare_mkts(struct tcp_ao_getsockopt *expected, int nexpected,
+ struct tcp_ao_getsockopt *actual, int nactual)
+{
+ int matches = 0;
+
+ for (int i = 0; i < nexpected; i++) {
+ for (int j = 0; j < nactual; j++) {
+ if (memcmp(expected[i].key, actual[j].key,
+ TCP_AO_MAXKEYLEN) == 0)
+ matches++;
+ }
+ }
+ return nexpected - matches;
+}
+
+static void filter_keys_checked(int sk, struct tcp_ao_getsockopt *filter,
+ struct tcp_ao_getsockopt *expected,
+ unsigned int nexpected, const char *tst)
+{
+ struct tcp_ao_getsockopt filtered_keys[FILTER_TEST_NKEYS] = {};
+ struct tcp_ao_getsockopt all_keys[FILTER_TEST_NKEYS] = {};
+ socklen_t len = sizeof(struct tcp_ao_getsockopt);
+
+ fetch_all_keys(sk, all_keys);
+ memcpy(&filtered_keys[0], filter, sizeof(struct tcp_ao_getsockopt));
+ filtered_keys[0].nkeys = FILTER_TEST_NKEYS;
+ if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, filtered_keys, &len))
+ test_error("getsockopt");
+ if (filtered_keys[0].nkeys != nexpected) {
+ test_fail("wrong nr of keys, expected %u got %u", nexpected,
+ filtered_keys[0].nkeys);
+ goto out_close;
+ }
+ if (compare_mkts(expected, nexpected, filtered_keys,
+ filtered_keys[0].nkeys)) {
+ test_fail("got wrong keys back");
+ goto out_close;
+ }
+ test_ok("filter keys: %s", tst);
+
+out_close:
+ close(sk);
+ memset(filter, 0, sizeof(struct tcp_ao_getsockopt));
+}
+
+static void filter_tests(void)
+{
+ struct tcp_ao_getsockopt original_keys[FILTER_TEST_NKEYS];
+ struct tcp_ao_getsockopt expected_keys[FILTER_TEST_NKEYS];
+ struct tcp_ao_getsockopt filter = {};
+ int sk, f, nmatches;
+ socklen_t len;
+
+ f = 2;
+ sk = prepare_test_keys(original_keys);
+ filter.rcvid = original_keys[f].rcvid;
+ filter.sndid = original_keys[f].sndid;
+ memcpy(&filter.addr, &original_keys[f].addr,
+ sizeof(original_keys[f].addr));
+ filter.prefix = original_keys[f].prefix;
+ filter_keys_checked(sk, &filter, &original_keys[f], 1,
+ "by sndid, rcvid, address");
+
+ f = -1;
+ sk = prepare_test_keys(original_keys);
+ for (int i = 0; i < original_keys[0].nkeys; i++) {
+ if (original_keys[i].is_current) {
+ f = i;
+ break;
+ }
+ }
+ if (f < 0)
+ test_error("No current key after adding one");
+ filter.is_current = 1;
+ filter_keys_checked(sk, &filter, &original_keys[f], 1, "by is_current");
+
+ f = -1;
+ sk = prepare_test_keys(original_keys);
+ for (int i = 0; i < original_keys[0].nkeys; i++) {
+ if (original_keys[i].is_rnext) {
+ f = i;
+ break;
+ }
+ }
+ if (f < 0)
+ test_error("No rnext key after adding one");
+ filter.is_rnext = 1;
+ filter_keys_checked(sk, &filter, &original_keys[f], 1, "by is_rnext");
+
+ f = -1;
+ nmatches = 0;
+ sk = prepare_test_keys(original_keys);
+ for (int i = 0; i < original_keys[0].nkeys; i++) {
+ if (original_keys[i].sndid == 100) {
+ f = i;
+ memcpy(&expected_keys[nmatches], &original_keys[i],
+ sizeof(struct tcp_ao_getsockopt));
+ nmatches++;
+ }
+ }
+ if (f < 0)
+ test_error("No key for sndid 100");
+ if (nmatches != 2)
+ test_error("Should have 2 keys with sndid 100");
+ filter.rcvid = original_keys[f].rcvid;
+ filter.sndid = original_keys[f].sndid;
+ filter.addr.ss_family = test_family;
+ filter_keys_checked(sk, &filter, expected_keys, nmatches,
+ "by sndid, rcvid");
+
+ sk = prepare_test_keys(original_keys);
+ filter.get_all = 1;
+ filter.nkeys = FILTER_TEST_NKEYS / 2;
+ len = sizeof(struct tcp_ao_getsockopt);
+ if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &filter, &len))
+ test_error("getsockopt");
+ if (filter.nkeys == FILTER_TEST_NKEYS)
+ test_ok("filter keys: correct nkeys when in.nkeys < matches");
+ else
+ test_fail("filter keys: wrong nkeys, expected %u got %u",
+ FILTER_TEST_NKEYS, filter.nkeys);
+}
+
static void *client_fn(void *arg)
{
if (inet_pton(TEST_FAMILY, __TEST_CLIENT_IP(2), &tcp_md5_client) != 1)
test_error("Can't convert ip address");
extend_tests();
einval_tests();
+ filter_tests();
duplicate_tests();
- /*
- * TODO: check getsockopt(TCP_AO_GET_KEYS) with different filters
- * returning proper nr & keys;
- */
return NULL;
}
int main(int argc, char *argv[])
{
- test_init(120, client_fn, NULL);
+ test_init(126, client_fn, NULL);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
index 6b59a652159f..a1467b64390a 100644
--- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
+++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
@@ -6,6 +6,7 @@
#define fault(type) (inj == FAULT_ ## type)
static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver.";
static const char *ao_password = DEFAULT_TEST_PASSWORD;
+static volatile int sk_pair;
static union tcp_addr client2;
static union tcp_addr client3;
@@ -41,10 +42,10 @@ static void try_accept(const char *tst_name, unsigned int port,
const char *cnt_name, test_cnt cnt_expected,
int needs_tcp_md5, fault_t inj)
{
- struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ struct tcp_counters cnt1, cnt2;
uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
- int lsk, err, sk = 0;
- time_t timeout;
+ test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
+ int lsk, err, sk = -1;
if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
return;
@@ -63,21 +64,25 @@ static void try_accept(const char *tst_name, unsigned int port,
if (cnt_name)
before_cnt = netstat_get_one(cnt_name, NULL);
- if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (ao_addr && test_get_tcp_counters(lsk, &cnt1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- err = test_wait_fd(lsk, timeout, 0);
+ err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair);
+ synchronize_threads(); /* connect()/accept() timeouts */
if (err == -ETIMEDOUT) {
+ sk_pair = err;
if (!fault(TIMEOUT))
- test_fail("timed out for accept()");
+ test_fail("%s: timed out for accept()", tst_name);
+ } else if (err == -EKEYREJECTED) {
+ if (!fault(KEYREJECT))
+ test_fail("%s: key was rejected", tst_name);
} else if (err < 0) {
- test_error("test_wait_fd()");
+ test_error("test_skpair_wait_poll()");
} else {
if (fault(TIMEOUT))
- test_fail("ready to accept");
+ test_fail("%s: ready to accept", tst_name);
sk = accept(lsk, NULL, NULL);
if (sk < 0) {
@@ -88,8 +93,8 @@ static void try_accept(const char *tst_name, unsigned int port,
}
}
- if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2))
- test_error("test_get_tcp_ao_counters()");
+ if (ao_addr && test_get_tcp_counters(lsk, &cnt2))
+ test_error("test_get_tcp_counters()");
close(lsk);
if (!cnt_name) {
@@ -100,18 +105,18 @@ static void try_accept(const char *tst_name, unsigned int port,
after_cnt = netstat_get_one(cnt_name, NULL);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %zu <= %zu",
+ test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %zu => %zu",
+ test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
if (ao_addr)
- test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
out:
synchronize_threads(); /* test_kill_sk() */
- if (sk > 0)
+ if (sk >= 0)
test_kill_sk(sk);
}
@@ -152,78 +157,82 @@ static void *server_fn(void *arg)
server_add_routes();
- try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0,
+ try_accept("[server] AO server (INADDR_ANY): AO client", port++, NULL, 0,
&addr_any, 0, 0, 100, 100, 0, "TCPAOGood",
TEST_CNT_GOOD, 0, 0);
- try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0,
+ try_accept("[server] AO server (INADDR_ANY): MD5 client", port++, NULL, 0,
&addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected",
- 0, 1, FAULT_TIMEOUT);
- try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0,
+ TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+ try_accept("[server] AO server (INADDR_ANY): no sign client", port++, NULL, 0,
&addr_any, 0, 0, 100, 100, 0, "TCPAORequired",
TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
- try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
+ try_accept("[server] AO server (AO_REQUIRED): AO client", port++, NULL, 0,
&this_ip_dest, TEST_PREFIX, true,
100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0);
- try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
+ try_accept("[server] AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
&this_ip_dest, TEST_PREFIX, true,
100, 100, 0, "TCPAORequired",
TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
- try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0,
+ try_accept("[server] MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0,
NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
- 0, 1, FAULT_TIMEOUT);
- try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
+ TEST_CNT_NS_KEY_NOT_FOUND, 1, FAULT_TIMEOUT);
+ try_accept("[server] MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0);
- try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any,
+ try_accept("[server] MD5 server (INADDR_ANY): no sign client", port++, &addr_any,
0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound",
- 0, 1, FAULT_TIMEOUT);
+ TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT);
- try_accept("no sign server: AO client", port++, NULL, 0,
+ try_accept("[server] no sign server: AO client", port++, NULL, 0,
NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
- TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT);
- try_accept("no sign server: MD5 client", port++, NULL, 0,
+ TEST_CNT_NS_KEY_NOT_FOUND, 0, FAULT_TIMEOUT);
+ try_accept("[server] no sign server: MD5 client", port++, NULL, 0,
NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected",
- 0, 1, FAULT_TIMEOUT);
- try_accept("no sign server: no sign client", port++, NULL, 0,
+ TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+ try_accept("[server] no sign server: no sign client", port++, NULL, 0,
NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0);
- try_accept("AO+MD5 server: AO client (matching)", port++,
+ try_accept("[server] AO+MD5 server: AO client (matching)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0);
- try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++,
+ try_accept("[server] AO+MD5 server: AO client (misconfig, matching MD5)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++,
+ try_accept("[server] AO+MD5 server: AO client (misconfig, non-matching)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: MD5 client (matching)", port++,
+ try_accept("[server] AO+MD5 server: MD5 client (matching)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, NULL, 0, 1, 0);
- try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++,
+ try_accept("[server] AO+MD5 server: MD5 client (misconfig, matching AO)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++,
+ 100, 100, 0, "TCPMD5Unexpected",
+ TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+ try_accept("[server] AO+MD5 server: MD5 client (misconfig, non-matching)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: no sign client (unmatched)", port++,
+ 100, 100, 0, "TCPMD5Unexpected",
+ TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+ try_accept("[server] AO+MD5 server: no sign client (unmatched)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "CurrEstab", 0, 1, 0);
- try_accept("AO+MD5 server: no sign client (misconfig, matching AO)",
+ try_accept("[server] AO+MD5 server: no sign client (misconfig, matching AO)",
port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "TCPAORequired",
TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)",
+ try_accept("[server] AO+MD5 server: no sign client (misconfig, matching MD5)",
port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT);
+ 100, 100, 0, "TCPMD5NotFound",
+ TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
+ /* Key rejected by the other side, failing short through skpair */
+ try_accept("[server] AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
+ 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT);
+ try_accept("[server] AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
+ 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT);
server_add_fail_tests(&port);
@@ -258,7 +267,6 @@ static void try_connect(const char *tst_name, unsigned int port,
uint8_t sndid, uint8_t rcvid, uint8_t vrf,
fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr)
{
- time_t timeout;
int sk, ret;
if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
@@ -280,10 +288,10 @@ static void try_connect(const char *tst_name, unsigned int port,
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
-
+ ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair);
+ synchronize_threads(); /* connect()/accept() timeouts */
if (ret < 0) {
+ sk_pair = ret;
if (fault(KEYREJECT) && ret == -EKEYREJECTED)
test_ok("%s: connect() was prevented", tst_name);
else if (ret == -ETIMEDOUT && fault(TIMEOUT))
@@ -303,8 +311,7 @@ static void try_connect(const char *tst_name, unsigned int port,
out:
synchronize_threads(); /* test_kill_sk() */
- /* _test_connect_socket() cleans up on failure */
- if (ret > 0)
+ if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */
test_kill_sk(sk);
}
@@ -435,7 +442,6 @@ static void try_to_add(const char *tst_name, unsigned int port,
int ao_vrf, uint8_t sndid, uint8_t rcvid,
int needs_tcp_md5, fault_t inj)
{
- time_t timeout;
int sk, ret;
if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
@@ -448,10 +454,10 @@ static void try_to_add(const char *tst_name, unsigned int port,
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+ ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair);
- if (ret <= 0) {
+ synchronize_threads(); /* connect()/accept() timeouts */
+ if (ret < 0) {
test_error("%s: connect() returned %d", tst_name, ret);
goto out;
}
@@ -487,8 +493,7 @@ static void try_to_add(const char *tst_name, unsigned int port,
out:
synchronize_threads(); /* test_kill_sk() */
- /* _test_connect_socket() cleans up on failure */
- if (ret > 0)
+ if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */
test_kill_sk(sk);
}
@@ -671,24 +676,38 @@ static void *client_fn(void *arg)
try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("no sign server: AO client", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("no sign server: MD5 client", port++, &addr_any, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
try_connect("no sign server: no sign client", port++, NULL, 0,
@@ -696,25 +715,37 @@ static void *client_fn(void *arg)
try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, 0, 1, &client2);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("AO+MD5 server: AO client (misconfig, matching MD5)",
port++, NULL, 0, &addr_any, 0, 100, 100, 0,
FAULT_TIMEOUT, 1, &this_ip_addr);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, client3, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("AO+MD5 server: AO client (misconfig, non-matching)",
port++, NULL, 0, &addr_any, 0, 100, 100, 0,
FAULT_TIMEOUT, 1, &client3);
try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0,
NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client2,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)",
port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
1, &client2);
+ trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client3,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)",
port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
1, &client3);
try_connect("AO+MD5 server: no sign client (unmatched)",
port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3);
+ trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO+MD5 server: no sign client (misconfig, matching AO)",
port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
1, &client2);
+ trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)",
port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
1, &this_ip_addr);
@@ -736,6 +767,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(72, server_fn, client_fn);
+ test_init(73, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
index c1cb0c75156a..4b3f9b5e50fe 100644
--- a/tools/testing/selftests/net/tcp_fastopen_backup_key.c
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
@@ -26,7 +26,7 @@
#include <fcntl.h>
#include <time.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef TCP_FASTOPEN_KEY
#define TCP_FASTOPEN_KEY 33
diff --git a/tools/testing/selftests/net/tcp_port_share.c b/tools/testing/selftests/net/tcp_port_share.c
new file mode 100644
index 000000000000..6146b62610df
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_port_share.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2025 Cloudflare, Inc.
+
+/* Tests for TCP port sharing (bind bucket reuse). */
+
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdlib.h>
+
+#include "kselftest_harness.h"
+
+#define DST_PORT 30000
+#define SRC_PORT 40000
+
+struct sockaddr_inet {
+ union {
+ struct sockaddr_storage ss;
+ struct sockaddr_in6 v6;
+ struct sockaddr_in v4;
+ struct sockaddr sa;
+ };
+ socklen_t len;
+ char str[INET6_ADDRSTRLEN + __builtin_strlen("[]:65535") + 1];
+};
+
+const int one = 1;
+
+static int disconnect(int fd)
+{
+ return connect(fd, &(struct sockaddr){ AF_UNSPEC }, sizeof(struct sockaddr));
+}
+
+static int getsockname_port(int fd)
+{
+ struct sockaddr_inet addr = {};
+ int err;
+
+ addr.len = sizeof(addr);
+ err = getsockname(fd, &addr.sa, &addr.len);
+ if (err)
+ return -1;
+
+ switch (addr.sa.sa_family) {
+ case AF_INET:
+ return ntohs(addr.v4.sin_port);
+ case AF_INET6:
+ return ntohs(addr.v6.sin6_port);
+ default:
+ errno = EAFNOSUPPORT;
+ return -1;
+ }
+}
+
+static void make_inet_addr(int af, const char *ip, __u16 port,
+ struct sockaddr_inet *addr)
+{
+ const char *fmt = "";
+
+ memset(addr, 0, sizeof(*addr));
+
+ switch (af) {
+ case AF_INET:
+ addr->len = sizeof(addr->v4);
+ addr->v4.sin_family = af;
+ addr->v4.sin_port = htons(port);
+ inet_pton(af, ip, &addr->v4.sin_addr);
+ fmt = "%s:%hu";
+ break;
+ case AF_INET6:
+ addr->len = sizeof(addr->v6);
+ addr->v6.sin6_family = af;
+ addr->v6.sin6_port = htons(port);
+ inet_pton(af, ip, &addr->v6.sin6_addr);
+ fmt = "[%s]:%hu";
+ break;
+ }
+
+ snprintf(addr->str, sizeof(addr->str), fmt, ip, port);
+}
+
+FIXTURE(tcp_port_share) {};
+
+FIXTURE_VARIANT(tcp_port_share) {
+ int domain;
+ /* IP to listen on and connect to */
+ const char *dst_ip;
+ /* Primary IP to connect from */
+ const char *src1_ip;
+ /* Secondary IP to connect from */
+ const char *src2_ip;
+ /* IP to bind to in order to block the source port */
+ const char *bind_ip;
+};
+
+FIXTURE_VARIANT_ADD(tcp_port_share, ipv4) {
+ .domain = AF_INET,
+ .dst_ip = "127.0.0.1",
+ .src1_ip = "127.1.1.1",
+ .src2_ip = "127.2.2.2",
+ .bind_ip = "127.3.3.3",
+};
+
+FIXTURE_VARIANT_ADD(tcp_port_share, ipv6) {
+ .domain = AF_INET6,
+ .dst_ip = "::1",
+ .src1_ip = "2001:db8::1",
+ .src2_ip = "2001:db8::2",
+ .bind_ip = "2001:db8::3",
+};
+
+FIXTURE_SETUP(tcp_port_share)
+{
+ int sc;
+
+ ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+ ASSERT_EQ(system("ip link set dev lo up"), 0);
+ ASSERT_EQ(system("ip addr add dev lo 2001:db8::1/32 nodad"), 0);
+ ASSERT_EQ(system("ip addr add dev lo 2001:db8::2/32 nodad"), 0);
+ ASSERT_EQ(system("ip addr add dev lo 2001:db8::3/32 nodad"), 0);
+
+ sc = open("/proc/sys/net/ipv4/ip_local_port_range", O_WRONLY);
+ ASSERT_GE(sc, 0);
+ ASSERT_GT(dprintf(sc, "%hu %hu\n", SRC_PORT, SRC_PORT), 0);
+ ASSERT_EQ(close(sc), 0);
+}
+
+FIXTURE_TEARDOWN(tcp_port_share) {}
+
+/* Verify that an ephemeral port becomes available again after the socket
+ * bound to it and blocking it from reuse is closed.
+ */
+TEST_F(tcp_port_share, can_reuse_port_after_bind_and_close)
+{
+ const typeof(variant) v = variant;
+ struct sockaddr_inet addr;
+ int c1, c2, ln, pb;
+
+ /* Listen on <dst_ip>:<DST_PORT> */
+ ln = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(ln, 0) TH_LOG("socket(): %m");
+ ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+ ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+ ASSERT_EQ(listen(ln, 2), 0);
+
+ /* Connect from <src1_ip>:<SRC_PORT> */
+ c1 = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(c1, 0) TH_LOG("socket(): %m");
+ ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->src1_ip, 0, &addr);
+ ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+ ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str);
+ ASSERT_EQ(getsockname_port(c1), SRC_PORT);
+
+ /* Bind to <bind_ip>:<SRC_PORT>. Block the port from reuse. */
+ pb = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(pb, 0) TH_LOG("socket(): %m");
+ ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr);
+ ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ /* Try to connect from <src2_ip>:<SRC_PORT>. Expect failure. */
+ c2 = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(c2, 0) TH_LOG("socket");
+ ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->src2_ip, 0, &addr);
+ ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+ ASSERT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str);
+ ASSERT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m");
+
+ /* Unbind from <bind_ip>:<SRC_PORT>. Unblock the port for reuse. */
+ ASSERT_EQ(close(pb), 0);
+
+ /* Connect again from <src2_ip>:<SRC_PORT> */
+ EXPECT_EQ(connect(c2, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str);
+ EXPECT_EQ(getsockname_port(c2), SRC_PORT);
+
+ ASSERT_EQ(close(c2), 0);
+ ASSERT_EQ(close(c1), 0);
+ ASSERT_EQ(close(ln), 0);
+}
+
+/* Verify that a socket auto-bound during connect() blocks port reuse after
+ * disconnect (connect(AF_UNSPEC)) followed by an explicit port bind().
+ */
+TEST_F(tcp_port_share, port_block_after_disconnect)
+{
+ const typeof(variant) v = variant;
+ struct sockaddr_inet addr;
+ int c1, c2, ln, pb;
+
+ /* Listen on <dst_ip>:<DST_PORT> */
+ ln = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(ln, 0) TH_LOG("socket(): %m");
+ ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+ ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+ ASSERT_EQ(listen(ln, 2), 0);
+
+ /* Connect from <src1_ip>:<SRC_PORT> */
+ c1 = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(c1, 0) TH_LOG("socket(): %m");
+ ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->src1_ip, 0, &addr);
+ ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+ ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str);
+ ASSERT_EQ(getsockname_port(c1), SRC_PORT);
+
+ /* Disconnect the socket and bind it to <bind_ip>:<SRC_PORT> to block the port */
+ ASSERT_EQ(disconnect(c1), 0) TH_LOG("disconnect: %m");
+ ASSERT_EQ(setsockopt(c1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr);
+ ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ /* Trigger port-addr bucket state update with another bind() and close() */
+ pb = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(pb, 0) TH_LOG("socket(): %m");
+ ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr);
+ ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ ASSERT_EQ(close(pb), 0);
+
+ /* Connect from <src2_ip>:<SRC_PORT>. Expect failure. */
+ c2 = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(c2, 0) TH_LOG("socket: %m");
+ ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->src2_ip, 0, &addr);
+ ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+ EXPECT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str);
+ EXPECT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m");
+
+ ASSERT_EQ(close(c2), 0);
+ ASSERT_EQ(close(c1), 0);
+ ASSERT_EQ(close(ln), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh
deleted file mode 100755
index 3119b80e711f..000000000000
--- a/tools/testing/selftests/net/test_blackhole_dev.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Runs blackhole-dev test using blackhole-dev kernel module
-
-if /sbin/modprobe -q test_blackhole_dev ; then
- /sbin/modprobe -q -r test_blackhole_dev;
- echo "test_blackhole_dev: ok";
-else
- echo "test_blackhole_dev: [FAIL]";
- exit 1;
-fi
diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh
index 1b3f89e2b86e..2a7224fe74f2 100755
--- a/tools/testing/selftests/net/test_bridge_backup_port.sh
+++ b/tools/testing/selftests/net/test_bridge_backup_port.sh
@@ -315,6 +315,29 @@ backup_port()
tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "No forwarding out of vx0"
+ # Check that packets are forwarded out of vx0 when swp1 is
+ # administratively down and out of swp1 when it is administratively up
+ # again.
+ run_cmd "ip -n $sw1 link set dev swp1 down"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+ log_test $? 0 "swp1 administratively down"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 3
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "Forwarding out of vx0"
+
+ run_cmd "ip -n $sw1 link set dev swp1 up"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
+ log_test $? 0 "swp1 administratively up"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "No forwarding out of vx0"
+
# Remove vx0 as the backup port of swp1 and check that packets are no
# longer forwarded out of vx0 when swp1 does not have a carrier.
run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port"
@@ -322,9 +345,9 @@ backup_port()
log_test $? 1 "vx0 not configured as backup port of swp1"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets $sw1 "dev swp1 egress" 101 4
+ tc_check_packets $sw1 "dev swp1 egress" 101 5
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets $sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "No forwarding out of vx0"
run_cmd "ip -n $sw1 link set dev swp1 carrier off"
@@ -332,9 +355,9 @@ backup_port()
log_test $? 0 "swp1 carrier off"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets $sw1 "dev swp1 egress" 101 4
+ tc_check_packets $sw1 "dev swp1 egress" 101 5
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets $sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "No forwarding out of vx0"
}
diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
index 02b986c9c247..9067197c9055 100755
--- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
+++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
@@ -51,7 +51,9 @@ ret=0
# All tests in this script. Can be overridden with -t option.
TESTS="
neigh_suppress_arp
+ neigh_suppress_uc_arp
neigh_suppress_ns
+ neigh_suppress_uc_ns
neigh_vlan_suppress_arp
neigh_vlan_suppress_ns
"
@@ -388,6 +390,52 @@ neigh_suppress_arp()
neigh_suppress_arp_common $vid $sip $tip
}
+neigh_suppress_uc_arp_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local tip=$1; shift
+ local tmac
+
+ echo
+ echo "Unicast ARP, per-port ARP suppression - VLAN $vid"
+ echo "-----------------------------------------------"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+ run_cmd "ip -n $sw1 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid"
+
+ run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto arp flower arp_sip $tip arp_op reply action pass"
+
+ run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto arp flower arp_tip $sip arp_op reply action pass"
+
+ run_cmd "ip netns exec $h1 mausezahn eth0.$vid -c 1 -a own -b $tmac -t arp 'request sip=$sip, tip=$tip, tmac=$tmac' -q"
+ tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+ log_test $? 0 "Unicast ARP, suppression on, h1 filter"
+ tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+ log_test $? 0 "Unicast ARP, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_arp()
+{
+ local vid=10
+ local sip=192.0.2.1
+ local tip=192.0.2.2
+
+ neigh_suppress_uc_arp_common $vid $sip $tip
+
+ vid=20
+ sip=192.0.2.17
+ tip=192.0.2.18
+ neigh_suppress_uc_arp_common $vid $sip $tip
+}
+
neigh_suppress_ns_common()
{
local vid=$1; shift
@@ -494,6 +542,78 @@ neigh_suppress_ns()
neigh_suppress_ns_common $vid $saddr $daddr $maddr
}
+icmpv6_header_get()
+{
+ local csum=$1; shift
+ local tip=$1; shift
+ local type
+ local p
+
+ # Type 135 (Neighbor Solicitation), hex format
+ type="87"
+ p=$(:
+ )"$type:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"$csum:"$( : ICMPv6.checksum
+ )"00:00:00:00:"$( : Reserved
+ )"$tip:"$( : Target Address
+ )
+ echo $p
+}
+
+neigh_suppress_uc_ns_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local full_dip=$1; shift
+ local csum=$1; shift
+ local tmac
+
+ echo
+ echo "Unicast NS, per-port NS suppression - VLAN $vid"
+ echo "---------------------------------------------"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+ run_cmd "ip -n $sw1 -6 neigh replace $dip lladdr $tmac nud permanent dev br0.$vid"
+
+ run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 src_ip $dip type 136 code 0 action pass"
+
+ run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip $sip type 136 code 0 action pass"
+
+ run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a own -b $tmac -A $sip -B $dip -t ip hop=255,next=58,payload=$(icmpv6_header_get $csum $full_dip) -q"
+ tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+ log_test $? 0 "Unicast NS, suppression on, h1 filter"
+ tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+ log_test $? 0 "Unicast NS, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_ns()
+{
+ local vid=10
+ local saddr=2001:db8:1::1
+ local daddr=2001:db8:1::2
+ local full_daddr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02
+ local csum="ef:79"
+
+ neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+
+ vid=20
+ saddr=2001:db8:2::1
+ daddr=2001:db8:2::2
+ full_daddr=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:02
+ csum="ef:76"
+
+ neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+}
+
neigh_vlan_suppress_arp()
{
local vid1=10
@@ -825,6 +945,11 @@ if [ ! -x "$(command -v jq)" ]; then
exit $ksft_skip
fi
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
bridge link help 2>&1 | grep -q "neigh_vlan_suppress"
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support"
diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh
new file mode 100755
index 000000000000..7c594bf6ead0
--- /dev/null
+++ b/tools/testing/selftests/net/test_neigh.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+TESTS="
+ extern_valid_ipv4
+ extern_valid_ipv6
+"
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ stderr=
+ fi
+
+ out=$(eval "$cmd" "$stderr")
+ rc=$?
+ if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+################################################################################
+# Setup
+
+setup()
+{
+ set -e
+
+ setup_ns ns1 ns2
+
+ ip -n "$ns1" link add veth0 type veth peer name veth1 netns "$ns2"
+ ip -n "$ns1" link set dev veth0 up
+ ip -n "$ns2" link set dev veth1 up
+
+ ip -n "$ns1" address add 192.0.2.1/24 dev veth0
+ ip -n "$ns1" address add 2001:db8:1::1/64 dev veth0 nodad
+ ip -n "$ns2" address add 192.0.2.2/24 dev veth1
+ ip -n "$ns2" address add 2001:db8:1::2/64 dev veth1 nodad
+
+ ip netns exec "$ns1" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec "$ns2" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+
+ sleep 5
+
+ set +e
+}
+
+exit_cleanup_all()
+{
+ cleanup_all_ns
+ exit "${EXIT_STATUS}"
+}
+
+################################################################################
+# Tests
+
+extern_valid_common()
+{
+ local af_str=$1; shift
+ local ip_addr=$1; shift
+ local tbl_name=$1; shift
+ local subnet=$1; shift
+ local mac
+
+ mac=$(ip -n "$ns2" -j link show dev veth1 | jq -r '.[]["address"]')
+
+ RET=0
+
+ # Check that simple addition works.
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "No \"extern_valid\" flag after addition"
+
+ log_test "$af_str \"extern_valid\" flag: Add entry"
+
+ RET=0
+
+ # Check that an entry cannot be added with "extern_valid" flag and an
+ # invalid state.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr nud none dev veth0 extern_valid"
+ check_fail $? "Managed to add an entry with \"extern_valid\" flag and an invalid state"
+
+ log_test "$af_str \"extern_valid\" flag: Add with an invalid state"
+
+ RET=0
+
+ # Check that entry cannot be added with both "extern_valid" flag and
+ # "use" / "managed" flag.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ check_fail $? "Managed to add an entry with \"extern_valid\" flag and \"use\" flag"
+
+ log_test "$af_str \"extern_valid\" flag: Add with \"use\" flag"
+
+ RET=0
+
+ # Check that "extern_valid" flag can be toggled using replace.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Did not manage to set \"extern_valid\" flag with replace"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_fail $? "Did not manage to clear \"extern_valid\" flag with replace"
+
+ log_test "$af_str \"extern_valid\" flag: Replace entry"
+
+ RET=0
+
+ # Check that an existing "extern_valid" entry can be marked as
+ # "managed".
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid managed"
+ check_err $? "Did not manage to add \"managed\" flag to an existing \"extern_valid\" entry"
+
+ log_test "$af_str \"extern_valid\" flag: Replace entry with \"managed\" flag"
+
+ RET=0
+
+ # Check that entry cannot be replaced with "extern_valid" flag and an
+ # invalid state.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr nud none dev veth0 extern_valid"
+ check_fail $? "Managed to replace an entry with \"extern_valid\" flag and an invalid state"
+
+ log_test "$af_str \"extern_valid\" flag: Replace with an invalid state"
+
+ RET=0
+
+ # Check that an "extern_valid" entry is flushed when the interface is
+ # put administratively down.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 link set dev veth0 down"
+ run_cmd "ip -n $ns1 link set dev veth0 up"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+ check_fail $? "\"extern_valid\" entry not flushed upon interface down"
+
+ log_test "$af_str \"extern_valid\" flag: Interface down"
+
+ RET=0
+
+ # Check that an "extern_valid" entry is not flushed when the interface
+ # loses its carrier.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns2 link set dev veth1 down"
+ run_cmd "ip -n $ns2 link set dev veth1 up"
+ run_cmd "sleep 2"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+ check_err $? "\"extern_valid\" entry flushed upon carrier down"
+
+ log_test "$af_str \"extern_valid\" flag: Carrier down"
+
+ RET=0
+
+ # Check that when entry transitions to "reachable" state it maintains
+ # the "extern_valid" flag. Wait "delay_probe" seconds for ARP request /
+ # NS to be sent.
+ local delay_probe
+
+ delay_probe=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["delay_probe"]')
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ run_cmd "sleep $((delay_probe / 1000 + 2))"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"REACHABLE\""
+ check_err $? "Entry did not transition to \"reachable\" state"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry did not maintain \"extern_valid\" flag after transition to \"reachable\" state"
+
+ log_test "$af_str \"extern_valid\" flag: Transition to \"reachable\" state"
+
+ RET=0
+
+ # Drop all packets, trigger resolution and check that entry goes back
+ # to "stale" state instead of "failed".
+ local mcast_reprobes
+ local retrans_time
+ local ucast_probes
+ local app_probes
+ local probes
+ local delay
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "tc -n $ns2 qdisc add dev veth1 clsact"
+ run_cmd "tc -n $ns2 filter add dev veth1 ingress proto all matchall action drop"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ retrans_time=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["retrans"]')
+ ucast_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["ucast_probes"]')
+ app_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["app_probes"]')
+ mcast_reprobes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["mcast_reprobes"]')
+ delay=$((delay_probe + (ucast_probes + app_probes + mcast_reprobes) * retrans_time))
+ run_cmd "sleep $((delay / 1000 + 2))"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"STALE\""
+ check_err $? "Entry did not return to \"stale\" state"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry did not maintain \"extern_valid\" flag after returning to \"stale\" state"
+ probes=$(ip -n "$ns1" -j -s neigh get "$ip_addr" dev veth0 | jq '.[]["probes"]')
+ if [[ $probes -eq 0 ]]; then
+ check_err 1 "No probes were sent"
+ fi
+
+ log_test "$af_str \"extern_valid\" flag: Transition back to \"stale\" state"
+
+ run_cmd "tc -n $ns2 qdisc del dev veth1 clsact"
+
+ RET=0
+
+ # Forced garbage collection runs whenever the number of entries is
+ # larger than "thresh3" and deletes stale entries that have not been
+ # updated in the last 5 seconds.
+ #
+ # Check that an "extern_valid" entry survives a forced garbage
+ # collection. Add an entry, wait 5 seconds and add more entries than
+ # "thresh3" so that forced garbage collection will run.
+ #
+ # Note that the garbage collection thresholds are global resources and
+ # that changes in the initial namespace affect all the namespaces.
+ local forced_gc_runs_t0
+ local forced_gc_runs_t1
+ local orig_thresh1
+ local orig_thresh2
+ local orig_thresh3
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+ orig_thresh2=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh2")) | .["thresh2"]')
+ orig_thresh3=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh3")) | .["thresh3"]')
+ run_cmd "ip ntable change name $tbl_name thresh3 10 thresh2 9 thresh1 8"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+ run_cmd "sleep 5"
+ forced_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+ for i in {1..20}; do
+ run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+ done
+ forced_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+ if [[ $forced_gc_runs_t1 -eq $forced_gc_runs_t0 ]]; then
+ check_err 1 "Forced garbage collection did not run"
+ fi
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry with \"extern_valid\" flag did not survive forced garbage collection"
+ run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+ check_fail $? "Entry without \"extern_valid\" flag survived forced garbage collection"
+
+ log_test "$af_str \"extern_valid\" flag: Forced garbage collection"
+
+ run_cmd "ip ntable change name $tbl_name thresh3 $orig_thresh3 thresh2 $orig_thresh2 thresh1 $orig_thresh1"
+
+ RET=0
+
+ # Periodic garbage collection runs every "base_reachable"/2 seconds and
+ # if the number of entries is larger than "thresh1", then it deletes
+ # stale entries that have not been used in the last "gc_stale" seconds.
+ #
+ # Check that an "extern_valid" entry survives a periodic garbage
+ # collection. Add an "extern_valid" entry, add more than "thresh1"
+ # regular entries, wait "base_reachable" (longer than "gc_stale")
+ # seconds and check that the "extern_valid" entry was not deleted.
+ #
+ # Note that the garbage collection thresholds and "base_reachable" are
+ # global resources and that changes in the initial namespace affect all
+ # the namespaces.
+ local periodic_gc_runs_t0
+ local periodic_gc_runs_t1
+ local orig_base_reachable
+ local orig_gc_stale
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+ orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]')
+ run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000"
+ orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]')
+ run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 1000"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+ # Wait orig_base_reachable/2 for the new interval to take effect.
+ run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))"
+ for i in {1..20}; do
+ run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+ done
+ periodic_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+ run_cmd "sleep 10"
+ periodic_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+ [[ $periodic_gc_runs_t1 -ne $periodic_gc_runs_t0 ]]
+ check_err $? "Periodic garbage collection did not run"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry with \"extern_valid\" flag did not survive periodic garbage collection"
+ run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+ check_fail $? "Entry without \"extern_valid\" flag survived periodic garbage collection"
+
+ log_test "$af_str \"extern_valid\" flag: Periodic garbage collection"
+
+ run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale $orig_gc_stale"
+ run_cmd "ip ntable change name $tbl_name thresh1 $orig_thresh1 base_reachable $orig_base_reachable"
+}
+
+extern_valid_ipv4()
+{
+ extern_valid_common "IPv4" 192.0.2.2 "arp_cache" 192.0.2.
+}
+
+extern_valid_ipv6()
+{
+ extern_valid_common "IPv6" 2001:db8:1::2 "ndisc_cache" 2001:db8:1::
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+while getopts ":t:pvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$((VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+require_command jq
+
+if ! ip neigh help 2>&1 | grep -q "extern_valid"; then
+ echo "SKIP: iproute2 ip too old, missing \"extern_valid\" support"
+ exit "$ksft_skip"
+fi
+
+trap exit_cleanup_all EXIT
+
+for t in $TESTS
+do
+ setup; $t; cleanup_all_ns;
+done
diff --git a/tools/testing/selftests/net/test_so_rcv.sh b/tools/testing/selftests/net/test_so_rcv.sh
new file mode 100755
index 000000000000..d8aa4362879d
--- /dev/null
+++ b/tools/testing/selftests/net/test_so_rcv.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+HOSTS=("127.0.0.1" "::1")
+PORT=1234
+TOTAL_TESTS=0
+FAILED_TESTS=0
+
+declare -A TESTS=(
+ ["SO_RCVPRIORITY"]="-P 2"
+ ["SO_RCVMARK"]="-M 3"
+)
+
+check_result() {
+ ((TOTAL_TESTS++))
+ if [ "$1" -ne 0 ]; then
+ ((FAILED_TESTS++))
+ fi
+}
+
+cleanup()
+{
+ cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+setup_ns NS
+
+for HOST in "${HOSTS[@]}"; do
+ PROTOCOL="IPv4"
+ if [[ "$HOST" == "::1" ]]; then
+ PROTOCOL="IPv6"
+ fi
+ for test_name in "${!TESTS[@]}"; do
+ echo "Running $test_name test, $PROTOCOL"
+ arg=${TESTS[$test_name]}
+
+ ip netns exec $NS ./so_rcv_listener $arg $HOST $PORT &
+ LISTENER_PID=$!
+
+ sleep 0.5
+
+ if ! ip netns exec $NS ./cmsg_sender $arg $HOST $PORT; then
+ echo "Sender failed for $test_name, $PROTOCOL"
+ kill "$LISTENER_PID" 2>/dev/null
+ wait "$LISTENER_PID"
+ check_result 1
+ continue
+ fi
+
+ wait "$LISTENER_PID"
+ LISTENER_EXIT_CODE=$?
+
+ if [ "$LISTENER_EXIT_CODE" -eq 0 ]; then
+ echo "Rcv test OK for $test_name, $PROTOCOL"
+ check_result 0
+ else
+ echo "Rcv test FAILED for $test_name, $PROTOCOL"
+ check_result 1
+ fi
+ done
+done
+
+if [ "$FAILED_TESTS" -ne 0 ]; then
+ echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed"
+ exit ${KSFT_FAIL}
+else
+ echo "OK - All $TOTAL_TESTS tests passed"
+ exit ${KSFT_PASS}
+fi
diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
index 2d442cdab11e..8b414d0edada 100755
--- a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
+++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
@@ -1,29 +1,114 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Check FDB default-remote handling across "ip link set".
+ALL_TESTS="
+ test_set_remote
+ test_change_mc_remote
+"
+source lib.sh
check_remotes()
{
local what=$1; shift
local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l)
- echo -ne "expected two remotes after $what\t"
- if [[ $N != 2 ]]; then
- echo "[FAIL]"
- EXIT_STATUS=1
+ ((N == 2))
+ check_err $? "expected 2 remotes after $what, got $N"
+}
+
+# Check FDB default-remote handling across "ip link set".
+test_set_remote()
+{
+ RET=0
+
+ adf_ip_link_add vx up type vxlan id 2000 dstport 4789
+ bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
+ bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
+ check_remotes "fdb append"
+
+ ip link set dev vx type vxlan remote 192.0.2.30
+ check_remotes "link set"
+
+ log_test 'FDB default-remote handling across "ip link set"'
+}
+
+fmt_remote()
+{
+ local addr=$1; shift
+
+ if [[ $addr == 224.* ]]; then
+ echo "group $addr"
else
- echo "[ OK ]"
+ echo "remote $addr"
fi
}
-ip link add name vx up type vxlan id 2000 dstport 4789
-bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
-bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
-check_remotes "fdb append"
+change_remote()
+{
+ local remote=$1; shift
+
+ ip link set dev vx type vxlan $(fmt_remote $remote) dev v1
+}
+
+check_membership()
+{
+ local check_vec=("$@")
+
+ local memberships
+ memberships=$(
+ netstat -n --groups |
+ sed -n '/^v1\b/p' |
+ grep -o '[^ ]*$'
+ )
+ check_err $? "Couldn't obtain group memberships"
+
+ local item
+ for item in "${check_vec[@]}"; do
+ eval "local $item"
+ echo "$memberships" | grep -q "\b$group\b"
+ check_err_fail $fail $? "$group is_ex reported in IGMP query response"
+ done
+}
+
+test_change_mc_remote()
+{
+ check_command netstat || return
+
+ adf_ip_link_add v1 up type veth peer name v2
+ adf_ip_link_set_up v2
+
+ RET=0
+
+ adf_ip_link_add vx up type vxlan dstport 4789 \
+ local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000
+
+ check_membership "group=224.1.1.1 fail=0" \
+ "group=224.1.1.2 fail=1" \
+ "group=224.1.1.3 fail=1"
+
+ log_test "MC group report after VXLAN creation"
+
+ RET=0
+
+ change_remote 224.1.1.2
+ check_membership "group=224.1.1.1 fail=1" \
+ "group=224.1.1.2 fail=0" \
+ "group=224.1.1.3 fail=1"
+
+ log_test "MC group report after changing VXLAN remote MC->MC"
+
+ RET=0
+
+ change_remote 192.0.2.2
+ check_membership "group=224.1.1.1 fail=1" \
+ "group=224.1.1.2 fail=1" \
+ "group=224.1.1.3 fail=1"
+
+ log_test "MC group report after changing VXLAN remote MC->UC"
+}
+
+trap defer_scopes_cleanup EXIT
-ip link set dev vx type vxlan remote 192.0.2.30
-check_remotes "link set"
+tests_run
-ip link del dev vx
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/test_vxlan_nh.sh b/tools/testing/selftests/net/test_vxlan_nh.sh
new file mode 100755
index 000000000000..20f3369f776b
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_nh.sh
@@ -0,0 +1,223 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+TESTS="
+ basic_tx_ipv4
+ basic_tx_ipv6
+ learning
+ proxy_ipv4
+ proxy_ipv6
+"
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ stderr=
+ fi
+
+ out=$(eval "$cmd" "$stderr")
+ rc=$?
+ if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+################################################################################
+# Cleanup
+
+exit_cleanup_all()
+{
+ cleanup_all_ns
+ exit "${EXIT_STATUS}"
+}
+
+################################################################################
+# Tests
+
+nh_stats_get()
+{
+ ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]"
+}
+
+tc_stats_get()
+{
+ tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1"
+}
+
+basic_tx_common()
+{
+ local af_str=$1; shift
+ local proto=$1; shift
+ local local_addr=$1; shift
+ local plen=$1; shift
+ local remote_addr=$1; shift
+
+ RET=0
+
+ # Test basic Tx functionality. Check that stats are incremented on
+ # both the FDB nexthop group and the egress device.
+
+ run_cmd "ip -n $ns1 link add name dummy1 up type dummy"
+ run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1"
+ run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact"
+ run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass"
+
+ run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789"
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q"
+
+ busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null
+ check_err $? "FDB nexthop group stats did not increase"
+
+ busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null
+ check_err $? "tc filter stats did not increase"
+
+ log_test "VXLAN FDB nexthop: $af_str basic Tx"
+}
+
+basic_tx_ipv4()
+{
+ basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2
+}
+
+basic_tx_ipv6()
+{
+ basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2
+}
+
+learning()
+{
+ RET=0
+
+ # When learning is enabled on the VXLAN device, an incoming packet
+ # might try to refresh an FDB entry that points to an FDB nexthop group
+ # instead of an ordinary remote destination. Check that the kernel does
+ # not crash in this situation.
+
+ run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo"
+ run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass"
+ run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning"
+
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020"
+ run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q"
+
+ log_test "VXLAN FDB nexthop: learning"
+}
+
+proxy_common()
+{
+ local af_str=$1; shift
+ local local_addr=$1; shift
+ local plen=$1; shift
+ local remote_addr=$1; shift
+ local neigh_addr=$1; shift
+ local ping_cmd=$1; shift
+
+ RET=0
+
+ # When the "proxy" option is enabled on the VXLAN device, the device
+ # will suppress ARP requests and IPv6 Neighbor Solicitation messages if
+ # it is able to reply on behalf of the remote host. That is, if a
+ # matching and valid neighbor entry is configured on the VXLAN device
+ # whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The
+ # FDB entry for the neighbor's MAC address might point to an FDB
+ # nexthop group instead of an ordinary remote destination. Check that
+ # the kernel does not crash in this situation.
+
+ run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy"
+
+ run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0"
+
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 $ping_cmd"
+
+ log_test "VXLAN FDB nexthop: $af_str proxy"
+}
+
+proxy_ipv4()
+{
+ proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \
+ "arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3"
+}
+
+proxy_ipv6()
+{
+ proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \
+ "ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+while getopts ":t:pvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$((VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+require_command mausezahn
+require_command arping
+require_command ndisc6
+require_command jq
+
+if ! ip nexthop help 2>&1 | grep -q "stats"; then
+ echo "SKIP: iproute2 ip too old, missing nexthop stats support"
+ exit "$ksft_skip"
+fi
+
+trap exit_cleanup_all EXIT
+
+for t in $TESTS
+do
+ setup_ns ns1; $t; cleanup_all_ns;
+done
diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
index 6127a78ee988..8deacc565afa 100755
--- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -146,18 +146,17 @@ run_cmd()
}
check_hv_connectivity() {
- ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
- sleep 1
- ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
+ slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
+ slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
return $?
}
check_vm_connectivity() {
- run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
+ slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)"
- run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
+ slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)"
}
diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c
new file mode 100644
index 000000000000..eb3cac5e583c
--- /dev/null
+++ b/tools/testing/selftests/net/tfo.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <errno.h>
+
+static int cfg_server;
+static int cfg_client;
+static int cfg_port = 8000;
+static struct sockaddr_in6 cfg_addr;
+static char *cfg_outfile;
+
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+ int ret;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+
+ ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+ if (ret != 1) {
+ /* fallback to plain IPv4 */
+ ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+ if (ret != 1)
+ return -1;
+
+ /* add ::ffff prefix */
+ sin6->sin6_addr.s6_addr32[0] = 0;
+ sin6->sin6_addr.s6_addr32[1] = 0;
+ sin6->sin6_addr.s6_addr16[4] = 0;
+ sin6->sin6_addr.s6_addr16[5] = 0xffff;
+ }
+
+ return 0;
+}
+
+static void run_server(void)
+{
+ unsigned long qlen = 32;
+ int fd, opt, connfd;
+ socklen_t len;
+ char buf[64];
+ FILE *outfile;
+
+ outfile = fopen(cfg_outfile, "w");
+ if (!outfile)
+ error(1, errno, "fopen() outfile");
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket()");
+
+ opt = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0)
+ error(1, errno, "setsockopt(SO_REUSEADDR)");
+
+ if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0)
+ error(1, errno, "setsockopt(TCP_FASTOPEN)");
+
+ if (bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)) < 0)
+ error(1, errno, "bind()");
+
+ if (listen(fd, 5) < 0)
+ error(1, errno, "listen()");
+
+ len = sizeof(cfg_addr);
+ connfd = accept(fd, (struct sockaddr *)&cfg_addr, &len);
+ if (connfd < 0)
+ error(1, errno, "accept()");
+
+ len = sizeof(opt);
+ if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0)
+ error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)");
+
+ read(connfd, buf, 64);
+ fprintf(outfile, "%d\n", opt);
+
+ fclose(outfile);
+ close(connfd);
+ close(fd);
+}
+
+static void run_client(void)
+{
+ int fd;
+ char *msg = "Hello, world!";
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket()");
+
+ sendto(fd, msg, strlen(msg), MSG_FASTOPEN, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+
+ close(fd);
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s (-s|-c) -h<server_ip> -p<port> -o<outfile> ", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ struct sockaddr_in6 *addr6 = (void *) &cfg_addr;
+ char *addr = NULL;
+ int ret;
+ int c;
+
+ if (argc <= 1)
+ usage(argv[0]);
+
+ while ((c = getopt(argc, argv, "sch:p:o:")) != -1) {
+ switch (c) {
+ case 's':
+ if (cfg_client)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_server = 1;
+ break;
+ case 'c':
+ if (cfg_server)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_client = 1;
+ break;
+ case 'h':
+ addr = optarg;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 'o':
+ cfg_outfile = strdup(optarg);
+ if (!cfg_outfile)
+ error(1, 0, "outfile invalid");
+ break;
+ }
+ }
+
+ if (cfg_server && addr)
+ error(1, 0, "Server cannot have -h specified");
+
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ addr6->sin6_addr = in6addr_any;
+ if (addr) {
+ ret = parse_address(addr, cfg_port, addr6);
+ if (ret)
+ error(1, 0, "Client address parse error: %s", addr);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ if (cfg_server)
+ run_server();
+ else if (cfg_client)
+ run_client();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh
new file mode 100755
index 000000000000..a4550511830a
--- /dev/null
+++ b/tools/testing/selftests/net/tfo_passive.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+source lib.sh
+
+NSIM_SV_ID=$((256 + RANDOM % 256))
+NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
+NSIM_CL_ID=$((512 + RANDOM % 256))
+NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+SERVER_IP=192.168.1.1
+CLIENT_IP=192.168.1.2
+SERVER_PORT=48675
+
+setup_ns()
+{
+ set -e
+ ip netns add nssv
+ ip netns add nscl
+
+ NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_SV_SYS/net -exec basename {} \;)
+ NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_CL_SYS/net -exec basename {} \;)
+
+ ip link set $NSIM_SV_NAME netns nssv
+ ip link set $NSIM_CL_NAME netns nscl
+
+ ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME
+ ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME
+
+ ip netns exec nssv ip link set dev $NSIM_SV_NAME up
+ ip netns exec nscl ip link set dev $NSIM_CL_NAME up
+
+ # Enable passive TFO
+ ip netns exec nssv sysctl -w net.ipv4.tcp_fastopen=519 > /dev/null
+
+ set +e
+}
+
+cleanup_ns()
+{
+ ip netns del nscl
+ ip netns del nssv
+}
+
+###
+### Code start
+###
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+NSIM_SV_FD=$((256 + RANDOM % 256))
+exec {NSIM_SV_FD}</var/run/netns/nssv
+NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex)
+
+NSIM_CL_FD=$((256 + RANDOM % 256))
+exec {NSIM_CL_FD}</var/run/netns/nscl
+NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex)
+
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \
+ $NSIM_DEV_SYS_LINK
+
+if [ $? -ne 0 ]; then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup_ns
+ exit 1
+fi
+
+out_file=$(mktemp)
+
+timeout -k 1s 30s ip netns exec nssv ./tfo \
+ -s \
+ -p ${SERVER_PORT} \
+ -o ${out_file}&
+
+wait_local_port_listen nssv ${SERVER_PORT} tcp
+
+ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT}
+
+wait
+
+res=$(cat $out_file)
+rm $out_file
+
+if [ "$res" = "0" ]; then
+ echo "got invalid NAPI ID from passive TFO socket"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit 0
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index f27a12d2a2c9..a3ef4b57eb5f 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -21,7 +21,7 @@
#include <sys/socket.h>
#include <sys/stat.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define TLS_PAYLOAD_MAX_LEN 16384
#define SOL_TLS 282
@@ -44,9 +44,11 @@ struct tls_crypto_info_keys {
};
static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type,
- struct tls_crypto_info_keys *tls12)
+ struct tls_crypto_info_keys *tls12,
+ char key_generation)
{
- memset(tls12, 0, sizeof(*tls12));
+ memset(tls12, key_generation, sizeof(*tls12));
+ memset(tls12, 0, sizeof(struct tls_crypto_info));
switch (cipher_type) {
case TLS_CIPHER_CHACHA20_POLY1305:
@@ -179,13 +181,12 @@ static int tls_send_cmsg(int fd, unsigned char record_type,
return sendmsg(fd, &msg, flags);
}
-static int tls_recv_cmsg(struct __test_metadata *_metadata,
- int fd, unsigned char record_type,
- void *data, size_t len, int flags)
+static int __tls_recv_cmsg(struct __test_metadata *_metadata,
+ int fd, unsigned char *ctype,
+ void *data, size_t len, int flags)
{
char cbuf[CMSG_SPACE(sizeof(char))];
struct cmsghdr *cmsg;
- unsigned char ctype;
struct msghdr msg;
struct iovec vec;
int n;
@@ -204,7 +205,20 @@ static int tls_recv_cmsg(struct __test_metadata *_metadata,
EXPECT_NE(cmsg, NULL);
EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
- ctype = *((unsigned char *)CMSG_DATA(cmsg));
+ if (ctype)
+ *ctype = *((unsigned char *)CMSG_DATA(cmsg));
+
+ return n;
+}
+
+static int tls_recv_cmsg(struct __test_metadata *_metadata,
+ int fd, unsigned char record_type,
+ void *data, size_t len, int flags)
+{
+ unsigned char ctype;
+ int n;
+
+ n = __tls_recv_cmsg(_metadata, fd, &ctype, data, len, flags);
EXPECT_EQ(ctype, record_type);
return n;
@@ -266,6 +280,25 @@ TEST_F(tls_basic, bad_cipher)
EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
}
+TEST_F(tls_basic, recseq_wrap)
+{
+ struct tls_crypto_info_keys tls12;
+ char const *test_str = "test_read";
+ int send_len = 10;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0);
+ memset(&tls12.aes128.rec_seq, 0xff, sizeof(tls12.aes128.rec_seq));
+
+ ASSERT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+ ASSERT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), -1);
+ EXPECT_EQ(errno, EBADMSG);
+}
+
FIXTURE(tls)
{
int fd, cfd;
@@ -372,7 +405,7 @@ FIXTURE_SETUP(tls)
SKIP(return, "Unsupported cipher in FIPS mode");
tls_crypto_info_init(variant->tls_version, variant->cipher_type,
- &tls12);
+ &tls12, 0);
ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
@@ -406,6 +439,8 @@ TEST_F(tls, sendfile)
EXPECT_GE(filefd, 0);
fstat(filefd, &st);
EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+
+ close(filefd);
}
TEST_F(tls, send_then_sendfile)
@@ -427,6 +462,9 @@ TEST_F(tls, send_then_sendfile)
EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size);
+
+ free(buf);
+ close(filefd);
}
static void chunked_sendfile(struct __test_metadata *_metadata,
@@ -526,6 +564,40 @@ TEST_F(tls, msg_more)
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
+TEST_F(tls, cmsg_msg_more)
+{
+ char *test_str = "test_read";
+ char record_type = 100;
+ int send_len = 10;
+
+ /* we don't allow MSG_MORE with non-DATA records */
+ EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len,
+ MSG_MORE), -1);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+TEST_F(tls, msg_more_then_cmsg)
+{
+ char *test_str = "test_read";
+ char record_type = 100;
+ int send_len = 10;
+ char buf[10 * 2];
+ int ret;
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
+
+ ret = tls_send_cmsg(self->fd, record_type, test_str, send_len, 0);
+ EXPECT_EQ(ret, send_len);
+
+ /* initial DATA record didn't get merged with the non-DATA record */
+ EXPECT_EQ(recv(self->cfd, buf, send_len * 2, 0), send_len);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+ buf, sizeof(buf), MSG_WAITALL),
+ send_len);
+}
+
TEST_F(tls, msg_more_unsent)
{
char const *test_str = "test_read";
@@ -874,6 +946,37 @@ TEST_F(tls, peek_and_splice)
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
+#define MAX_FRAGS 48
+TEST_F(tls, splice_short)
+{
+ struct iovec sendchar_iov;
+ char read_buf[0x10000];
+ char sendbuf[0x100];
+ char sendchar = 'S';
+ int pipefds[2];
+ int i;
+
+ sendchar_iov.iov_base = &sendchar;
+ sendchar_iov.iov_len = 1;
+
+ memset(sendbuf, 's', sizeof(sendbuf));
+
+ ASSERT_GE(pipe2(pipefds, O_NONBLOCK), 0);
+ ASSERT_GE(fcntl(pipefds[0], F_SETPIPE_SZ, (MAX_FRAGS + 1) * 0x1000), 0);
+
+ for (i = 0; i < MAX_FRAGS; i++)
+ ASSERT_GE(vmsplice(pipefds[1], &sendchar_iov, 1, 0), 0);
+
+ ASSERT_EQ(write(pipefds[1], sendbuf, sizeof(sendbuf)), sizeof(sendbuf));
+
+ EXPECT_EQ(splice(pipefds[0], NULL, self->fd, NULL, MAX_FRAGS + 0x1000, 0),
+ MAX_FRAGS + sizeof(sendbuf));
+ EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), 0), MAX_FRAGS + sizeof(sendbuf));
+ EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), MSG_DONTWAIT), -1);
+ EXPECT_EQ(errno, EAGAIN);
+}
+#undef MAX_FRAGS
+
TEST_F(tls, recvmsg_single)
{
char const *test_str = "test_recvmsg_single";
@@ -1156,7 +1259,7 @@ TEST_F(tls, bidir)
struct tls_crypto_info_keys tls12;
tls_crypto_info_init(variant->tls_version, variant->cipher_type,
- &tls12);
+ &tls12, 0);
ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
tls12.len);
@@ -1595,7 +1698,7 @@ TEST_F(tls, getsockopt)
EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type);
/* get the full crypto_info */
- tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect, 0);
len = expect.len;
memrnd(&get, sizeof(get));
EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0);
@@ -1649,6 +1752,778 @@ TEST_F(tls, recv_efault)
EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0);
}
+#define TLS_RECORD_TYPE_HANDSHAKE 0x16
+/* key_update, length 1, update_not_requested */
+static const char key_update_msg[] = "\x18\x00\x00\x01\x00";
+static void tls_send_keyupdate(struct __test_metadata *_metadata, int fd)
+{
+ size_t len = sizeof(key_update_msg);
+
+ EXPECT_EQ(tls_send_cmsg(fd, TLS_RECORD_TYPE_HANDSHAKE,
+ (char *)key_update_msg, len, 0),
+ len);
+}
+
+static void tls_recv_keyupdate(struct __test_metadata *_metadata, int fd, int flags)
+{
+ char buf[100];
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, fd, TLS_RECORD_TYPE_HANDSHAKE, buf, sizeof(buf), flags),
+ sizeof(key_update_msg));
+ EXPECT_EQ(memcmp(buf, key_update_msg, sizeof(key_update_msg)), 0);
+}
+
+/* set the key to 0 then 1 for RX, immediately to 1 for TX */
+TEST_F(tls_basic, rekey_rx)
+{
+ struct tls_crypto_info_keys tls12_0, tls12_1;
+ char const *test_str = "test_message";
+ int send_len = strlen(test_str) + 1;
+ char buf[20];
+ int ret;
+
+ if (self->notls)
+ return;
+
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &tls12_0, 0);
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &tls12_1, 1);
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_0, tls12_0.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len);
+ EXPECT_EQ(ret, 0);
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+/* set the key to 0 then 1 for TX, immediately to 1 for RX */
+TEST_F(tls_basic, rekey_tx)
+{
+ struct tls_crypto_info_keys tls12_0, tls12_1;
+ char const *test_str = "test_message";
+ int send_len = strlen(test_str) + 1;
+ char buf[20];
+ int ret;
+
+ if (self->notls)
+ return;
+
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &tls12_0, 0);
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &tls12_1, 1);
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_0, tls12_0.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len);
+ EXPECT_EQ(ret, 0);
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls_basic, disconnect)
+{
+ char const *test_str = "test_message";
+ int send_len = strlen(test_str) + 1;
+ struct tls_crypto_info_keys key;
+ struct sockaddr_in addr;
+ char buf[20];
+ int ret;
+
+ if (self->notls)
+ return;
+
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &key, 0);
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &key, key.len);
+ ASSERT_EQ(ret, 0);
+
+ /* Pre-queue the data so that setsockopt parses it but doesn't
+ * dequeue it from the TCP socket. recvmsg would dequeue.
+ */
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &key, key.len);
+ ASSERT_EQ(ret, 0);
+
+ addr.sin_family = AF_UNSPEC;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = 0;
+ ret = connect(self->cfd, &addr, sizeof(addr));
+ EXPECT_EQ(ret, -1);
+ EXPECT_EQ(errno, EOPNOTSUPP);
+
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+}
+
+TEST_F(tls, rekey)
+{
+ char const *test_str_1 = "test_message_before_rekey";
+ char const *test_str_2 = "test_message_after_rekey";
+ struct tls_crypto_info_keys tls12;
+ int send_len;
+ char buf[100];
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ /* initial send/recv */
+ send_len = strlen(test_str_1) + 1;
+ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ /* send after rekey */
+ send_len = strlen(test_str_2) + 1;
+ EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len);
+
+ /* can't receive the KeyUpdate without a control message */
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* recv blocking -> -EKEYEXPIRED */
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* recv non-blocking -> -EKEYEXPIRED */
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* update RX key */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ /* recv after rekey */
+ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0);
+}
+
+TEST_F(tls, rekey_fail)
+{
+ char const *test_str_1 = "test_message_before_rekey";
+ char const *test_str_2 = "test_message_after_rekey";
+ struct tls_crypto_info_keys tls12;
+ int send_len;
+ char buf[100];
+
+ /* initial send/recv */
+ send_len = strlen(test_str_1) + 1;
+ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+
+ if (variant->tls_version != TLS_1_3_VERSION) {
+ /* just check that rekey is not supported and return */
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1);
+ EXPECT_EQ(errno, EBUSY);
+ return;
+ }
+
+ /* successful update */
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ /* invalid update: change of version */
+ tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* invalid update (RX socket): change of version */
+ tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* invalid update: change of cipher */
+ if (variant->cipher_type == TLS_CIPHER_AES_GCM_256)
+ tls_crypto_info_init(variant->tls_version, TLS_CIPHER_CHACHA20_POLY1305, &tls12, 1);
+ else
+ tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_256, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* send after rekey, the invalid updates shouldn't have an effect */
+ send_len = strlen(test_str_2) + 1;
+ EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len);
+
+ /* can't receive the KeyUpdate without a control message */
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* recv blocking -> -EKEYEXPIRED */
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* recv non-blocking -> -EKEYEXPIRED */
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* update RX key */
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ /* recv after rekey */
+ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0);
+}
+
+TEST_F(tls, rekey_peek)
+{
+ char const *test_str_1 = "test_message_before_rekey";
+ struct tls_crypto_info_keys tls12;
+ int send_len;
+ char buf[100];
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ send_len = strlen(test_str_1) + 1;
+ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ /* can't receive the KeyUpdate without a control message */
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+
+ /* peek KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* update RX key */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+}
+
+TEST_F(tls, splice_rekey)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN / 2;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ struct tls_crypto_info_keys tls12;
+ int p[2];
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ memrnd(mem_send, sizeof(mem_send));
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len);
+ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+
+ /* can't splice the KeyUpdate */
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* peek KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* can't splice before updating the key */
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* update RX key */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len);
+ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, rekey_peek_splice)
+{
+ char const *test_str_1 = "test_message_before_rekey";
+ struct tls_crypto_info_keys tls12;
+ int send_len;
+ char buf[100];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ int p[2];
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ ASSERT_GE(pipe(p), 0);
+
+ send_len = strlen(test_str_1) + 1;
+ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len);
+ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+ EXPECT_EQ(memcmp(mem_recv, test_str_1, send_len), 0);
+}
+
+TEST_F(tls, rekey_getsockopt)
+{
+ struct tls_crypto_info_keys tls12;
+ struct tls_crypto_info_keys tls12_get;
+ socklen_t len;
+
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 0);
+
+ len = tls12.len;
+ EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0);
+ EXPECT_EQ(len, tls12.len);
+ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+
+ len = tls12.len;
+ EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0);
+ EXPECT_EQ(len, tls12.len);
+ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ len = tls12.len;
+ EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0);
+ EXPECT_EQ(len, tls12.len);
+ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+
+ len = tls12.len;
+ EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0);
+ EXPECT_EQ(len, tls12.len);
+ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+}
+
+TEST_F(tls, rekey_poll_pending)
+{
+ char const *test_str = "test_message_after_rekey";
+ struct tls_crypto_info_keys tls12;
+ struct pollfd pfd = { };
+ int send_len;
+ int ret;
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* send immediately after rekey */
+ send_len = strlen(test_str) + 1;
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+
+ /* key hasn't been updated, expect cfd to be non-readable */
+ pfd.fd = self->cfd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret) {
+ int pid2, status;
+
+ /* wait before installing the new key */
+ sleep(1);
+
+ /* update RX key while poll() is sleeping */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ pid2 = wait(&status);
+ EXPECT_EQ(pid2, ret);
+ EXPECT_EQ(status, 0);
+ } else {
+ pfd.fd = self->cfd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 5000), 1);
+
+ exit(!__test_passed(_metadata));
+ }
+}
+
+TEST_F(tls, rekey_poll_delay)
+{
+ char const *test_str = "test_message_after_rekey";
+ struct tls_crypto_info_keys tls12;
+ struct pollfd pfd = { };
+ int send_len;
+ int ret;
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret) {
+ int pid2, status;
+
+ /* wait before installing the new key */
+ sleep(1);
+
+ /* update RX key while poll() is sleeping */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ sleep(1);
+ send_len = strlen(test_str) + 1;
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+
+ pid2 = wait(&status);
+ EXPECT_EQ(pid2, ret);
+ EXPECT_EQ(status, 0);
+ } else {
+ pfd.fd = self->cfd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 5000), 1);
+ exit(!__test_passed(_metadata));
+ }
+}
+
+struct raw_rec {
+ unsigned int plain_len;
+ unsigned char plain_data[100];
+ unsigned int cipher_len;
+ unsigned char cipher_data[128];
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: 'Hello world' */
+static const struct raw_rec id0_data_l11 = {
+ .plain_len = 11,
+ .plain_data = {
+ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+ 0x72, 0x6c, 0x64,
+ },
+ .cipher_len = 40,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0xa2, 0x33,
+ 0xde, 0x8d, 0x94, 0xf0, 0x29, 0x6c, 0xb1, 0xaf,
+ 0x6a, 0x75, 0xb2, 0x93, 0xad, 0x45, 0xd5, 0xfd,
+ 0x03, 0x51, 0x57, 0x8f, 0xf9, 0xcc, 0x3b, 0x42,
+ },
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:0, plaintext: '' */
+static const struct raw_rec id0_ctrl_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x38, 0x7b,
+ 0xa6, 0x1c, 0xdd, 0xa7, 0x19, 0x33, 0xab, 0xae,
+ 0x88, 0xe1, 0xd2, 0x08, 0x4f,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: '' */
+static const struct raw_rec id0_data_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0x37, 0x90,
+ 0x70, 0x45, 0x89, 0xfb, 0x5c, 0xc7, 0x89, 0x03,
+ 0x68, 0x80, 0xd3, 0xd8, 0xcc,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: 'Hello world' */
+static const struct raw_rec id1_data_l11 = {
+ .plain_len = 11,
+ .plain_data = {
+ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+ 0x72, 0x6c, 0x64,
+ },
+ .cipher_len = 40,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x3a, 0x1a, 0x9c,
+ 0xd0, 0xa8, 0x9a, 0xd6, 0x69, 0xd6, 0x1a, 0xe3,
+ 0xb5, 0x1f, 0x0d, 0x2c, 0xe2, 0x97, 0x46, 0xff,
+ 0x2b, 0xcc, 0x5a, 0xc4, 0xa3, 0xb9, 0xef, 0xba,
+ },
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:1, plaintext: '' */
+static const struct raw_rec id1_ctrl_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x3e, 0xf0, 0xfe,
+ 0xee, 0xd9, 0xe2, 0x5d, 0xc7, 0x11, 0x4c, 0xe6,
+ 0xb4, 0x7e, 0xef, 0x40, 0x2b,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: '' */
+static const struct raw_rec id1_data_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xce, 0xfc, 0x86,
+ 0xc8, 0xf0, 0x55, 0xf9, 0x47, 0x3f, 0x74, 0xdc,
+ 0xc9, 0xbf, 0xfe, 0x5b, 0xb1,
+ },
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: 'Hello world' */
+static const struct raw_rec id2_ctrl_l11 = {
+ .plain_len = 11,
+ .plain_data = {
+ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+ 0x72, 0x6c, 0x64,
+ },
+ .cipher_len = 40,
+ .cipher_data = {
+ 0x16, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19,
+ 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87,
+ 0x2a, 0x04, 0x11, 0x3d, 0xf8, 0x64, 0x5f, 0x36,
+ 0x8b, 0xa8, 0xee, 0x4c, 0x6d, 0x62, 0xa5, 0x00,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: 'Hello world' */
+static const struct raw_rec id2_data_l11 = {
+ .plain_len = 11,
+ .plain_data = {
+ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+ 0x72, 0x6c, 0x64,
+ },
+ .cipher_len = 40,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19,
+ 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87,
+ 0x8e, 0xa1, 0xd0, 0xcd, 0x33, 0xb5, 0x86, 0x2b,
+ 0x17, 0xf1, 0x52, 0x2a, 0x55, 0x62, 0x65, 0x11,
+ },
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: '' */
+static const struct raw_rec id2_ctrl_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0xdc, 0x5c, 0x0e,
+ 0x41, 0xdd, 0xba, 0xd3, 0xcc, 0xcf, 0x6d, 0xd9,
+ 0x06, 0xdb, 0x79, 0xe5, 0x5d,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: '' */
+static const struct raw_rec id2_data_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0xc3, 0xca, 0x26,
+ 0x22, 0xe4, 0x25, 0xfb, 0x5f, 0x6d, 0xbf, 0x83,
+ 0x30, 0x48, 0x69, 0x1a, 0x47,
+ },
+};
+
+FIXTURE(zero_len)
+{
+ int fd, cfd;
+ bool notls;
+};
+
+FIXTURE_VARIANT(zero_len)
+{
+ const struct raw_rec *recs[4];
+ ssize_t recv_ret[4];
+};
+
+FIXTURE_VARIANT_ADD(zero_len, data_data_data)
+{
+ .recs = { &id0_data_l11, &id1_data_l11, &id2_data_l11, },
+ .recv_ret = { 33, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, data_0ctrl_data)
+{
+ .recs = { &id0_data_l11, &id1_ctrl_l0, &id2_data_l11, },
+ .recv_ret = { 11, 0, 11, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0data)
+{
+ .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l0, },
+ .recv_ret = { -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_ctrl)
+{
+ .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l11, },
+ .recv_ret = { 0, 11, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0ctrl)
+{
+ .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l0, },
+ .recv_ret = { 0, 0, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0ctrl_0ctrl_0ctrl)
+{
+ .recs = { &id0_ctrl_l0, &id1_ctrl_l0, &id2_ctrl_l0, },
+ .recv_ret = { 0, 0, 0, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_data)
+{
+ .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l11, },
+ .recv_ret = { 11, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, data_0data_0data)
+{
+ .recs = { &id0_data_l11, &id1_data_l0, &id2_data_l0, },
+ .recv_ret = { 11, -EAGAIN, },
+};
+
+FIXTURE_SETUP(zero_len)
+{
+ struct tls_crypto_info_keys tls12;
+ int ret;
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128,
+ &tls12, 0);
+
+ ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+ if (self->notls)
+ return;
+
+ /* Don't install keys on fd, we'll send raw records */
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(zero_len)
+{
+ close(self->fd);
+ close(self->cfd);
+}
+
+TEST_F(zero_len, test)
+{
+ const struct raw_rec *const *rec;
+ unsigned char buf[128];
+ int rec_off;
+ int i;
+
+ for (i = 0; i < 4 && variant->recs[i]; i++)
+ EXPECT_EQ(send(self->fd, variant->recs[i]->cipher_data,
+ variant->recs[i]->cipher_len, 0),
+ variant->recs[i]->cipher_len);
+
+ rec = &variant->recs[0];
+ rec_off = 0;
+ for (i = 0; i < 4; i++) {
+ int j, ret;
+
+ ret = variant->recv_ret[i] >= 0 ? variant->recv_ret[i] : -1;
+ EXPECT_EQ(__tls_recv_cmsg(_metadata, self->cfd, NULL,
+ buf, sizeof(buf), MSG_DONTWAIT), ret);
+ if (ret == -1)
+ EXPECT_EQ(errno, -variant->recv_ret[i]);
+ if (variant->recv_ret[i] == -EAGAIN)
+ break;
+
+ for (j = 0; j < ret; j++) {
+ while (rec_off == (*rec)->plain_len) {
+ rec++;
+ rec_off = 0;
+ }
+ EXPECT_EQ(buf[j], (*rec)->plain_data[rec_off]);
+ rec_off++;
+ }
+ }
+};
+
FIXTURE(tls_err)
{
int fd, cfd;
@@ -1677,7 +2552,7 @@ FIXTURE_SETUP(tls_err)
int ret;
tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128,
- &tls12);
+ &tls12, 0);
ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls);
@@ -1965,6 +2840,163 @@ TEST_F(tls_err, poll_partial_rec_async)
}
}
+/* Use OOB+large send to trigger copy mode due to memory pressure.
+ * OOB causes a short read.
+ */
+TEST_F(tls_err, oob_pressure)
+{
+ char buf[1<<16];
+ int i;
+
+ memrnd(buf, sizeof(buf));
+
+ EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5);
+ EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf));
+ for (i = 0; i < 64; i++)
+ EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5);
+}
+
+/*
+ * Parse a stream of TLS records and ensure that each record respects
+ * the specified @max_payload_len.
+ */
+static size_t parse_tls_records(struct __test_metadata *_metadata,
+ const __u8 *rx_buf, int rx_len, int overhead,
+ __u16 max_payload_len)
+{
+ const __u8 *rec = rx_buf;
+ size_t total_plaintext_rx = 0;
+ const __u8 rec_header_len = 5;
+
+ while (rec < rx_buf + rx_len) {
+ __u16 record_payload_len;
+ __u16 plaintext_len;
+
+ /* Sanity check that it's a TLS header for application data */
+ ASSERT_EQ(rec[0], 23);
+ ASSERT_EQ(rec[1], 0x3);
+ ASSERT_EQ(rec[2], 0x3);
+
+ memcpy(&record_payload_len, rec + 3, 2);
+ record_payload_len = ntohs(record_payload_len);
+ ASSERT_GE(record_payload_len, overhead);
+
+ plaintext_len = record_payload_len - overhead;
+ total_plaintext_rx += plaintext_len;
+
+ /* Plaintext must not exceed the specified limit */
+ ASSERT_LE(plaintext_len, max_payload_len);
+ rec += rec_header_len + record_payload_len;
+ }
+
+ return total_plaintext_rx;
+}
+
+TEST(tls_12_tx_max_payload_len)
+{
+ struct tls_crypto_info_keys tls12;
+ int cfd, ret, fd, overhead;
+ size_t total_plaintext_rx = 0;
+ __u8 tx[1024], rx[2000];
+ __u16 limit = 128;
+ __u16 opt = 0;
+ unsigned int optlen = sizeof(opt);
+ bool notls;
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128,
+ &tls12, 0);
+
+ ulp_sock_pair(_metadata, &fd, &cfd, &notls);
+
+ if (notls)
+ exit(KSFT_SKIP);
+
+ /* Don't install keys on fd, we'll parse raw records */
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit,
+ sizeof(limit));
+ ASSERT_EQ(ret, 0);
+
+ ret = getsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &opt, &optlen);
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(limit, opt);
+ EXPECT_EQ(optlen, sizeof(limit));
+
+ memset(tx, 0, sizeof(tx));
+ ASSERT_EQ(send(cfd, tx, sizeof(tx), 0), sizeof(tx));
+ close(cfd);
+
+ ret = recv(fd, rx, sizeof(rx), 0);
+
+ /*
+ * 16B tag + 8B IV -- record header (5B) is not counted but we'll
+ * need it to walk the record stream
+ */
+ overhead = 16 + 8;
+ total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead,
+ limit);
+
+ ASSERT_EQ(total_plaintext_rx, sizeof(tx));
+ close(fd);
+}
+
+TEST(tls_12_tx_max_payload_len_open_rec)
+{
+ struct tls_crypto_info_keys tls12;
+ int cfd, ret, fd, overhead;
+ size_t total_plaintext_rx = 0;
+ __u8 tx[1024], rx[2000];
+ __u16 tx_partial = 256;
+ __u16 og_limit = 512, limit = 128;
+ bool notls;
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128,
+ &tls12, 0);
+
+ ulp_sock_pair(_metadata, &fd, &cfd, &notls);
+
+ if (notls)
+ exit(KSFT_SKIP);
+
+ /* Don't install keys on fd, we'll parse raw records */
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &og_limit,
+ sizeof(og_limit));
+ ASSERT_EQ(ret, 0);
+
+ memset(tx, 0, sizeof(tx));
+ ASSERT_EQ(send(cfd, tx, tx_partial, MSG_MORE), tx_partial);
+
+ /*
+ * Changing the payload limit with a pending open record should
+ * not be allowed.
+ */
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit,
+ sizeof(limit));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EBUSY);
+
+ ASSERT_EQ(send(cfd, tx + tx_partial, sizeof(tx) - tx_partial, MSG_EOR),
+ sizeof(tx) - tx_partial);
+ close(cfd);
+
+ ret = recv(fd, rx, sizeof(rx), 0);
+
+ /*
+ * 16B tag + 8B IV -- record header (5B) is not counted but we'll
+ * need it to walk the record stream
+ */
+ overhead = 16 + 8;
+ total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead,
+ og_limit);
+ ASSERT_EQ(total_plaintext_rx, sizeof(tx));
+ close(fd);
+}
+
TEST(non_established) {
struct tls12_crypto_info_aes_gcm_256 tls12;
struct sockaddr_in addr;
@@ -2099,7 +3131,7 @@ TEST(tls_v6ops) {
int sfd, ret, fd;
socklen_t len, len2;
- tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12);
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0);
addr.sin6_family = AF_INET6;
addr.sin6_addr = in6addr_any;
@@ -2158,7 +3190,7 @@ TEST(prequeue) {
len = sizeof(addr);
memrnd(buf, sizeof(buf));
- tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12);
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12, 0);
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(INADDR_ANY);
@@ -2193,6 +3225,67 @@ TEST(prequeue) {
close(cfd);
}
+TEST(data_steal) {
+ struct tls_crypto_info_keys tls;
+ char buf[20000], buf2[20000];
+ struct sockaddr_in addr;
+ int sfd, cfd, ret, fd;
+ int pid, status;
+ socklen_t len;
+
+ len = sizeof(addr);
+ memrnd(buf, sizeof(buf));
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls, 0);
+
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = 0;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ sfd = socket(AF_INET, SOCK_STREAM, 0);
+
+ ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0);
+ ASSERT_EQ(listen(sfd, 10), 0);
+ ASSERT_EQ(getsockname(sfd, &addr, &len), 0);
+ ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0);
+ ASSERT_GE(cfd = accept(sfd, &addr, &len), 0);
+ close(sfd);
+
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ if (ret) {
+ ASSERT_EQ(errno, ENOENT);
+ SKIP(return, "no TLS support");
+ }
+ ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0);
+
+ /* Spawn a child and get it into the read wait path of the underlying
+ * TCP socket.
+ */
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (!pid) {
+ EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL),
+ sizeof(buf) / 2);
+ exit(!__test_passed(_metadata));
+ }
+
+ usleep(10000);
+ ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0);
+ ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0);
+
+ EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf));
+ EXPECT_EQ(wait(&status), pid);
+ EXPECT_EQ(status, 0);
+ EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1);
+ /* Don't check errno, the error will be different depending
+ * on what random bytes TLS interpreted as the record length.
+ */
+
+ close(fd);
+ close(cfd);
+}
+
static void __attribute__((constructor)) fips_check(void) {
int res;
FILE *f;
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
deleted file mode 100644
index 9ba03164d73a..000000000000
--- a/tools/testing/selftests/net/toeplitz.c
+++ /dev/null
@@ -1,589 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Toeplitz test
- *
- * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
- * 2. Compute the rx_hash in software based on the packet contents
- * 3. Compare the two
- *
- * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
- *
- * If '-C $rx_irq_cpu_list' is given, also
- *
- * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
- * 5. Compute the rxqueue that RSS would select based on this rx_hash
- * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
- * 7. Compare the cpus from 4 and 6
- *
- * Else if '-r $rps_bitmap' is given, also
- *
- * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
- * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
- * 6. Compare the cpus from 4 and 5
- */
-
-#define _GNU_SOURCE
-
-#include <arpa/inet.h>
-#include <errno.h>
-#include <error.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <linux/filter.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <net/if.h>
-#include <netdb.h>
-#include <netinet/ip.h>
-#include <netinet/ip6.h>
-#include <netinet/tcp.h>
-#include <netinet/udp.h>
-#include <poll.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "../kselftest.h"
-
-#define TOEPLITZ_KEY_MIN_LEN 40
-#define TOEPLITZ_KEY_MAX_LEN 60
-
-#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
-#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
-#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
-
-#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
-
-#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
-
-#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
-
-/* configuration options (cmdline arguments) */
-static uint16_t cfg_dport = 8000;
-static int cfg_family = AF_INET6;
-static char *cfg_ifname = "eth0";
-static int cfg_num_queues;
-static int cfg_num_rps_cpus;
-static bool cfg_sink;
-static int cfg_type = SOCK_STREAM;
-static int cfg_timeout_msec = 1000;
-static bool cfg_verbose;
-
-/* global vars */
-static int num_cpus;
-static int ring_block_nr;
-static int ring_block_sz;
-
-/* stats */
-static int frames_received;
-static int frames_nohash;
-static int frames_error;
-
-#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0)
-
-/* tpacket ring */
-struct ring_state {
- int fd;
- char *mmap;
- int idx;
- int cpu;
-};
-
-static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
-static int rps_silo_to_cpu[RPS_MAX_CPUS];
-static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
-static struct ring_state rings[RSS_MAX_CPUS];
-
-static inline uint32_t toeplitz(const unsigned char *four_tuple,
- const unsigned char *key)
-{
- int i, bit, ret = 0;
- uint32_t key32;
-
- key32 = ntohl(*((uint32_t *)key));
- key += 4;
-
- for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
- for (bit = 7; bit >= 0; bit--) {
- if (four_tuple[i] & (1 << bit))
- ret ^= key32;
-
- key32 <<= 1;
- key32 |= !!(key[0] & (1 << bit));
- }
- key++;
- }
-
- return ret;
-}
-
-/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
-static void verify_rss(uint32_t rx_hash, int cpu)
-{
- int queue = rx_hash % cfg_num_queues;
-
- log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
- if (rx_irq_cpus[queue] != cpu) {
- log_verbose(". error: rss cpu mismatch (%d)", cpu);
- frames_error++;
- }
-}
-
-static void verify_rps(uint64_t rx_hash, int cpu)
-{
- int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
-
- log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
- if (rps_silo_to_cpu[silo] != cpu) {
- log_verbose(". error: rps cpu mismatch (%d)", cpu);
- frames_error++;
- }
-}
-
-static void log_rxhash(int cpu, uint32_t rx_hash,
- const char *addrs, int addr_len)
-{
- char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
- uint16_t *ports;
-
- if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
- !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
- error(1, 0, "address parse error");
-
- ports = (void *)addrs + (addr_len * 2);
- log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
- cpu, rx_hash, saddr, daddr,
- ntohs(ports[0]), ntohs(ports[1]));
-}
-
-/* Compare computed rxhash with rxhash received from tpacket_v3 */
-static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
-{
- unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
- uint32_t rx_hash_sw;
- const char *addrs;
- int addr_len;
-
- if (cfg_family == AF_INET) {
- addr_len = sizeof(struct in_addr);
- addrs = pkt + offsetof(struct iphdr, saddr);
- } else {
- addr_len = sizeof(struct in6_addr);
- addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
- }
-
- memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
- rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
-
- if (cfg_verbose)
- log_rxhash(cpu, rx_hash, addrs, addr_len);
-
- if (rx_hash != rx_hash_sw) {
- log_verbose(" != expected 0x%x\n", rx_hash_sw);
- frames_error++;
- return;
- }
-
- log_verbose(" OK");
- if (cfg_num_queues)
- verify_rss(rx_hash, cpu);
- else if (cfg_num_rps_cpus)
- verify_rps(rx_hash, cpu);
- log_verbose("\n");
-}
-
-static char *recv_frame(const struct ring_state *ring, char *frame)
-{
- struct tpacket3_hdr *hdr = (void *)frame;
-
- if (hdr->hv1.tp_rxhash)
- verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
- ring->cpu);
- else
- frames_nohash++;
-
- return frame + hdr->tp_next_offset;
-}
-
-/* A single TPACKET_V3 block can hold multiple frames */
-static bool recv_block(struct ring_state *ring)
-{
- struct tpacket_block_desc *block;
- char *frame;
- int i;
-
- block = (void *)(ring->mmap + ring->idx * ring_block_sz);
- if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
- return false;
-
- frame = (char *)block;
- frame += block->hdr.bh1.offset_to_first_pkt;
-
- for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
- frame = recv_frame(ring, frame);
- frames_received++;
- }
-
- block->hdr.bh1.block_status = TP_STATUS_KERNEL;
- ring->idx = (ring->idx + 1) % ring_block_nr;
-
- return true;
-}
-
-/* simple test: sleep once unconditionally and then process all rings */
-static void process_rings(void)
-{
- int i;
-
- usleep(1000 * cfg_timeout_msec);
-
- for (i = 0; i < num_cpus; i++)
- do {} while (recv_block(&rings[i]));
-
- fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
- frames_received - frames_nohash - frames_error,
- frames_nohash, frames_error);
-}
-
-static char *setup_ring(int fd)
-{
- struct tpacket_req3 req3 = {0};
- void *ring;
-
- req3.tp_retire_blk_tov = cfg_timeout_msec / 8;
- req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
-
- req3.tp_frame_size = 2048;
- req3.tp_frame_nr = 1 << 10;
- req3.tp_block_nr = 16;
-
- req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
- req3.tp_block_size /= req3.tp_block_nr;
-
- if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
- error(1, errno, "setsockopt PACKET_RX_RING");
-
- ring_block_sz = req3.tp_block_size;
- ring_block_nr = req3.tp_block_nr;
-
- ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
- if (ring == MAP_FAILED)
- error(1, 0, "mmap failed");
-
- return ring;
-}
-
-static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
-{
- struct sock_filter filter[] = {
- BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
- BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
- BPF_STMT(BPF_RET + BPF_K, 0),
- BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
- };
- struct sock_fprog prog = {};
-
- prog.filter = filter;
- prog.len = ARRAY_SIZE(filter);
- if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
- error(1, errno, "setsockopt filter");
-}
-
-/* filter on transport protocol and destination port */
-static void set_filter(int fd)
-{
- const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
- uint8_t proto;
-
- proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
- if (cfg_family == AF_INET)
- __set_filter(fd, offsetof(struct iphdr, protocol), proto,
- sizeof(struct iphdr) + off_dport);
- else
- __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
- sizeof(struct ip6_hdr) + off_dport);
-}
-
-/* drop everything: used temporarily during setup */
-static void set_filter_null(int fd)
-{
- struct sock_filter filter[] = {
- BPF_STMT(BPF_RET + BPF_K, 0),
- };
- struct sock_fprog prog = {};
-
- prog.filter = filter;
- prog.len = ARRAY_SIZE(filter);
- if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
- error(1, errno, "setsockopt filter");
-}
-
-static int create_ring(char **ring)
-{
- struct fanout_args args = {
- .id = 1,
- .type_flags = PACKET_FANOUT_CPU,
- .max_num_members = RSS_MAX_CPUS
- };
- struct sockaddr_ll ll = { 0 };
- int fd, val;
-
- fd = socket(PF_PACKET, SOCK_DGRAM, 0);
- if (fd == -1)
- error(1, errno, "socket creation failed");
-
- val = TPACKET_V3;
- if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
- error(1, errno, "setsockopt PACKET_VERSION");
- *ring = setup_ring(fd);
-
- /* block packets until all rings are added to the fanout group:
- * else packets can arrive during setup and get misclassified
- */
- set_filter_null(fd);
-
- ll.sll_family = AF_PACKET;
- ll.sll_ifindex = if_nametoindex(cfg_ifname);
- ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
- htons(ETH_P_IPV6);
- if (bind(fd, (void *)&ll, sizeof(ll)))
- error(1, errno, "bind");
-
- /* must come after bind: verifies all programs in group match */
- if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
- /* on failure, retry using old API if that is sufficient:
- * it has a hard limit of 256 sockets, so only try if
- * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
- * in this API, the third argument is left implicit.
- */
- if (cfg_num_queues || num_cpus > 256 ||
- setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
- &args, sizeof(uint32_t)))
- error(1, errno, "setsockopt PACKET_FANOUT cpu");
- }
-
- return fd;
-}
-
-/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
-static int setup_sink(void)
-{
- int fd, val;
-
- fd = socket(cfg_family, cfg_type, 0);
- if (fd == -1)
- error(1, errno, "socket %d.%d", cfg_family, cfg_type);
-
- val = 1 << 20;
- if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
- error(1, errno, "setsockopt rcvbuf");
-
- return fd;
-}
-
-static void setup_rings(void)
-{
- int i;
-
- for (i = 0; i < num_cpus; i++) {
- rings[i].cpu = i;
- rings[i].fd = create_ring(&rings[i].mmap);
- }
-
- /* accept packets once all rings in the fanout group are up */
- for (i = 0; i < num_cpus; i++)
- set_filter(rings[i].fd);
-}
-
-static void cleanup_rings(void)
-{
- int i;
-
- for (i = 0; i < num_cpus; i++) {
- if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
- error(1, errno, "munmap");
- if (close(rings[i].fd))
- error(1, errno, "close");
- }
-}
-
-static void parse_cpulist(const char *arg)
-{
- do {
- rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
-
- arg = strchr(arg, ',');
- if (!arg)
- break;
- arg++; // skip ','
- } while (1);
-}
-
-static void show_cpulist(void)
-{
- int i;
-
- for (i = 0; i < cfg_num_queues; i++)
- fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
-}
-
-static void show_silos(void)
-{
- int i;
-
- for (i = 0; i < cfg_num_rps_cpus; i++)
- fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
-}
-
-static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
-{
- int i, ret, off;
-
- if (slen < TOEPLITZ_STR_MIN_LEN ||
- slen > TOEPLITZ_STR_MAX_LEN + 1)
- error(1, 0, "invalid toeplitz key");
-
- for (i = 0, off = 0; off < slen; i++, off += 3) {
- ret = sscanf(str + off, "%hhx", &key[i]);
- if (ret != 1)
- error(1, 0, "key parse error at %d off %d len %d",
- i, off, slen);
- }
-}
-
-static void parse_rps_bitmap(const char *arg)
-{
- unsigned long bitmap;
- int i;
-
- bitmap = strtoul(arg, NULL, 0);
-
- if (bitmap & ~(RPS_MAX_CPUS - 1))
- error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
- bitmap, RPS_MAX_CPUS - 1);
-
- for (i = 0; i < RPS_MAX_CPUS; i++)
- if (bitmap & 1UL << i)
- rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
-}
-
-static void parse_opts(int argc, char **argv)
-{
- static struct option long_options[] = {
- {"dport", required_argument, 0, 'd'},
- {"cpus", required_argument, 0, 'C'},
- {"key", required_argument, 0, 'k'},
- {"iface", required_argument, 0, 'i'},
- {"ipv4", no_argument, 0, '4'},
- {"ipv6", no_argument, 0, '6'},
- {"sink", no_argument, 0, 's'},
- {"tcp", no_argument, 0, 't'},
- {"timeout", required_argument, 0, 'T'},
- {"udp", no_argument, 0, 'u'},
- {"verbose", no_argument, 0, 'v'},
- {"rps", required_argument, 0, 'r'},
- {0, 0, 0, 0}
- };
- bool have_toeplitz = false;
- int index, c;
-
- while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) {
- switch (c) {
- case '4':
- cfg_family = AF_INET;
- break;
- case '6':
- cfg_family = AF_INET6;
- break;
- case 'C':
- parse_cpulist(optarg);
- break;
- case 'd':
- cfg_dport = strtol(optarg, NULL, 0);
- break;
- case 'i':
- cfg_ifname = optarg;
- break;
- case 'k':
- parse_toeplitz_key(optarg, strlen(optarg),
- toeplitz_key);
- have_toeplitz = true;
- break;
- case 'r':
- parse_rps_bitmap(optarg);
- break;
- case 's':
- cfg_sink = true;
- break;
- case 't':
- cfg_type = SOCK_STREAM;
- break;
- case 'T':
- cfg_timeout_msec = strtol(optarg, NULL, 0);
- break;
- case 'u':
- cfg_type = SOCK_DGRAM;
- break;
- case 'v':
- cfg_verbose = true;
- break;
-
- default:
- error(1, 0, "unknown option %c", optopt);
- break;
- }
- }
-
- if (!have_toeplitz)
- error(1, 0, "Must supply rss key ('-k')");
-
- num_cpus = get_nprocs();
- if (num_cpus > RSS_MAX_CPUS)
- error(1, 0, "increase RSS_MAX_CPUS");
-
- if (cfg_num_queues && cfg_num_rps_cpus)
- error(1, 0,
- "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
- if (cfg_verbose) {
- show_cpulist();
- show_silos();
- }
-}
-
-int main(int argc, char **argv)
-{
- const int min_tests = 10;
- int fd_sink = -1;
-
- parse_opts(argc, argv);
-
- if (cfg_sink)
- fd_sink = setup_sink();
-
- setup_rings();
- process_rings();
- cleanup_rings();
-
- if (cfg_sink && close(fd_sink))
- error(1, errno, "close sink");
-
- if (frames_received - frames_nohash < min_tests)
- error(1, 0, "too few frames for verification");
-
- return frames_error;
-}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
deleted file mode 100755
index 8ff172f7bb1b..000000000000
--- a/tools/testing/selftests/net/toeplitz.sh
+++ /dev/null
@@ -1,199 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
-# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
-# ('-rps <rps_map>')
-#
-# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
-# which is a driver-specific encoding.
-#
-# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
-# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
-
-source setup_loopback.sh
-readonly SERVER_IP4="192.168.1.200/24"
-readonly SERVER_IP6="fda8::1/64"
-readonly SERVER_MAC="aa:00:00:00:00:02"
-
-readonly CLIENT_IP4="192.168.1.100/24"
-readonly CLIENT_IP6="fda8::2/64"
-readonly CLIENT_MAC="aa:00:00:00:00:01"
-
-PORT=8000
-KEY="$(</proc/sys/net/core/netdev_rss_key)"
-TEST_RSS=false
-RPS_MAP=""
-PROTO_FLAG=""
-IP_FLAG=""
-DEV="eth0"
-
-# Return the number of rxqs among which RSS is configured to spread packets.
-# This is determined by reading the RSS indirection table using ethtool.
-get_rss_cfg_num_rxqs() {
- echo $(ethtool -x "${DEV}" |
- grep -E [[:space:]]+[0-9]+:[[:space:]]+ |
- cut -d: -f2- |
- awk '{$1=$1};1' |
- tr ' ' '\n' |
- sort -u |
- wc -l)
-}
-
-# Return a list of the receive irq handler cpus.
-# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
-# Reads /sys/kernel/irq/ in order, so algorithm depends on
-# irq_{rxq-0} < irq_{rxq-1}, etc.
-get_rx_irq_cpus() {
- CPUS=""
- # sort so that irq 2 is read before irq 10
- SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
- # Consider only as many queues as RSS actually uses. We assume that
- # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
- RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
- RXQ_COUNT=0
-
- for i in ${SORTED_IRQS}
- do
- [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
- # lookup relevant IRQs by action name
- [[ -e "$i/actions" ]] || continue
- cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
- irqname=$(<"$i/actions")
-
- # does the IRQ get called
- irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
- [[ -n "${irqcount}" ]] || continue
-
- # lookup CPU
- irq=$(basename "$i")
- cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
-
- if [[ -z "${CPUS}" ]]; then
- CPUS="${cpu}"
- else
- CPUS="${CPUS},${cpu}"
- fi
- RXQ_COUNT=$((RXQ_COUNT+1))
- done
-
- echo "${CPUS}"
-}
-
-get_disable_rfs_cmd() {
- echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
-}
-
-get_set_rps_bitmaps_cmd() {
- CMD=""
- for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
- do
- CMD="${CMD} echo $1 > ${i};"
- done
-
- echo "${CMD}"
-}
-
-get_disable_rps_cmd() {
- echo "$(get_set_rps_bitmaps_cmd 0)"
-}
-
-die() {
- echo "$1"
- exit 1
-}
-
-check_nic_rxhash_enabled() {
- local -r pattern="receive-hashing:\ on"
-
- ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
-}
-
-parse_opts() {
- local prog=$0
- shift 1
-
- while [[ "$1" =~ "-" ]]; do
- if [[ "$1" = "-irq_prefix" ]]; then
- shift
- IRQ_PATTERN="^$1-[0-9]*$"
- elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
- PROTO_FLAG="$1"
- elif [[ "$1" = "-4" ]]; then
- IP_FLAG="$1"
- SERVER_IP="${SERVER_IP4}"
- CLIENT_IP="${CLIENT_IP4}"
- elif [[ "$1" = "-6" ]]; then
- IP_FLAG="$1"
- SERVER_IP="${SERVER_IP6}"
- CLIENT_IP="${CLIENT_IP6}"
- elif [[ "$1" = "-rss" ]]; then
- TEST_RSS=true
- elif [[ "$1" = "-rps" ]]; then
- shift
- RPS_MAP="$1"
- elif [[ "$1" = "-i" ]]; then
- shift
- DEV="$1"
- else
- die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
- [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
- fi
- shift
- done
-}
-
-setup() {
- setup_loopback_environment "${DEV}"
-
- # Set up server_ns namespace and client_ns namespace
- setup_macvlan_ns "${DEV}" $server_ns server \
- "${SERVER_MAC}" "${SERVER_IP}"
- setup_macvlan_ns "${DEV}" $client_ns client \
- "${CLIENT_MAC}" "${CLIENT_IP}"
-}
-
-cleanup() {
- cleanup_macvlan_ns $server_ns server $client_ns client
- cleanup_loopback "${DEV}"
-}
-
-parse_opts $0 $@
-
-setup
-trap cleanup EXIT
-
-check_nic_rxhash_enabled
-
-# Actual test starts here
-if [[ "${TEST_RSS}" = true ]]; then
- # RPS/RFS must be disabled because they move packets between cpus,
- # which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
- eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
- ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
- -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
- -C "$(get_rx_irq_cpus)" -s -v &
-elif [[ ! -z "${RPS_MAP}" ]]; then
- eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
- ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
- -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
- -r "0x${RPS_MAP}" -s -v &
-else
- ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
- -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
-fi
-
-server_pid=$!
-
-ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
- "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
-
-client_pid=$!
-
-wait "${server_pid}"
-exit_code=$?
-kill -9 "${client_pid}"
-if [[ "${exit_code}" -eq 0 ]]; then
- echo "Test Succeeded!"
-fi
-exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
deleted file mode 100755
index 2fef34f4aba1..000000000000
--- a/tools/testing/selftests/net/toeplitz_client.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# A simple program for generating traffic for the toeplitz test.
-#
-# This program sends packets periodically for, conservatively, 20 seconds. The
-# intent is for the calling program to kill this program once it is no longer
-# needed, rather than waiting for the 20 second expiration.
-
-send_traffic() {
- expiration=$((SECONDS+20))
- while [[ "${SECONDS}" -lt "${expiration}" ]]
- do
- if [[ "${PROTO}" == "-u" ]]; then
- echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
- else
- echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
- fi
- sleep 0.001
- done
-}
-
-PROTO=$1
-IPVER=$2
-ADDR=$3
-PORT=$4
-
-send_traffic
diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh
index 282f14760940..a7c6ab8a0347 100755
--- a/tools/testing/selftests/net/traceroute.sh
+++ b/tools/testing/selftests/net/traceroute.sh
@@ -10,28 +10,6 @@ PAUSE_ON_FAIL=no
################################################################################
#
-log_test()
-{
- local rc=$1
- local expected=$2
- local msg="$3"
-
- if [ ${rc} -eq ${expected} ]; then
- printf "TEST: %-60s [ OK ]\n" "${msg}"
- nsuccess=$((nsuccess+1))
- else
- ret=1
- nfail=$((nfail+1))
- printf "TEST: %-60s [FAIL]\n" "${msg}"
- if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
- echo
- echo "hit enter to continue, 'q' to quit"
- read a
- [ "$a" = "q" ] && exit 1
- fi
- fi
-}
-
run_cmd()
{
local ns
@@ -58,6 +36,35 @@ run_cmd()
return $rc
}
+__check_traceroute_version()
+{
+ local cmd=$1; shift
+ local req_ver=$1; shift
+ local ver
+
+ req_ver=$(echo "$req_ver" | sed 's/\.//g')
+ ver=$($cmd -V 2>&1 | grep -Eo '[0-9]+.[0-9]+.[0-9]+' | sed 's/\.//g')
+ if [[ $ver -lt $req_ver ]]; then
+ return 1
+ else
+ return 0
+ fi
+}
+
+check_traceroute6_version()
+{
+ local req_ver=$1; shift
+
+ __check_traceroute_version traceroute6 "$req_ver"
+}
+
+check_traceroute_version()
+{
+ local req_ver=$1; shift
+
+ __check_traceroute_version traceroute "$req_ver"
+}
+
################################################################################
# create namespaces and interconnects
@@ -81,6 +88,8 @@ create_ns()
ip netns exec ${ns} ip -6 ro add unreachable default metric 8192
ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ${ns} sysctl -qw net.ipv4.icmp_ratelimit=0
+ ip netns exec ${ns} sysctl -qw net.ipv6.icmp.ratelimit=0
ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
@@ -203,34 +212,275 @@ setup_traceroute6()
run_traceroute6()
{
- if [ ! -x "$(command -v traceroute6)" ]; then
- echo "SKIP: Could not run IPV6 test without traceroute6"
- return
- fi
-
setup_traceroute6
+ RET=0
+
# traceroute6 host-2 from host-1 (expects 2000:102::2)
run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2"
- log_test $? 0 "IPV6 traceroute"
+ check_err $? "traceroute6 did not return 2000:102::2"
+ log_test "IPv6 traceroute"
cleanup_traceroute6
}
################################################################################
+# traceroute6 with VRF test
+#
+# Verify that in this scenario
+#
+# ------------------------ N2
+# | |
+# ------ ------ N3 ----
+# | R1 | | R2 |------|H2|
+# ------ ------ ----
+# | |
+# ------------------------ N1
+# |
+# ----
+# |H1|
+# ----
+#
+# Where H1's default route goes through R1 and R1's default route goes through
+# R2 over N2, traceroute6 from H1 to H2 reports R2's address on N2 and not N1.
+# The interfaces connecting R2 to the different subnets are membmer in a VRF
+# and the intention is to check that traceroute6 does not report the VRF's
+# address.
+#
+# Addresses are assigned as follows:
+#
+# N1: 2000:101::/64
+# N2: 2000:102::/64
+# N3: 2000:103::/64
+#
+# R1's host part of address: 1
+# R2's host part of address: 2
+# H1's host part of address: 3
+# H2's host part of address: 4
+#
+# For example:
+# the IPv6 address of R1's interface on N2 is 2000:102::1/64
+
+cleanup_traceroute6_vrf()
+{
+ cleanup_all_ns
+}
+
+setup_traceroute6_vrf()
+{
+ # Start clean
+ cleanup_traceroute6_vrf
+
+ setup_ns h1 h2 r1 r2
+ create_ns "$h1"
+ create_ns "$h2"
+ create_ns "$r1"
+ create_ns "$r2"
+
+ ip -n "$r2" link add name vrf100 up type vrf table 100
+ ip -n "$r2" addr add 2001:db8:100::1/64 dev vrf100
+
+ # Setup N3
+ connect_ns "$r2" eth3 - 2000:103::2/64 "$h2" eth3 - 2000:103::4/64
+
+ ip -n "$r2" link set dev eth3 master vrf100
+
+ ip -n "$h2" route add default via 2000:103::2
+
+ # Setup N2
+ connect_ns "$r1" eth2 - 2000:102::1/64 "$r2" eth2 - 2000:102::2/64
+
+ ip -n "$r1" route add default via 2000:102::2
+
+ ip -n "$r2" link set dev eth2 master vrf100
+
+ # Setup N1. host-1 and router-2 connect to a bridge in router-1.
+ ip -n "$r1" link add name br100 up type bridge
+ ip -n "$r1" addr add 2000:101::1/64 dev br100
+
+ connect_ns "$h1" eth0 - 2000:101::3/64 "$r1" eth0 - -
+
+ ip -n "$h1" route add default via 2000:101::1
+
+ ip -n "$r1" link set dev eth0 master br100
+
+ connect_ns "$r2" eth1 - 2000:101::2/64 "$r1" eth1 - -
+
+ ip -n "$r2" link set dev eth1 master vrf100
+
+ ip -n "$r1" link set dev eth1 master br100
+
+ # Prime the network
+ ip netns exec "$h1" ping6 -c5 2000:103::4 >/dev/null 2>&1
+}
+
+run_traceroute6_vrf()
+{
+ setup_traceroute6_vrf
+
+ RET=0
+
+ # traceroute6 host-2 from host-1 (expects 2000:102::2)
+ run_cmd "$h1" "traceroute6 2000:103::4 | grep 2000:102::2"
+ check_err $? "traceroute6 did not return 2000:102::2"
+ log_test "IPv6 traceroute with VRF"
+
+ cleanup_traceroute6_vrf
+}
+
+################################################################################
+# traceroute6 with ICMP extensions test
+#
+# Verify that in this scenario
+#
+# ---- ---- ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ---- N1 ---- N2 ----
+#
+# ICMP extensions are correctly reported. The loopback interfaces on all the
+# nodes are assigned global addresses and the interfaces connecting the nodes
+# are assigned IPv6 link-local addresses.
+
+cleanup_traceroute6_ext()
+{
+ cleanup_all_ns
+}
+
+setup_traceroute6_ext()
+{
+ # Start clean
+ cleanup_traceroute6_ext
+
+ setup_ns h1 r1 h2
+ create_ns "$h1"
+ create_ns "$r1"
+ create_ns "$h2"
+
+ # Setup N1
+ connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64
+ # Setup N2
+ connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64
+
+ # Setup H1
+ ip -n "$h1" address add 2001:db8:1::1/128 dev lo
+ ip -n "$h1" route add ::/0 nexthop via fe80::2 dev eth1
+
+ # Setup R1
+ ip -n "$r1" address add 2001:db8:1::2/128 dev lo
+ ip -n "$r1" route add 2001:db8:1::1/128 nexthop via fe80::1 dev eth1
+ ip -n "$r1" route add 2001:db8:1::3/128 nexthop via fe80::4 dev eth2
+
+ # Setup H2
+ ip -n "$h2" address add 2001:db8:1::3/128 dev lo
+ ip -n "$h2" route add ::/0 nexthop via fe80::3 dev eth2
+
+ # Prime the network
+ ip netns exec "$h1" ping6 -c5 2001:db8:1::3 >/dev/null 2>&1
+}
+
+traceroute6_ext_iio_iif_test()
+{
+ local r1_ifindex h2_ifindex
+ local pkt_len=$1; shift
+
+ # Test that incoming interface info is not appended by default.
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+ check_fail $? "Incoming interface info appended by default when should not"
+
+ # Test that the extension is appended when enabled.
+ run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+ check_err $? "Failed to enable incoming interface info extension on R1"
+
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+ check_err $? "Incoming interface info not appended after enable"
+
+ # Test that the extension is not appended when disabled.
+ run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+ check_err $? "Failed to disable incoming interface info extension on R1"
+
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+ check_fail $? "Incoming interface info appended after disable"
+
+ # Test that the extension is sent correctly from both R1 and H2.
+ run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01"
+ r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]')
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from R1"
+
+ run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01"
+ h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]')
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from H2"
+
+ # Add a global address on the incoming interface of R1 and check that
+ # it is reported.
+ run_cmd "$r1" "ip address add 2001:db8:100::1/64 dev eth1 nodad"
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,2001:db8:100::1,\"eth1\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from R1 after address addition"
+ run_cmd "$r1" "ip address del 2001:db8:100::1/64 dev eth1"
+
+ # Change name and MTU and make sure the result is still correct.
+ run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501"
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'"
+ check_err $? "Wrong incoming interface info reported from R1 after name and MTU change"
+ run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500"
+
+ run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00"
+ run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00"
+}
+
+run_traceroute6_ext()
+{
+ # Need at least version 2.1.5 for RFC 5837 support.
+ if ! check_traceroute6_version 2.1.5; then
+ log_test_skip "traceroute6 too old, missing ICMP extensions support"
+ return
+ fi
+
+ setup_traceroute6_ext
+
+ RET=0
+
+ ## General ICMP extensions tests
+
+ # Test that ICMP extensions are disabled by default.
+ run_cmd "$h1" "sysctl net.ipv6.icmp.errors_extension_mask | grep \"= 0$\""
+ check_err $? "ICMP extensions are not disabled by default"
+
+ # Test that unsupported values are rejected. Do not use "sysctl" as
+ # older versions do not return an error code upon failure.
+ run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+ check_fail $? "Unsupported sysctl value was not rejected"
+
+ ## Extension-specific tests
+
+ # Incoming interface info test. Test with various packet sizes,
+ # including the default one.
+ traceroute6_ext_iio_iif_test
+ traceroute6_ext_iio_iif_test 127
+ traceroute6_ext_iio_iif_test 128
+ traceroute6_ext_iio_iif_test 129
+
+ log_test "IPv6 traceroute with ICMP extensions"
+
+ cleanup_traceroute6_ext
+}
+
+################################################################################
# traceroute test
#
-# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario
+# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when
+# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively.
#
-# 1.0.3.1/24
+# 1.0.3.3/24 1.0.3.1/24
# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ----
# |H1|--------------------------|R1|--------------------------|H2|
# ---- N1 ---- N2 ----
#
-# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and
-# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary
-# address on N1.
-#
+# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and 1.0.3.1/24 and
+# 1.0.1.1/24 are R1's primary addresses on N1. The kernel is expected to prefer
+# a source address that is on the same subnet as the destination IP of the ICMP
+# error message.
cleanup_traceroute()
{
@@ -250,6 +500,7 @@ setup_traceroute()
connect_ns $h1 eth0 1.0.1.3/24 - \
$router eth1 1.0.3.1/24 -
+ ip -n "$h1" addr add 1.0.3.3/24 dev eth0
ip netns exec $h1 ip route add default via 1.0.1.1
ip netns exec $router ip addr add 1.0.1.1/24 dev eth1
@@ -268,18 +519,232 @@ setup_traceroute()
run_traceroute()
{
- if [ ! -x "$(command -v traceroute)" ]; then
- echo "SKIP: Could not run IPV4 test without traceroute"
+ setup_traceroute
+
+ RET=0
+
+ # traceroute host-2 from host-1. Expect a source IP that is on the same
+ # subnet as destination IP of the ICMP error message.
+ run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep -q 1.0.1.1"
+ check_err $? "traceroute did not return 1.0.1.1"
+ run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep -q 1.0.3.1"
+ check_err $? "traceroute did not return 1.0.3.1"
+ log_test "IPv4 traceroute"
+
+ cleanup_traceroute
+}
+
+################################################################################
+# traceroute with VRF test
+#
+# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when
+# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. The
+# intention is to check that the kernel does not choose an IP assigned to the
+# VRF device, but rather an address from the VRF port (eth1) that received the
+# packet that generates the ICMP error message.
+#
+# 1.0.4.1/24 (vrf100)
+# 1.0.3.3/24 1.0.3.1/24
+# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ---- N1 ---- N2 ----
+
+cleanup_traceroute_vrf()
+{
+ cleanup_all_ns
+}
+
+setup_traceroute_vrf()
+{
+ # Start clean
+ cleanup_traceroute_vrf
+
+ setup_ns h1 h2 router
+ create_ns "$h1"
+ create_ns "$h2"
+ create_ns "$router"
+
+ ip -n "$router" link add name vrf100 up type vrf table 100
+ ip -n "$router" addr add 1.0.4.1/24 dev vrf100
+
+ connect_ns "$h1" eth0 1.0.1.3/24 - \
+ "$router" eth1 1.0.1.1/24 -
+
+ ip -n "$h1" addr add 1.0.3.3/24 dev eth0
+ ip -n "$h1" route add default via 1.0.1.1
+
+ ip -n "$router" link set dev eth1 master vrf100
+ ip -n "$router" addr add 1.0.3.1/24 dev eth1
+ ip netns exec "$router" sysctl -qw \
+ net.ipv4.icmp_errors_use_inbound_ifaddr=1
+
+ connect_ns "$h2" eth0 1.0.2.4/24 - \
+ "$router" eth2 1.0.2.1/24 -
+
+ ip -n "$h2" route add default via 1.0.2.1
+
+ ip -n "$router" link set dev eth2 master vrf100
+
+ # Prime the network
+ ip netns exec "$h1" ping -c5 1.0.2.4 >/dev/null 2>&1
+}
+
+run_traceroute_vrf()
+{
+ setup_traceroute_vrf
+
+ RET=0
+
+ # traceroute host-2 from host-1. Expect a source IP that is on the same
+ # subnet as destination IP of the ICMP error message.
+ run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep 1.0.1.1"
+ check_err $? "traceroute did not return 1.0.1.1"
+ run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep 1.0.3.1"
+ check_err $? "traceroute did not return 1.0.3.1"
+ log_test "IPv4 traceroute with VRF"
+
+ cleanup_traceroute_vrf
+}
+
+################################################################################
+# traceroute with ICMP extensions test
+#
+# Verify that in this scenario
+#
+# ---- ---- ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ---- N1 ---- N2 ----
+#
+# ICMP extensions are correctly reported. The loopback interfaces on all the
+# nodes are assigned global addresses and the interfaces connecting the nodes
+# are assigned IPv6 link-local addresses.
+
+cleanup_traceroute_ext()
+{
+ cleanup_all_ns
+}
+
+setup_traceroute_ext()
+{
+ # Start clean
+ cleanup_traceroute_ext
+
+ setup_ns h1 r1 h2
+ create_ns "$h1"
+ create_ns "$r1"
+ create_ns "$h2"
+
+ # Setup N1
+ connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64
+ # Setup N2
+ connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64
+
+ # Setup H1
+ ip -n "$h1" address add 192.0.2.1/32 dev lo
+ ip -n "$h1" route add 0.0.0.0/0 nexthop via inet6 fe80::2 dev eth1
+
+ # Setup R1
+ ip -n "$r1" address add 192.0.2.2/32 dev lo
+ ip -n "$r1" route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1
+ ip -n "$r1" route add 192.0.2.3/32 nexthop via inet6 fe80::4 dev eth2
+
+ # Setup H2
+ ip -n "$h2" address add 192.0.2.3/32 dev lo
+ ip -n "$h2" route add 0.0.0.0/0 nexthop via inet6 fe80::3 dev eth2
+
+ # Prime the network
+ ip netns exec "$h1" ping -c5 192.0.2.3 >/dev/null 2>&1
+}
+
+traceroute_ext_iio_iif_test()
+{
+ local r1_ifindex h2_ifindex
+ local pkt_len=$1; shift
+
+ # Test that incoming interface info is not appended by default.
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+ check_fail $? "Incoming interface info appended by default when should not"
+
+ # Test that the extension is appended when enabled.
+ run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+ check_err $? "Failed to enable incoming interface info extension on R1"
+
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+ check_err $? "Incoming interface info not appended after enable"
+
+ # Test that the extension is not appended when disabled.
+ run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+ check_err $? "Failed to disable incoming interface info extension on R1"
+
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+ check_fail $? "Incoming interface info appended after disable"
+
+ # Test that the extension is sent correctly from both R1 and H2.
+ run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01"
+ r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]')
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from R1"
+
+ run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01"
+ h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]')
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from H2"
+
+ # Add a global address on the incoming interface of R1 and check that
+ # it is reported.
+ run_cmd "$r1" "ip address add 198.51.100.1/24 dev eth1"
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,198.51.100.1,\"eth1\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from R1 after address addition"
+ run_cmd "$r1" "ip address del 198.51.100.1/24 dev eth1"
+
+ # Change name and MTU and make sure the result is still correct.
+ # Re-add the route towards H1 since it was deleted when we removed the
+ # last IPv4 address from eth1 on R1.
+ run_cmd "$r1" "ip route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1"
+ run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501"
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'"
+ check_err $? "Wrong incoming interface info reported from R1 after name and MTU change"
+ run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500"
+
+ run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00"
+ run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00"
+}
+
+run_traceroute_ext()
+{
+ # Need at least version 2.1.5 for RFC 5837 support.
+ if ! check_traceroute_version 2.1.5; then
+ log_test_skip "traceroute too old, missing ICMP extensions support"
return
fi
- setup_traceroute
+ setup_traceroute_ext
- # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while.
- run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1"
- log_test $? 0 "IPV4 traceroute"
+ RET=0
- cleanup_traceroute
+ ## General ICMP extensions tests
+
+ # Test that ICMP extensions are disabled by default.
+ run_cmd "$h1" "sysctl net.ipv4.icmp_errors_extension_mask | grep \"= 0$\""
+ check_err $? "ICMP extensions are not disabled by default"
+
+ # Test that unsupported values are rejected. Do not use "sysctl" as
+ # older versions do not return an error code upon failure.
+ run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+ check_fail $? "Unsupported sysctl value was not rejected"
+
+ ## Extension-specific tests
+
+ # Incoming interface info test. Test with various packet sizes,
+ # including the default one.
+ traceroute_ext_iio_iif_test
+ traceroute_ext_iio_iif_test 127
+ traceroute_ext_iio_iif_test 128
+ traceroute_ext_iio_iif_test 129
+
+ log_test "IPv4 traceroute with ICMP extensions"
+
+ cleanup_traceroute_ext
}
################################################################################
@@ -288,15 +753,16 @@ run_traceroute()
run_tests()
{
run_traceroute6
+ run_traceroute6_vrf
+ run_traceroute6_ext
run_traceroute
+ run_traceroute_vrf
+ run_traceroute_ext
}
################################################################################
# main
-declare -i nfail=0
-declare -i nsuccess=0
-
while getopts :pv o
do
case $o in
@@ -306,7 +772,10 @@ do
esac
done
+require_command traceroute6
+require_command traceroute
+require_command jq
+
run_tests
-printf "\nTests passed: %3d\n" ${nsuccess}
-printf "Tests failed: %3d\n" ${nfail}
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c
index fa83918b62d1..0efc67b0357a 100644
--- a/tools/testing/selftests/net/tun.c
+++ b/tools/testing/selftests/net/tun.c
@@ -15,7 +15,7 @@
#include <sys/ioctl.h>
#include <sys/socket.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static int tun_attach(int fd, char *dev)
{
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index ec60a16c9307..bcc14688661d 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -77,6 +77,8 @@ static bool cfg_epollet;
static bool cfg_do_listen;
static uint16_t dest_port = 9000;
static bool cfg_print_nsec;
+static uint32_t ts_opt_id;
+static bool cfg_use_cmsg_opt_id;
static struct sockaddr_in daddr;
static struct sockaddr_in6 daddr6;
@@ -136,12 +138,13 @@ static void validate_key(int tskey, int tstype)
/* compare key for each subsequent request
* must only test for one type, the first one requested
*/
- if (saved_tskey == -1)
+ if (saved_tskey == -1 || cfg_use_cmsg_opt_id)
saved_tskey_type = tstype;
else if (saved_tskey_type != tstype)
return;
stepsize = cfg_proto == SOCK_STREAM ? cfg_payload_len : 1;
+ stepsize = cfg_use_cmsg_opt_id ? 0 : stepsize;
if (tskey != saved_tskey + stepsize) {
fprintf(stderr, "ERROR: key %d, expected %d\n",
tskey, saved_tskey + stepsize);
@@ -214,7 +217,7 @@ static void print_timestamp_usr(void)
static void print_timestamp(struct scm_timestamping *tss, int tstype,
int tskey, int payload_len)
{
- const char *tsname;
+ const char *tsname = NULL;
validate_key(tskey, tstype);
@@ -356,8 +359,12 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
}
}
- if (batch > 1)
+ if (batch > 1) {
fprintf(stderr, "batched %d timestamps\n", batch);
+ } else if (!batch) {
+ fprintf(stderr, "Failed to report timestamps\n");
+ test_failed = true;
+ }
}
static int recv_errmsg(int fd)
@@ -480,7 +487,7 @@ static void fill_header_udp(void *p, bool is_ipv4)
static void do_test(int family, unsigned int report_opt)
{
- char control[CMSG_SPACE(sizeof(uint32_t))];
+ char control[2 * CMSG_SPACE(sizeof(uint32_t))];
struct sockaddr_ll laddr;
unsigned int sock_opt;
struct cmsghdr *cmsg;
@@ -620,18 +627,32 @@ static void do_test(int family, unsigned int report_opt)
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
- if (cfg_use_cmsg) {
+ if (cfg_use_cmsg || cfg_use_cmsg_opt_id) {
memset(control, 0, sizeof(control));
msg.msg_control = control;
- msg.msg_controllen = sizeof(control);
+ msg.msg_controllen = cfg_use_cmsg * CMSG_SPACE(sizeof(uint32_t));
+ msg.msg_controllen += cfg_use_cmsg_opt_id * CMSG_SPACE(sizeof(uint32_t));
+
+ cmsg = NULL;
+ if (cfg_use_cmsg) {
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SO_TIMESTAMPING;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t));
- cmsg = CMSG_FIRSTHDR(&msg);
- cmsg->cmsg_level = SOL_SOCKET;
- cmsg->cmsg_type = SO_TIMESTAMPING;
- cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t));
+ *((uint32_t *)CMSG_DATA(cmsg)) = report_opt;
+ }
+ if (cfg_use_cmsg_opt_id) {
+ cmsg = cmsg ? CMSG_NXTHDR(&msg, cmsg) : CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_TS_OPT_ID;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t));
+
+ *((uint32_t *)CMSG_DATA(cmsg)) = ts_opt_id;
+ saved_tskey = ts_opt_id;
+ }
- *((uint32_t *) CMSG_DATA(cmsg)) = report_opt;
}
val = sendmsg(fd, &msg, 0);
@@ -681,6 +702,7 @@ static void __attribute__((noreturn)) usage(const char *filepath)
" -L listen on hostname and port\n"
" -n: set no-payload option\n"
" -N: print timestamps and durations in nsec (instead of usec)\n"
+ " -o N: use SCM_TS_OPT_ID control message to provide N as tskey\n"
" -p N: connect to port N\n"
" -P: use PF_PACKET\n"
" -r: use raw\n"
@@ -701,7 +723,7 @@ static void parse_opt(int argc, char **argv)
int c;
while ((c = getopt(argc, argv,
- "46bc:CeEFhIl:LnNp:PrRS:t:uv:V:x")) != -1) {
+ "46bc:CeEFhIl:LnNo:p:PrRS:t:uv:V:x")) != -1) {
switch (c) {
case '4':
do_ipv6 = 0;
@@ -742,6 +764,10 @@ static void parse_opt(int argc, char **argv)
case 'N':
cfg_print_nsec = true;
break;
+ case 'o':
+ ts_opt_id = strtoul(optarg, NULL, 10);
+ cfg_use_cmsg_opt_id = true;
+ break;
case 'p':
dest_port = strtoul(optarg, NULL, 10);
break;
@@ -799,6 +825,8 @@ static void parse_opt(int argc, char **argv)
error(1, 0, "cannot ask for pktinfo over pf_packet");
if (cfg_busy_poll && cfg_use_epoll)
error(1, 0, "pass epoll or busy_poll, not both");
+ if (cfg_proto == SOCK_STREAM && cfg_use_cmsg_opt_id)
+ error(1, 0, "TCP sockets don't support SCM_TS_OPT_ID");
if (optind != argc - 1)
error(1, 0, "missing required hostname argument");
diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh
index 25baca4b148e..fe4649bb8786 100755
--- a/tools/testing/selftests/net/txtimestamp.sh
+++ b/tools/testing/selftests/net/txtimestamp.sh
@@ -37,11 +37,13 @@ run_test_v4v6() {
run_test_tcpudpraw() {
local -r args=$@
- run_test_v4v6 ${args} # tcp
- run_test_v4v6 ${args} -u # udp
- run_test_v4v6 ${args} -r # raw
- run_test_v4v6 ${args} -R # raw (IPPROTO_RAW)
- run_test_v4v6 ${args} -P # pf_packet
+ run_test_v4v6 ${args} # tcp
+ run_test_v4v6 ${args} -u # udp
+ run_test_v4v6 ${args} -u -o 42 # udp with fixed tskey
+ run_test_v4v6 ${args} -r # raw
+ run_test_v4v6 ${args} -r -o 42 # raw
+ run_test_v4v6 ${args} -R # raw (IPPROTO_RAW)
+ run_test_v4v6 ${args} -P # pf_packet
}
run_test_all() {
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 11a1ebda564f..b17e032a6d75 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,12 +3,10 @@
#
# Run a series of udpgro functional tests.
-source net_helper.sh
+source lib.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.bpf.o"
-
# set global exit status, but never reset nonzero one.
check_err()
{
@@ -38,7 +36,7 @@ cfg_veth() {
ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
ip -netns "${PEER_NS}" link set dev veth1 up
- ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
+ ip netns exec "${PEER_NS}" ethtool -K veth1 gro on
}
run_one() {
@@ -46,17 +44,19 @@ run_one() {
local -r all="$@"
local -r tx_args=${all%rx*}
local -r rx_args=${all#*rx}
+ local ret=0
cfg_veth
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \
- echo "ok" || \
- echo "failed" &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} &
+ local PID1=$!
wait_local_port_listen ${PEER_NS} 8000 udp
./udpgso_bench_tx ${tx_args}
- ret=$?
- wait $(jobs -p)
+ check_err $?
+ wait ${PID1}
+ check_err $?
+ [ "$ret" -eq 0 ] && echo "ok" || echo "failed"
return $ret
}
@@ -73,6 +73,7 @@ run_one_nat() {
local -r all="$@"
local -r tx_args=${all%rx*}
local -r rx_args=${all#*rx}
+ local ret=0
if [[ ${tx_args} = *-4* ]]; then
ipt_cmd=iptables
@@ -93,16 +94,17 @@ run_one_nat() {
# ... so that GRO will match the UDP_GRO enabled socket, but packets
# will land on the 'plain' one
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 &
- pid=$!
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \
- echo "ok" || \
- echo "failed"&
+ local PID1=$!
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${family} -b ${addr2%/*} ${rx_args} &
+ local PID2=$!
wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
- ret=$?
- kill -INT $pid
- wait $(jobs -p)
+ check_err $?
+ kill -INT ${PID1}
+ wait ${PID2}
+ check_err $?
+ [ "$ret" -eq 0 ] && echo "ok" || echo "failed"
return $ret
}
@@ -111,20 +113,26 @@ run_one_2sock() {
local -r all="$@"
local -r tx_args=${all%rx*}
local -r rx_args=${all#*rx}
+ local ret=0
cfg_veth
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 &
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \
- echo "ok" || \
- echo "failed" &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} -p 12345 &
+ local PID1=$!
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 100 ${rx_args} &
+ local PID2=$!
wait_local_port_listen "${PEER_NS}" 12345 udp
./udpgso_bench_tx ${tx_args} -p 12345
+ check_err $?
wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
- ret=$?
- wait $(jobs -p)
+ check_err $?
+ wait ${PID1}
+ check_err $?
+ wait ${PID2}
+ check_err $?
+ [ "$ret" -eq 0 ] && echo "ok" || echo "failed"
return $ret
}
@@ -196,11 +204,6 @@ run_all() {
return $ret
}
-if [ ! -f ${BPF_FILE} ]; then
- echo "Missing ${BPF_FILE}. Run 'make' first"
- exit -1
-fi
-
if [[ $# -eq 0 ]]; then
run_all
elif [[ $1 == "__subprocess" ]]; then
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index c51ea90a1395..54fa4821bc5e 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,11 +3,11 @@
#
# Run a series of udpgro benchmarks
-source net_helper.sh
+source lib.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 17404f49cdb6..9a2cfec1153e 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -3,11 +3,11 @@
#
# Run a series of udpgro benchmarks
-source net_helper.sh
+source lib.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 550d8eb3e224..a39fdc4aa2ff 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -1,9 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source net_helper.sh
+source lib.sh
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
readonly BASE="ns-$(mktemp -u XXXXXX)"
readonly SRC=2
readonly DST=1
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 3e74cfa1a2bf..36ff28af4b19 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -67,6 +67,7 @@ struct testcase {
int gso_len; /* mss after applying gso */
int r_num_mss; /* recv(): number of calls of full mss */
int r_len_last; /* recv(): size of last non-mss dgram, if any */
+ bool v6_ext_hdr; /* send() dgrams with IPv6 extension headers */
};
const struct in6_addr addr6 = {
@@ -77,6 +78,8 @@ const struct in_addr addr4 = {
__constant_htonl(0x0a000001), /* 10.0.0.1 */
};
+static const char ipv6_hopopts_pad1[8] = { 0 };
+
struct testcase testcases_v4[] = {
{
/* no GSO: send a single byte */
@@ -100,6 +103,19 @@ struct testcase testcases_v4[] = {
.r_num_mss = 1,
},
{
+ /* datalen <= MSS < gso_len: will fall back to no GSO */
+ .tlen = CONST_MSS_V4,
+ .gso_len = CONST_MSS_V4 + 1,
+ .r_num_mss = 0,
+ .r_len_last = CONST_MSS_V4,
+ },
+ {
+ /* MSS < datalen < gso_len: fail */
+ .tlen = CONST_MSS_V4 + 1,
+ .gso_len = CONST_MSS_V4 + 2,
+ .tfail = true,
+ },
+ {
/* send a single MSS + 1B */
.tlen = CONST_MSS_V4 + 1,
.gso_len = CONST_MSS_V4,
@@ -203,6 +219,19 @@ struct testcase testcases_v6[] = {
.r_num_mss = 1,
},
{
+ /* datalen <= MSS < gso_len: will fall back to no GSO */
+ .tlen = CONST_MSS_V6,
+ .gso_len = CONST_MSS_V6 + 1,
+ .r_num_mss = 0,
+ .r_len_last = CONST_MSS_V6,
+ },
+ {
+ /* MSS < datalen < gso_len: fail */
+ .tlen = CONST_MSS_V6 + 1,
+ .gso_len = CONST_MSS_V6 + 2,
+ .tfail = true
+ },
+ {
/* send a single MSS + 1B */
.tlen = CONST_MSS_V6 + 1,
.gso_len = CONST_MSS_V6,
@@ -256,6 +285,13 @@ struct testcase testcases_v6[] = {
.r_num_mss = 2,
},
{
+ /* send 2 1B segments with extension headers */
+ .tlen = 2,
+ .gso_len = 1,
+ .r_num_mss = 2,
+ .v6_ext_hdr = true,
+ },
+ {
/* send 2B + 2B + 1B segments */
.tlen = 5,
.gso_len = 2,
@@ -396,11 +432,18 @@ static void run_one(struct testcase *test, int fdt, int fdr,
int i, ret, val, mss;
bool sent;
- fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
+ fprintf(stderr, "ipv%d tx:%d gso:%d %s%s\n",
addr->sa_family == AF_INET ? 4 : 6,
test->tlen, test->gso_len,
+ test->v6_ext_hdr ? "ext-hdr " : "",
test->tfail ? "(fail)" : "");
+ if (test->v6_ext_hdr) {
+ if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS,
+ ipv6_hopopts_pad1, sizeof(ipv6_hopopts_pad1)))
+ error(1, errno, "setsockopt ipv6 hopopts");
+ }
+
val = test->gso_len;
if (cfg_do_setsockopt) {
if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
@@ -412,6 +455,12 @@ static void run_one(struct testcase *test, int fdt, int fdr,
error(1, 0, "send succeeded while expecting failure");
if (!sent && !test->tfail)
error(1, 0, "send failed while expecting success");
+
+ if (test->v6_ext_hdr) {
+ if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, NULL, 0))
+ error(1, errno, "setsockopt ipv6 hopopts clear");
+ }
+
if (!sent)
return;
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index 640bc43452fa..88fa1d53ba2b 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -92,6 +92,9 @@ run_udp() {
echo "udp"
run_in_netns ${args}
+ echo "udp sendmmsg"
+ run_in_netns ${args} -m
+
echo "udp gso"
run_in_netns ${args} -S 0
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index 477392715a9a..86d80cce55b4 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -25,7 +25,7 @@
#include <sys/types.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef ETH_MAX_MTU
#define ETH_MAX_MTU 0xFFFFU
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
index f52aa5f7da52..3e751234ccfe 100755
--- a/tools/testing/selftests/net/unicast_extensions.sh
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -30,14 +30,7 @@
source lib.sh
-# nettest can be run from PATH or from same directory as this selftest
-if ! which nettest >/dev/null; then
- PATH=$PWD:$PATH
- if ! which nettest >/dev/null; then
- echo "'nettest' command not found; skipping tests"
- exit $ksft_skip
- fi
-fi
+check_gen_prog "nettest"
result=0
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 4f1edbafb946..9709dd067c72 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
readonly BASE=`basename $STATS`
readonly SRC=2
@@ -46,8 +46,6 @@ create_ns() {
ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
done
- echo "#kernel" > $BASE
- chmod go-rw $BASE
}
__chk_flag() {
diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh
new file mode 100755
index 000000000000..e8c02c64e03a
--- /dev/null
+++ b/tools/testing/selftests/net/vlan_bridge_binding.sh
@@ -0,0 +1,258 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+ALL_TESTS="
+ test_binding_on
+ test_binding_off
+ test_binding_toggle_on
+ test_binding_toggle_off
+ test_binding_toggle_on_when_upper_down
+ test_binding_toggle_off_when_upper_down
+ test_binding_toggle_on_when_lower_down
+ test_binding_toggle_off_when_lower_down
+"
+
+setup_prepare()
+{
+ local port
+
+ adf_ip_link_add br up type bridge vlan_filtering 1
+
+ for port in d1 d2 d3; do
+ adf_ip_link_add $port type veth peer name r$port
+ adf_ip_link_set_up $port
+ adf_ip_link_set_up r$port
+ adf_ip_link_set_master $port br
+ done
+
+ adf_bridge_vlan_add vid 11 dev br self
+ adf_bridge_vlan_add vid 11 dev d1 master
+
+ adf_bridge_vlan_add vid 12 dev br self
+ adf_bridge_vlan_add vid 12 dev d2 master
+
+ adf_bridge_vlan_add vid 13 dev br self
+ adf_bridge_vlan_add vid 13 dev d1 master
+ adf_bridge_vlan_add vid 13 dev d2 master
+
+ adf_bridge_vlan_add vid 14 dev br self
+ adf_bridge_vlan_add vid 14 dev d1 master
+ adf_bridge_vlan_add vid 14 dev d2 master
+ adf_bridge_vlan_add vid 14 dev d3 master
+}
+
+operstate_is()
+{
+ local dev=$1; shift
+ local expect=$1; shift
+
+ local operstate=$(ip -j link show $dev | jq -r .[].operstate)
+ if [[ $operstate == UP ]]; then
+ operstate=1
+ elif [[ $operstate == DOWN || $operstate == LOWERLAYERDOWN ]]; then
+ operstate=0
+ fi
+ echo -n $operstate
+ [[ $operstate == $expect ]]
+}
+
+check_operstate()
+{
+ local dev=$1; shift
+ local expect=$1; shift
+ local operstate
+
+ operstate=$(busywait 1000 \
+ operstate_is "$dev" "$expect")
+ check_err $? "Got operstate of $operstate, expected $expect"
+}
+
+add_one_vlan()
+{
+ local link=$1; shift
+ local id=$1; shift
+
+ adf_ip_link_add $link.$id link $link type vlan id $id "$@"
+}
+
+add_vlans()
+{
+ add_one_vlan br 11 "$@"
+ add_one_vlan br 12 "$@"
+ add_one_vlan br 13 "$@"
+ add_one_vlan br 14 "$@"
+}
+
+set_vlans()
+{
+ ip link set dev br.11 "$@"
+ ip link set dev br.12 "$@"
+ ip link set dev br.13 "$@"
+ ip link set dev br.14 "$@"
+}
+
+down_netdevs()
+{
+ local dev
+
+ for dev in "$@"; do
+ adf_ip_link_set_down $dev
+ done
+}
+
+check_operstates()
+{
+ local opst_11=$1; shift
+ local opst_12=$1; shift
+ local opst_13=$1; shift
+ local opst_14=$1; shift
+
+ check_operstate br.11 $opst_11
+ check_operstate br.12 $opst_12
+ check_operstate br.13 $opst_13
+ check_operstate br.14 $opst_14
+}
+
+do_test_binding()
+{
+ local inject=$1; shift
+ local what=$1; shift
+ local opsts_d1=$1; shift
+ local opsts_d2=$1; shift
+ local opsts_d12=$1; shift
+ local opsts_d123=$1; shift
+
+ RET=0
+
+ defer_scope_push
+ down_netdevs d1
+ $inject
+ check_operstates $opsts_d1
+ defer_scope_pop
+
+ defer_scope_push
+ down_netdevs d2
+ $inject
+ check_operstates $opsts_d2
+ defer_scope_pop
+
+ defer_scope_push
+ down_netdevs d1 d2
+ $inject
+ check_operstates $opsts_d12
+ defer_scope_pop
+
+ defer_scope_push
+ down_netdevs d1 d2 d3
+ $inject
+ check_operstates $opsts_d123
+ defer_scope_pop
+
+ log_test "Test bridge_binding $what"
+}
+
+do_test_binding_on()
+{
+ local inject=$1; shift
+ local what=$1; shift
+
+ do_test_binding "$inject" "$what" \
+ "0 1 1 1" \
+ "1 0 1 1" \
+ "0 0 0 1" \
+ "0 0 0 0"
+}
+
+do_test_binding_off()
+{
+ local inject=$1; shift
+ local what=$1; shift
+
+ do_test_binding "$inject" "$what" \
+ "1 1 1 1" \
+ "1 1 1 1" \
+ "1 1 1 1" \
+ "0 0 0 0"
+}
+
+test_binding_on()
+{
+ add_vlans bridge_binding on
+ set_vlans up
+ do_test_binding_on : "on"
+}
+
+test_binding_off()
+{
+ add_vlans bridge_binding off
+ set_vlans up
+ do_test_binding_off : "off"
+}
+
+test_binding_toggle_on()
+{
+ add_vlans bridge_binding off
+ set_vlans up
+ set_vlans type vlan bridge_binding on
+ do_test_binding_on : "off->on"
+}
+
+test_binding_toggle_off()
+{
+ add_vlans bridge_binding on
+ set_vlans up
+ set_vlans type vlan bridge_binding off
+ do_test_binding_off : "on->off"
+}
+
+adf_set_binding_on()
+{
+ set_vlans type vlan bridge_binding on
+ defer set_vlans type vlan bridge_binding off
+}
+
+adf_set_binding_off()
+{
+ set_vlans type vlan bridge_binding off
+ defer set_vlans type vlan bridge_binding on
+}
+
+test_binding_toggle_on_when_lower_down()
+{
+ add_vlans bridge_binding off
+ set_vlans up
+ do_test_binding_on adf_set_binding_on "off->on when lower down"
+}
+
+test_binding_toggle_off_when_lower_down()
+{
+ add_vlans bridge_binding on
+ set_vlans up
+ do_test_binding_off adf_set_binding_off "on->off when lower down"
+}
+
+test_binding_toggle_on_when_upper_down()
+{
+ add_vlans bridge_binding off
+ set_vlans type vlan bridge_binding on
+ set_vlans up
+ do_test_binding_on : "off->on when upper down"
+}
+
+test_binding_toggle_off_when_upper_down()
+{
+ add_vlans bridge_binding on
+ set_vlans type vlan bridge_binding off
+ set_vlans up
+ do_test_binding_off : "on->off when upper down"
+}
+
+require_command jq
+
+trap defer_scopes_cleanup EXIT
+setup_prepare
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh
index 7bc804ffaf7c..e195d5cab6f7 100755
--- a/tools/testing/selftests/net/vlan_hw_filter.sh
+++ b/tools/testing/selftests/net/vlan_hw_filter.sh
@@ -3,27 +3,101 @@
readonly NETNS="ns-$(mktemp -u XXXXXX)"
+ALL_TESTS="
+ test_vlan_filter_check
+ test_vlan0_del_crash_01
+ test_vlan0_del_crash_02
+ test_vlan0_del_crash_03
+ test_vid0_memleak
+"
+
ret=0
+setup() {
+ ip netns add ${NETNS}
+}
+
cleanup() {
- ip netns del $NETNS
+ ip netns del $NETNS 2>/dev/null
}
trap cleanup EXIT
fail() {
- echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
- ret=1
+ echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+ ret=1
+}
+
+tests_run()
+{
+ local current_test
+ for current_test in ${TESTS:-$ALL_TESTS}; do
+ $current_test
+ done
+}
+
+test_vlan_filter_check() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2
+ ip netns exec ${NETNS} ip link set bond_slave_1 master bond0
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0
+ ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0
+ ip netns exec ${NETNS} ip link set bond_slave_1 nomaster
+ ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function"
+ cleanup
}
-ip netns add ${NETNS}
-ip netns exec ${NETNS} ip link add bond0 type bond mode 0
-ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2
-ip netns exec ${NETNS} ip link set bond_slave_1 master bond0
-ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
-ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0
-ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0
-ip netns exec ${NETNS} ip link set bond_slave_1 nomaster
-ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function"
+#enable vlan_filter feature of real_dev with vlan0 during running time
+test_vlan0_del_crash_01() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link set dev bond0 up
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+ ip netns exec ${NETNS} ip link set dev bond0 down
+ ip netns exec ${NETNS} ip link set dev bond0 up
+ ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+
+#enable vlan_filter feature and add vlan0 for real_dev during running time
+test_vlan0_del_crash_02() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link set dev bond0 up
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+ ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+ ip netns exec ${NETNS} ip link set dev bond0 down
+ ip netns exec ${NETNS} ip link set dev bond0 up
+ ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+
+#enable vlan_filter feature of real_dev during running time
+#test kernel_bug of vlan unregister
+test_vlan0_del_crash_03() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link set dev bond0 up
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+ ip netns exec ${NETNS} ip link set dev bond0 down
+ ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+
+test_vid0_memleak() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 up type bond mode 0
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link del dev bond0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+tests_run
exit $ret
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index 2da32f4c479b..ce34cb2e6e0b 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -275,7 +275,7 @@ setup_sym()
# Wait for ip config to settle
- sleep 2
+ slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
}
setup_asym()
@@ -370,7 +370,7 @@ setup_asym()
ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
# Wait for ip config to settle
- sleep 2
+ slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
}
check_connectivity()
@@ -533,6 +533,86 @@ ipv6_ping_frag_asym()
ipv6_ping_frag asym
}
+ipv4_ping_local()
+{
+ log_section "IPv4 (sym route): VRF ICMP local error route lookup ping"
+
+ setup_sym
+
+ check_connectivity || return
+
+ run_cmd ip netns exec $r1 ip vrf exec blue ping -c1 -w1 ${H2_N2_IP}
+ log_test $? 0 "VRF ICMP local IPv4"
+}
+
+ipv4_tcp_local()
+{
+ log_section "IPv4 (sym route): VRF tcp local connection"
+
+ setup_sym
+
+ check_connectivity || return
+
+ run_cmd nettest -s -O "$h2" -l ${H2_N2_IP} -I eth0 -3 eth0 &
+ sleep 1
+ run_cmd nettest -N "$r1" -d blue -r ${H2_N2_IP}
+ log_test $? 0 "VRF tcp local connection IPv4"
+}
+
+ipv4_udp_local()
+{
+ log_section "IPv4 (sym route): VRF udp local connection"
+
+ setup_sym
+
+ check_connectivity || return
+
+ run_cmd nettest -s -D -O "$h2" -l ${H2_N2_IP} -I eth0 -3 eth0 &
+ sleep 1
+ run_cmd nettest -D -N "$r1" -d blue -r ${H2_N2_IP}
+ log_test $? 0 "VRF udp local connection IPv4"
+}
+
+ipv6_ping_local()
+{
+ log_section "IPv6 (sym route): VRF ICMP local error route lookup ping"
+
+ setup_sym
+
+ check_connectivity6 || return
+
+ run_cmd ip netns exec $r1 ip vrf exec blue ${ping6} -c1 -w1 ${H2_N2_IP6}
+ log_test $? 0 "VRF ICMP local IPv6"
+}
+
+ipv6_tcp_local()
+{
+ log_section "IPv6 (sym route): VRF tcp local connection"
+
+ setup_sym
+
+ check_connectivity6 || return
+
+ run_cmd nettest -s -6 -O "$h2" -l ${H2_N2_IP6} -I eth0 -3 eth0 &
+ sleep 1
+ run_cmd nettest -6 -N "$r1" -d blue -r ${H2_N2_IP6}
+ log_test $? 0 "VRF tcp local connection IPv6"
+}
+
+ipv6_udp_local()
+{
+ log_section "IPv6 (sym route): VRF udp local connection"
+
+ setup_sym
+
+ check_connectivity6 || return
+
+ run_cmd nettest -s -6 -D -O "$h2" -l ${H2_N2_IP6} -I eth0 -3 eth0 &
+ sleep 1
+ run_cmd nettest -6 -D -N "$r1" -d blue -r ${H2_N2_IP6}
+ log_test $? 0 "VRF udp local connection IPv6"
+}
+
################################################################################
# usage
@@ -555,8 +635,12 @@ EOF
# Some systems don't have a ping6 binary anymore
command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
-TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym"
-TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_ttl_asym ipv6_traceroute_asym"
+check_gen_prog "nettest"
+
+TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_local ipv4_tcp_local
+ipv4_udp_local ipv4_ping_ttl_asym ipv4_traceroute_asym"
+TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_local ipv6_tcp_local ipv6_udp_local
+ipv6_ping_ttl_asym ipv6_traceroute_asym"
ret=0
nsuccess=0
@@ -594,12 +678,18 @@ do
ipv4_traceroute|traceroute) ipv4_traceroute;;&
ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;&
ipv4_ping_frag|ping) ipv4_ping_frag;;&
+ ipv4_ping_local|ping) ipv4_ping_local;;&
+ ipv4_tcp_local) ipv4_tcp_local;;&
+ ipv4_udp_local) ipv4_udp_local;;&
ipv6_ping_ttl|ping) ipv6_ping_ttl;;&
ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;&
ipv6_traceroute|traceroute) ipv6_traceroute;;&
ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;&
ipv6_ping_frag|ping) ipv6_ping_frag;;&
+ ipv6_ping_local|ping) ipv6_ping_local;;&
+ ipv6_tcp_local) ipv6_tcp_local;;&
+ ipv6_udp_local) ipv6_udp_local;;&
# setup namespaces and config, but do not run any tests
setup_sym|setup) setup_sym; exit 0;;
diff --git a/tools/testing/selftests/net/xfrm_policy_add_speed.sh b/tools/testing/selftests/net/xfrm_policy_add_speed.sh
new file mode 100755
index 000000000000..2fab29d3cb91
--- /dev/null
+++ b/tools/testing/selftests/net/xfrm_policy_add_speed.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+source lib.sh
+
+timeout=4m
+ret=0
+tmp=$(mktemp)
+cleanup() {
+ cleanup_all_ns
+ rm -f "$tmp"
+}
+
+trap cleanup EXIT
+
+maxpolicies=100000
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && maxpolicies=10000
+
+do_dummies4() {
+ local dir="$1"
+ local max="$2"
+
+ local policies
+ local pfx
+ pfx=30
+ policies=0
+
+ ip netns exec "$ns" ip xfrm policy flush
+
+ for i in $(seq 1 100);do
+ local s
+ local d
+ for j in $(seq 1 255);do
+ s=$((i+0))
+ d=$((i+100))
+
+ for a in $(seq 1 8 255); do
+ policies=$((policies+1))
+ [ "$policies" -gt "$max" ] && return
+ echo xfrm policy add src 10.$s.$j.0/30 dst 10.$d.$j.$a/$pfx dir $dir action block
+ done
+ for a in $(seq 1 8 255); do
+ policies=$((policies+1))
+ [ "$policies" -gt "$max" ] && return
+ echo xfrm policy add src 10.$s.$j.$a/30 dst 10.$d.$j.0/$pfx dir $dir action block
+ done
+ done
+ done
+}
+
+setup_ns ns
+
+do_bench()
+{
+ local max="$1"
+
+ start=$(date +%s%3N)
+ do_dummies4 "out" "$max" > "$tmp"
+ if ! timeout "$timeout" ip netns exec "$ns" ip -batch "$tmp";then
+ echo "WARNING: policy insertion cancelled after $timeout"
+ ret=1
+ fi
+ stop=$(date +%s%3N)
+
+ result=$((stop-start))
+
+ policies=$(wc -l < "$tmp")
+ printf "Inserted %-06s policies in $result ms\n" $policies
+
+ have=$(ip netns exec "$ns" ip xfrm policy show | grep "action block" | wc -l)
+ if [ "$have" -ne "$policies" ]; then
+ echo "WARNING: mismatch, have $have policies, expected $policies"
+ ret=1
+ fi
+}
+
+p=100
+while [ $p -le "$maxpolicies" ]; do
+ do_bench "$p"
+ p="${p}0"
+done
+
+exit $ret
diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk
index 59cb26cf3f73..793a2fc33d9f 100644
--- a/tools/testing/selftests/net/ynl.mk
+++ b/tools/testing/selftests/net/ynl.mk
@@ -5,10 +5,13 @@
# Inputs:
#
# YNL_GENS: families we need in the selftests
-# YNL_PROGS: TEST_PROGS which need YNL (TODO, none exist, yet)
+# YNL_GEN_PROGS: TEST_GEN_PROGS which need YNL
# YNL_GEN_FILES: TEST_GEN_FILES which need YNL
-YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES))
+YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) \
+ $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_PROGS))
+YNL_SPECS := \
+ $(patsubst %,$(top_srcdir)/Documentation/netlink/specs/%.yaml,$(YNL_GENS))
$(YNL_OUTPUTS): $(OUTPUT)/libynl.a
$(YNL_OUTPUTS): CFLAGS += \
@@ -16,6 +19,22 @@ $(YNL_OUTPUTS): CFLAGS += \
-I$(top_srcdir)/tools/net/ynl/lib/ \
-I$(top_srcdir)/tools/net/ynl/generated/
-$(OUTPUT)/libynl.a:
- $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a
+# Make sure we rebuild libynl if user added a new family. We can't easily
+# depend on the contents of a variable so create a fake file with a hash.
+YNL_GENS_HASH := $(shell echo $(YNL_GENS) | sha1sum | cut -c1-8)
+$(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig:
+ $(Q)rm -f $(OUTPUT)/.libynl-*.sig
+ $(Q)touch $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig
+
+$(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig
+ $(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a
+ $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \
+ GENS="$(YNL_GENS)" RSTS="" libynl.a
$(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a
+
+EXTRA_CLEAN += \
+ $(top_srcdir)/tools/net/ynl/pyynl/__pycache__ \
+ $(top_srcdir)/tools/net/ynl/pyynl/lib/__pycache__ \
+ $(top_srcdir)/tools/net/ynl/lib/*.[ado] \
+ $(OUTPUT)/.libynl-*.sig \
+ $(OUTPUT)/libynl.a